package org.gcube.dataanalysis.executor.job.management;
|
|
|
|
import java.io.File;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Timer;
|
|
import java.util.TimerTask;
|
|
import java.util.UUID;
|
|
import java.util.concurrent.ExecutorService;
|
|
import java.util.concurrent.Executors;
|
|
|
|
import javax.jms.ExceptionListener;
|
|
import javax.jms.JMSException;
|
|
import javax.jms.Message;
|
|
import javax.jms.MessageListener;
|
|
|
|
import org.apache.activemq.ActiveMQConnection;
|
|
import org.gcube.common.clients.ProxyBuilderImpl;
|
|
import org.gcube.common.resources.gcore.ServiceEndpoint;
|
|
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
|
|
import org.gcube.common.scope.api.ScopeProvider;
|
|
import org.gcube.contentmanagement.blobstorage.resource.StorageObject;
|
|
import org.gcube.contentmanagement.blobstorage.service.IClient;
|
|
import org.gcube.contentmanagement.graphtools.utils.HttpRequest;
|
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
|
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
|
|
import org.gcube.contentmanager.storageclient.wrapper.MemoryType;
|
|
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
|
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
|
import org.gcube.dataanalysis.ecoengine.utils.Operations;
|
|
import org.gcube.dataanalysis.executor.messagequeue.ATTRIBUTE;
|
|
import org.gcube.dataanalysis.executor.messagequeue.Consumer;
|
|
import org.gcube.dataanalysis.executor.messagequeue.Producer;
|
|
import org.gcube.dataanalysis.executor.messagequeue.QCONSTANTS;
|
|
import org.gcube.dataanalysis.executor.messagequeue.QueueManager;
|
|
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.BionymFlexibleWorkflowTransducer;
|
|
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters;
|
|
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
|
|
import org.gcube.dataanalysis.executor.util.InfraRetrieval;
|
|
import org.gcube.resources.discovery.client.api.DiscoveryClient;
|
|
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
|
|
import org.gcube.vremanagement.executor.api.SmartExecutor;
|
|
import org.gcube.vremanagement.executor.api.types.LaunchParameter;
|
|
import org.gcube.vremanagement.executor.client.plugins.ExecutorPlugin;
|
|
import org.gcube.vremanagement.executor.client.plugins.query.SmartExecutorPluginQuery;
|
|
import org.gcube.vremanagement.executor.client.plugins.query.filter.ListEndpointDiscoveryFilter;
|
|
import org.gcube.vremanagement.executor.client.plugins.query.filter.SpecificEndpointDiscoveryFilter;
|
|
import org.gcube.vremanagement.executor.client.proxies.SmartExecutorProxy;
|
|
|
|
import com.thoughtworks.xstream.XStream;
|
|
|
|
import static org.gcube.resources.discovery.icclient.ICFactory.*;
|
|
|
|
public class WPSJobManager {
|
|
|
|
static final int pollingTime = 5000;
|
|
static final int maxTrialsPerThread = 3;
|
|
|
|
|
|
int overallFailures = 0;
|
|
int overallSuccess = 0;
|
|
int overallTasks = 0;
|
|
|
|
|
|
boolean stopThreads = false;
|
|
boolean hasResentMessages = false;
|
|
|
|
final public synchronized void incrementOverallFailures() {
|
|
overallFailures++;
|
|
}
|
|
|
|
final public synchronized void hasResentTrue() {
|
|
if (!hasResentMessages)
|
|
hasResentMessages=true;
|
|
}
|
|
|
|
final public synchronized void incrementOverallSuccess() {
|
|
overallSuccess++;
|
|
}
|
|
|
|
final public synchronized void stop() {
|
|
stopThreads=true;
|
|
}
|
|
|
|
final public synchronized boolean isStopped() {
|
|
return stopThreads;
|
|
}
|
|
|
|
public class TasksWatcher implements Runnable {
|
|
AlgorithmConfiguration configuration;
|
|
String algorithm;
|
|
String username;
|
|
String token;
|
|
String wpsHost;
|
|
int wpsPort;
|
|
int taskNumber;
|
|
String session;
|
|
public String exitstatus=GenericWorker.TASK_UNDEFINED;
|
|
int leftSetIndex;
|
|
int rightSetIndex;
|
|
int leftElements;
|
|
int rightElements;
|
|
|
|
|
|
public TasksWatcher(String algorithm, String username, String token, String wpsHost, int wpsPort, String session, int taskNumber, AlgorithmConfiguration configuration, int leftSetIndex, int rightSetIndex, int leftElements, int rightElements) {
|
|
this.algorithm = algorithm;
|
|
this.token = token;
|
|
this.wpsHost = wpsHost;
|
|
this.wpsPort = wpsPort;
|
|
this.taskNumber = taskNumber;
|
|
this.session = session;
|
|
this.username = username;
|
|
this.configuration = configuration;
|
|
this.leftSetIndex = leftSetIndex;
|
|
this.leftElements = leftElements;
|
|
this.rightSetIndex = rightSetIndex;
|
|
this.rightElements = rightElements;
|
|
}
|
|
|
|
|
|
public void callTask(boolean isduplicate){
|
|
String url = "http://" + wpsHost + ":" + wpsPort + "/wps/WebProcessingService";
|
|
|
|
boolean deleteTemporaryFiles = true;
|
|
AnalysisLogger.getLogger().debug("Task Number : " + taskNumber+" GO!");
|
|
try {
|
|
String algorithmCall = GenericWorkerCaller.getGenericWorkerCall(algorithm, session, configuration, leftSetIndex, rightSetIndex, leftElements, rightElements, isduplicate, deleteTemporaryFiles);
|
|
String result = HttpRequest.PostXmlString(url, wpsHost, wpsPort, new LinkedHashMap<String, String>(), username, token, algorithmCall);
|
|
// AnalysisLogger.getLogger().debug("Result: " + result);
|
|
|
|
boolean success = false;
|
|
boolean failure = false;
|
|
|
|
if (result.contains(GenericWorker.TASK_SUCCESS))
|
|
success = true;
|
|
else if (result.contains(GenericWorker.TASK_FAILURE))
|
|
failure = true;
|
|
|
|
String statusLocation = "";
|
|
|
|
while (!success && !isStopped() && (!failure) ) { //while !success and failure
|
|
if (result == null || result.contains(GenericWorker.TASK_FAILURE) || result.contains("Exception"))
|
|
failure = true;
|
|
|
|
else if (result.contains(GenericWorker.TASK_SUCCESS))
|
|
success = true;
|
|
else if (result.contains("<wps:ProcessAccepted>Process Accepted</wps:ProcessAccepted>")) {
|
|
statusLocation = result.substring(result.indexOf("statusLocation=") + "statusLocation=".length());
|
|
statusLocation = statusLocation.substring(0, statusLocation.indexOf(">"));
|
|
statusLocation = statusLocation.replace("\"", "");
|
|
statusLocation = statusLocation + "&gcube-token=" + token;
|
|
// AnalysisLogger.getLogger().debug("Status Location: " + statusLocation);
|
|
result= "";
|
|
} else {
|
|
Thread.sleep(pollingTime);
|
|
result = HttpRequest.sendGetRequest(statusLocation, "");
|
|
// AnalysisLogger.getLogger().debug("Result in location: " + result);
|
|
}
|
|
// request = HttpRequest.sendGetRequest(url, ""); // AnalysisLogger.getLogger().debug("Answer for task "+taskNumber+": "+request); }catch(Exception e){ AnalysisLogger.getLogger().debug("Request failure for task "+taskNumber+": "+e.getLocalizedMessage()); } if (request.contains("<wps:ProcessSucceeded>")) success = true; if (request.contains("<ows:Exception>")){ failure = true; incrementOverallFailures(); } try { Thread.sleep(pollingTime); } catch (InterruptedException e) { e.printStackTrace(); } }
|
|
}
|
|
|
|
if (isStopped() && statusLocation!=null && statusLocation.length()>0){
|
|
String wpscancel = statusLocation.replace("RetrieveResultServlet", "CancelComputationServlet");
|
|
result = HttpRequest.sendGetRequest(wpscancel, "");
|
|
}
|
|
|
|
|
|
exitstatus = GenericWorker.TASK_SUCCESS;
|
|
if (failure)
|
|
{
|
|
exitstatus = GenericWorker.TASK_FAILURE;
|
|
AnalysisLogger.getLogger().debug("Task Number "+taskNumber+" - Failure cause: " + result);
|
|
}
|
|
// AnalysisLogger.getLogger().debug("Process execution finished: " + exitstatus);
|
|
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
AnalysisLogger.getLogger().debug(e);
|
|
AnalysisLogger.getLogger().debug("Task Number "+taskNumber+" - Process exception: " + e.getLocalizedMessage());
|
|
exitstatus = GenericWorker.TASK_FAILURE;
|
|
|
|
}finally{
|
|
|
|
}
|
|
}
|
|
@Override
|
|
public void run() {
|
|
int trials = 0;
|
|
boolean duplicate = false;
|
|
while (!exitstatus.equals(GenericWorker.TASK_SUCCESS) && trials<maxTrialsPerThread){
|
|
callTask(duplicate);
|
|
if (exitstatus.equals(GenericWorker.TASK_FAILURE)){
|
|
trials++;
|
|
hasResentTrue();
|
|
duplicate = true;
|
|
AnalysisLogger.getLogger().debug("Task Number "+taskNumber+" - Retrying n."+trials);
|
|
}
|
|
}
|
|
|
|
if (exitstatus.equals(GenericWorker.TASK_SUCCESS))
|
|
incrementOverallSuccess();
|
|
else
|
|
incrementOverallFailures();
|
|
|
|
AnalysisLogger.getLogger().debug("Task Number "+taskNumber+" - Finished: " + exitstatus);
|
|
|
|
}
|
|
}
|
|
|
|
public int getNumberOfNodes() {
|
|
return 1;
|
|
}
|
|
|
|
public int getActiveNodes() {
|
|
return 1;
|
|
}
|
|
|
|
public float getStatus() {
|
|
return (float)(overallFailures+overallSuccess)/(float)overallTasks;
|
|
}
|
|
|
|
public boolean wasAborted() {
|
|
return stopThreads;
|
|
}
|
|
|
|
public boolean hasResentMessages() {
|
|
return hasResentMessages;
|
|
}
|
|
|
|
public void uploadAndExecuteChunkized(AlgorithmConfiguration configuration, String algorithmClass, List<String> arguments, String session) {
|
|
ExecutorService executor = null;
|
|
try{
|
|
int numberofservices = 1;
|
|
|
|
AnalysisLogger.getLogger().debug("Estimating the number of services");
|
|
|
|
List<String> wpsservices = InfraRetrieval.retrieveService("DataMiner", configuration.getGcubeScope());
|
|
|
|
if (wpsservices==null || wpsservices.size()==0)
|
|
throw new Exception ("No Dataminer GCore Endpoint found in the VRE "+configuration.getGcubeScope());
|
|
|
|
List<String> differentServices = new ArrayList<String>();
|
|
for (String service:wpsservices){
|
|
|
|
service = service.substring(service.indexOf("/")+2);
|
|
service = service.substring(0,service.indexOf(":"));
|
|
if (!differentServices.contains(service))
|
|
differentServices.add(service);
|
|
|
|
}
|
|
numberofservices = differentServices.size();
|
|
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Number of dataminer services "+numberofservices);
|
|
int parallelisation = numberofservices*2;
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Number of parallel processes (parallelisation) : "+parallelisation);
|
|
|
|
List<String> wpshosts = InfraRetrieval.retrieveAddresses("DataAnalysis",configuration.getGcubeScope(),"-----");
|
|
|
|
if (wpshosts==null || wpshosts.size()==0)
|
|
throw new Exception ("WPSJobManager->No Dataminer Service Endpoint found in the VRE "+configuration.getGcubeScope());
|
|
|
|
|
|
String wpshost = wpshosts.get(0);
|
|
wpshost = wpshost.substring(wpshost.indexOf("/")+2);
|
|
//String wpshostAddress = wpshost.substring(0,wpshost.indexOf(":"));
|
|
String wpshostAddress = wpshost.substring(0,wpshost.indexOf("/"));
|
|
//String wpshostPort = wpshost.substring(wpshost.indexOf(":")+1,wpshost.indexOf("/"));
|
|
//http://dataminer1-devnext.d4science.org:80/wps/gcube/resourc
|
|
wpshost=wpshostAddress;
|
|
int wpsport = 80;
|
|
overallTasks=arguments.size();
|
|
|
|
executor = Executors.newFixedThreadPool(parallelisation);
|
|
int taskNumber = 0;
|
|
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Executing algorithm class:"+algorithmClass);
|
|
|
|
|
|
for (String argument:arguments) {
|
|
String[] lfnlnr = argument.split(" ");
|
|
int leftOff = Integer.parseInt(lfnlnr[0]);
|
|
int leftNum = Integer.parseInt(lfnlnr[1]);
|
|
int rightOff = Integer.parseInt(lfnlnr[2]);
|
|
int rightNum = Integer.parseInt(lfnlnr[3]);
|
|
|
|
TasksWatcher watcher = new TasksWatcher(algorithmClass,
|
|
configuration.getGcubeUserName(),
|
|
configuration.getGcubeToken(),wpshost,wpsport,session,taskNumber,configuration, leftOff, rightOff,leftNum,rightNum);
|
|
|
|
executor.execute(watcher);
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Task number "+taskNumber+" launched!");
|
|
taskNumber++;
|
|
}
|
|
|
|
int njobs = overallFailures+overallSuccess;
|
|
int pnjobs =njobs;
|
|
while (njobs<overallTasks){
|
|
Thread.sleep(pollingTime);
|
|
float percFailure = (float)(overallFailures)/(float)overallTasks;
|
|
//if (percFailure>0.5)
|
|
if (overallFailures>0)
|
|
stop();
|
|
njobs = overallFailures+overallSuccess;
|
|
if (pnjobs<njobs){
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Number of finished jobs "+njobs+" of "+overallTasks);
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Number of errors "+overallFailures+" - perc failure "+percFailure);
|
|
}
|
|
}
|
|
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Overall computation finished");
|
|
}catch(Exception e){
|
|
e.printStackTrace();
|
|
}
|
|
finally{
|
|
if (executor!=null){
|
|
AnalysisLogger.getLogger().debug("WPSJobManager->Shutting down the executions");
|
|
executor.shutdown();
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
}
|