249 lines
9.5 KiB
Java
249 lines
9.5 KiB
Java
package org.gcube.dataanalysis.executor.job.management;
|
|
|
|
import java.io.File;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.UUID;
|
|
|
|
import org.apache.axis.message.addressing.EndpointReferenceType;
|
|
import org.gcube.common.core.contexts.GHNContext;
|
|
import org.gcube.common.core.informationsystem.client.AtomicCondition;
|
|
import org.gcube.common.core.informationsystem.client.ISClient;
|
|
import org.gcube.common.core.informationsystem.client.RPDocument;
|
|
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
|
|
import org.gcube.common.core.scope.GCUBEScope;
|
|
import org.gcube.common.scope.api.ScopeProvider;
|
|
import org.gcube.contentmanagement.blobstorage.service.IClient;
|
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
|
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
|
|
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
|
|
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
|
|
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
|
|
import org.gcube.vremanagement.executor.stubs.TaskCall;
|
|
import org.gcube.vremanagement.executor.stubs.TaskProxy;
|
|
|
|
public class RemoteJobManager {
|
|
|
|
private static String pluginName = "ExecutorScript";
|
|
private int actualNumberOfNodes;
|
|
private GCUBEScope gscope;
|
|
private List<EndpointReferenceType> eprs;
|
|
float status;
|
|
boolean abort;
|
|
boolean shutdown;
|
|
protected int activeNodes;
|
|
String scope;
|
|
|
|
public int getActiveNodes() {
|
|
return activeNodes;
|
|
}
|
|
|
|
public float getStatus() {
|
|
return status;
|
|
}
|
|
|
|
public int getNumberOfNodes() {
|
|
return actualNumberOfNodes;
|
|
}
|
|
|
|
public void setNumberOfNodes(int newNumberOfNodes) {
|
|
actualNumberOfNodes = newNumberOfNodes;
|
|
}
|
|
|
|
public void init(String scope, int numberOfNodes) throws Exception {
|
|
this.scope = scope;
|
|
gscope = GCUBEScope.getScope(scope);
|
|
AnalysisLogger.getLogger().debug("Using the following scope for this computation:"+gscope);
|
|
shutdown = false;
|
|
yetuploaded = false;
|
|
if (eprs == null)
|
|
actualNumberOfNodes = findNodes(scope);
|
|
else
|
|
actualNumberOfNodes = eprs.size();
|
|
|
|
if (numberOfNodes < actualNumberOfNodes)
|
|
actualNumberOfNodes = numberOfNodes;
|
|
|
|
}
|
|
|
|
public RemoteJobManager(String scope, int numberOfNodes) throws Exception {
|
|
init(scope, numberOfNodes);
|
|
}
|
|
|
|
public RemoteJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
|
|
this.eprs = eprs;
|
|
init(scope, numberOfNodes);
|
|
}
|
|
|
|
List<String> filenames;
|
|
List<String> fileurls;
|
|
boolean yetuploaded;
|
|
String session;
|
|
|
|
public boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, boolean deletefiles) throws Exception {
|
|
boolean executeAll = false;
|
|
long t0 = System.currentTimeMillis();
|
|
//if not yet uploaded , upload required files
|
|
if (!yetuploaded) {
|
|
ScopeProvider.instance.set(scope);
|
|
IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED).getClient();
|
|
// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
|
|
File dir = new File(localDir);
|
|
File[] files = dir.listFiles();
|
|
AnalysisLogger.getLogger().debug("Start uploading");
|
|
filenames = new ArrayList<String>();
|
|
fileurls = new ArrayList<String>();
|
|
for (File sfile : files) {
|
|
String localf = sfile.getAbsolutePath();
|
|
String filename = sfile.getName();
|
|
String remotef = remoteDir + sfile.getName();
|
|
client.put(true).LFile(localf).RFile(remotef);
|
|
String url = client.getUrl().RFile(remotef);
|
|
AnalysisLogger.getLogger().debug("URL created: " + url);
|
|
filenames.add(filename);
|
|
fileurls.add(url);
|
|
}
|
|
AnalysisLogger.getLogger().debug("Upload end");
|
|
yetuploaded = true;
|
|
session = (""+UUID.randomUUID()).replace("-", "");
|
|
}
|
|
|
|
//if the number of available nodes is higher than zero launch the tasks
|
|
if (actualNumberOfNodes > 0) {
|
|
|
|
AnalysisLogger.getLogger().debug("Executing script on " + actualNumberOfNodes + " nodes");
|
|
int len = arguments.size();
|
|
List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
|
|
activeNodes = 0;
|
|
//launch the tasks
|
|
for (int i = 0; i < actualNumberOfNodes; i++) {
|
|
String argum = "";
|
|
//supply the arguments if they are available
|
|
if (i < len)
|
|
argum = arguments.get(i);
|
|
//generate the input map according to the arguments
|
|
Map<String, Object> inputs = generateInput(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir,session,deletefiles);
|
|
AnalysisLogger.getLogger().debug("-> Owner: " + owner + " ServiceClass: " + serviceClass + " ServiceName:" + serviceName + " remoteDir:" + remoteDir);
|
|
//take the i-th endpoint of the executor
|
|
EndpointReferenceType selectedEPR = eprs.get(i);
|
|
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " on " + selectedEPR);
|
|
//run the executor script
|
|
ExecutorCall call = new ExecutorCall(pluginName, gscope);
|
|
call.setEndpointReference(selectedEPR);
|
|
TaskCall task = null;
|
|
task = call.launch(inputs);
|
|
TaskProxy proxy = task.getProxy();
|
|
tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
|
|
|
|
AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " OK on " + selectedEPR);
|
|
//add the task to the list in order to reuse it
|
|
}
|
|
|
|
activeNodes = actualNumberOfNodes;
|
|
AnalysisLogger.getLogger().debug("Launch Finished - Controlling Status");
|
|
int allstatus = 0;
|
|
abort = false;
|
|
//control the execution: go until there are active nodes or the process must stop
|
|
while ((activeNodes != 0) && (!abort) && (!shutdown)) {
|
|
//for each node get the task state
|
|
int nworkers = tasksProxies.size();
|
|
int i=0;
|
|
while (i < nworkers) {
|
|
WorkerWatcher proxy = tasksProxies.get(i);
|
|
String state = proxy.getState();
|
|
AnalysisLogger.getLogger().debug("REMOTE JOB MANAGER-> STATE " + state );
|
|
//control for aborted computation
|
|
abort = ((state == null) || state.equals("FAILED") || (!state.equals("DONE") && !state.equals("RUNNING")));
|
|
//control for finished computation
|
|
boolean finished = false;
|
|
if (state != null)
|
|
finished = state.equals("DONE");
|
|
//if finished update the active nodes
|
|
if (finished) {
|
|
tasksProxies.remove(i);
|
|
allstatus++;
|
|
activeNodes--;
|
|
nworkers--;
|
|
if (activeNodes == 0)
|
|
break;
|
|
}
|
|
else
|
|
i++;
|
|
|
|
status = Math.min(((float) allstatus / (float) actualNumberOfNodes) * 100f, 95f);
|
|
if (abort)
|
|
break;
|
|
if (shutdown)
|
|
break;
|
|
// AnalysisLogger.getLogger().debug(String.format("Task " + i + "executed started at %Tc with %s state ", proxy.getStartTime(), state));
|
|
//sleep before polling again
|
|
Thread.sleep(2000);
|
|
}
|
|
}
|
|
|
|
activeNodes = 0;
|
|
|
|
AnalysisLogger.getLogger().debug("All Tasks have Finished");
|
|
if (!abort) {
|
|
AnalysisLogger.getLogger().debug("All Task were successful");
|
|
/*
|
|
* List<StorageObject> listElements = client.showDir().RDir(remoteDir); for (StorageObject obj : listElements) { AnalysisLogger.getLogger().debug("obj stored in directory " + remoteDir + ": " + obj.getName()); }
|
|
*/
|
|
} else
|
|
AnalysisLogger.getLogger().debug("Tasks were NOT successful");
|
|
} else
|
|
AnalysisLogger.getLogger().debug("Warning: could not execute tasks: No Nodes Available!");
|
|
AnalysisLogger.getLogger().debug("Whole procedure done in " + (System.currentTimeMillis() - t0) + " ms");
|
|
status = 100f;
|
|
return executeAll;
|
|
}
|
|
|
|
public boolean wasAborted() {
|
|
return abort;
|
|
}
|
|
|
|
public void stop() {
|
|
shutdown = true;
|
|
}
|
|
|
|
private int findNodes(String scopeString) throws Exception {
|
|
GCUBEScope scope = GCUBEScope.getScope(scopeString);
|
|
ISClient client = GHNContext.getImplementation(ISClient.class);
|
|
WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
|
|
wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
|
|
wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='"+pluginName+"']", pluginName));
|
|
List<RPDocument> listdoc = client.execute(wsquery, scope);
|
|
EndpointReferenceType epr = null;
|
|
eprs = new ArrayList<EndpointReferenceType>();
|
|
int numberOfEP = 0;
|
|
for (RPDocument resource : listdoc) {
|
|
epr = resource.getEndpoint();
|
|
numberOfEP++;
|
|
eprs.add(epr);
|
|
}
|
|
AnalysisLogger.getLogger().debug("Found " + numberOfEP + " endpoints");
|
|
|
|
return numberOfEP;
|
|
}
|
|
|
|
private Map<String, Object> generateInput(Object filenames, Object fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir,String session,boolean deletefiles) {
|
|
Map<String, Object> inputs = new HashMap<String, Object>();
|
|
inputs.put("FILE_NAMES", filenames);
|
|
inputs.put("FILE_URLS", fileurls);
|
|
inputs.put("OUTPUTDIR", ScriptIOWorker.toInputString(outputDir));
|
|
inputs.put("SCRIPT", ScriptIOWorker.toInputString(script));
|
|
inputs.put("ARGUMENTS", ScriptIOWorker.toInputString(argum));
|
|
inputs.put("NODE_IDENTIFIER", "" + i);
|
|
inputs.put("SCOPE", ScriptIOWorker.toInputString(scope));
|
|
inputs.put("SERVICE_CLASS", ScriptIOWorker.toInputString(serviceClass));
|
|
inputs.put("SERVICE_NAME", ScriptIOWorker.toInputString(serviceName));
|
|
inputs.put("OWNER", ScriptIOWorker.toInputString(owner));
|
|
inputs.put("REMOTEDIR", ScriptIOWorker.toInputString(remoteDir));
|
|
inputs.put("CLEAN_CACHE",""+deletefiles);
|
|
// inputs.put("SESSION", ScriptIO.toInputString(session));
|
|
return inputs;
|
|
}
|
|
}
|