package org.gcube.dataanalysis.executor.job.management;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.axis.message.addressing.EndpointReferenceType;
import org.gcube.common.core.contexts.GHNContext;
import org.gcube.common.core.informationsystem.client.AtomicCondition;
import org.gcube.common.core.informationsystem.client.ISClient;
import org.gcube.common.core.informationsystem.client.RPDocument;
import org.gcube.common.core.informationsystem.client.queries.WSResourceQuery;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.executor.scripts.ScriptIOWorker;
import org.gcube.vremanagement.executor.stubs.ExecutorCall;
import org.gcube.vremanagement.executor.stubs.TaskCall;
import org.gcube.vremanagement.executor.stubs.TaskProxy;

/**
 * Distributes the execution of a script over a set of Executor service nodes:
 * uploads the local input files to the shared storage once, launches one
 * ExecutorScript task per node, then polls the tasks until they all complete,
 * fail, or a shutdown is requested.
 */
public class RemoteJobManager {

	private static String pluginName = "ExecutorScript";

	private int actualNumberOfNodes;
	private GCUBEScope gscope;
	private List<EndpointReferenceType> eprs;
	float status;
	boolean abort;
	boolean shutdown;
	protected int activeNodes;
	String scope;

	List<String> filenames;
	List<String> fileurls;
	boolean yetuploaded;
	String session;

	public int getActiveNodes() {
		return activeNodes;
	}

	public float getStatus() {
		return status;
	}

	public int getNumberOfNodes() {
		return actualNumberOfNodes;
	}

	public void setNumberOfNodes(int newNumberOfNodes) {
		actualNumberOfNodes = newNumberOfNodes;
	}

	public void init(String scope, int numberOfNodes) throws Exception {
		this.scope = scope;
		gscope = GCUBEScope.getScope(scope);
		AnalysisLogger.getLogger().debug("Using the following scope for this computation: " + gscope);
		shutdown = false;
		yetuploaded = false;
		// discover the available Executor nodes, unless a list of endpoints was supplied
		if (eprs == null)
			actualNumberOfNodes = findNodes(scope);
		else
			actualNumberOfNodes = eprs.size();
		// never use more nodes than the caller requested
		if (numberOfNodes < actualNumberOfNodes)
			actualNumberOfNodes = numberOfNodes;
	}

	public RemoteJobManager(String scope, int numberOfNodes) throws Exception {
		init(scope, numberOfNodes);
	}

	public RemoteJobManager(String scope, int numberOfNodes, List<EndpointReferenceType> eprs) throws Exception {
		this.eprs = eprs;
		init(scope, numberOfNodes);
	}
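	/**
	 * Uploads every file found in {@code localDir} to the shared storage area
	 * (first call only), then launches the script on each available node and
	 * polls the resulting tasks until completion. Returns true only when all
	 * tasks finish without failure.
	 */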
	public boolean uploadAndExecute(String serviceClass, String serviceName, String owner, String localDir, String remoteDir, String outputDir, String script, List<String> arguments, boolean deletefiles) throws Exception {
		boolean executeAll = false;
		long t0 = System.currentTimeMillis();
		// if not yet uploaded, upload the required files
		if (!yetuploaded) {
			ScopeProvider.instance.set(scope);
			IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED).getClient();
			// IClient client = new StorageClient(serviceClass, serviceName, owner, AccessType.SHARED, gscope).getClient();
			File dir = new File(localDir);
			File[] files = dir.listFiles();
			AnalysisLogger.getLogger().debug("Start uploading");
			filenames = new ArrayList<String>();
			fileurls = new ArrayList<String>();
			for (File sfile : files) {
				String localf = sfile.getAbsolutePath();
				String filename = sfile.getName();
				String remotef = remoteDir + sfile.getName();
				client.put(true).LFile(localf).RFile(remotef);
				String url = client.getUrl().RFile(remotef);
				AnalysisLogger.getLogger().debug("URL created: " + url);
				filenames.add(filename);
				fileurls.add(url);
			}
			AnalysisLogger.getLogger().debug("Upload end");
			yetuploaded = true;
			session = ("" + UUID.randomUUID()).replace("-", "");
		}
		// if the number of available nodes is greater than zero, launch the tasks
		if (actualNumberOfNodes > 0) {
			AnalysisLogger.getLogger().debug("Executing script on " + actualNumberOfNodes + " nodes");
			int len = arguments.size();
			List<WorkerWatcher> tasksProxies = new ArrayList<WorkerWatcher>();
			activeNodes = 0;
			// launch one task per node
			for (int i = 0; i < actualNumberOfNodes; i++) {
				String argum = "";
				// supply the i-th argument, if available
				if (i < len)
					argum = arguments.get(i);
				// generate the input map according to the arguments
				Map<String, Object> inputs = generateInput(filenames, fileurls, outputDir, script, argum, i, scope, serviceClass, serviceName, owner, remoteDir, session, deletefiles);
				AnalysisLogger.getLogger().debug("-> Owner: " + owner + " ServiceClass: " + serviceClass + " ServiceName:" + serviceName + " remoteDir:" + remoteDir);
				// take the i-th endpoint of the executor
				EndpointReferenceType selectedEPR = eprs.get(i);
				AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " on " + selectedEPR);
				// run the executor script on the selected endpoint
				ExecutorCall call = new ExecutorCall(pluginName, gscope);
				call.setEndpointReference(selectedEPR);
				TaskCall task = call.launch(inputs);
				TaskProxy proxy = task.getProxy();
				// add the task to the watcher list so its state can be polled later
				tasksProxies.add(new WorkerWatcher(proxy, AnalysisLogger.getLogger()));
				AnalysisLogger.getLogger().debug("Launching node " + (i + 1) + " OK on " + selectedEPR);
			}
			activeNodes = actualNumberOfNodes;
			AnalysisLogger.getLogger().debug("Launch Finished - Controlling Status");
			int allstatus = 0;
			abort = false;
			// control the execution: loop while there are active nodes and the process has not been stopped
			while ((activeNodes != 0) && (!abort) && (!shutdown)) {
				// poll the state of each task
				int nworkers = tasksProxies.size();
				int i = 0;
				while (i < nworkers) {
					WorkerWatcher proxy = tasksProxies.get(i);
					String state = proxy.getState();
					AnalysisLogger.getLogger().debug("REMOTE JOB MANAGER-> STATE " + state);
					// abort on a failed task, or on any state other than DONE or RUNNING
					abort = ((state == null) || state.equals("FAILED") || (!state.equals("DONE") && !state.equals("RUNNING")));
					// check for a finished computation
					boolean finished = false;
					if (state != null)
						finished = state.equals("DONE");
					// if finished, update the active nodes
					if (finished) {
						tasksProxies.remove(i);
						allstatus++;
						activeNodes--;
						nworkers--;
						if (activeNodes == 0)
							break;
					} else
						i++;
					status = Math.min(((float) allstatus / (float) actualNumberOfNodes) * 100f, 95f);
					if (abort)
						break;
					if (shutdown)
						break;
					// AnalysisLogger.getLogger().debug(String.format("Task " + i + "executed started at %Tc with %s state ", proxy.getStartTime(), state));
					// sleep before polling again
					Thread.sleep(2000);
				}
			}
			activeNodes = 0;
			AnalysisLogger.getLogger().debug("All Tasks have Finished");
			if (!abort) {
				AnalysisLogger.getLogger().debug("All Tasks were successful");
				// all tasks completed without failure
				executeAll = true;
				/*
				 * List listElements = client.showDir().RDir(remoteDir);
				 * for (StorageObject obj : listElements) {
				 *     AnalysisLogger.getLogger().debug("obj stored in directory " + remoteDir + ": " + obj.getName());
				 * }
				 */
			} else
				AnalysisLogger.getLogger().debug("Tasks were NOT successful");
		} else
			AnalysisLogger.getLogger().debug("Warning: could not execute tasks: No Nodes Available!");
		AnalysisLogger.getLogger().debug("Whole procedure done in " + (System.currentTimeMillis() - t0) + " ms");
		status = 100f;
		return executeAll;
	}

	public boolean wasAborted() {
		return abort;
	}

	public void stop() {
		shutdown = true;
	}
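	/**
	 * Queries the Information System for Executor service instances in the
	 * given scope that host the ExecutorScript plugin, and collects their
	 * endpoint references.
	 */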
	private int findNodes(String scopeString) throws Exception {
		GCUBEScope scope = GCUBEScope.getScope(scopeString);
		ISClient client = GHNContext.getImplementation(ISClient.class);
		WSResourceQuery wsquery = client.getQuery(WSResourceQuery.class);
		// look for running Executor services that expose the ExecutorScript plugin
		wsquery.addAtomicConditions(new AtomicCondition("//gc:ServiceName", "Executor"));
		wsquery.addAtomicConditions(new AtomicCondition("/child::*[local-name()='Task']/name[text()='" + pluginName + "']", pluginName));
		List<RPDocument> listdoc = client.execute(wsquery, scope);
		EndpointReferenceType epr = null;
		eprs = new ArrayList<EndpointReferenceType>();
		int numberOfEP = 0;
		for (RPDocument resource : listdoc) {
			epr = resource.getEndpoint();
			numberOfEP++;
			eprs.add(epr);
		}
		AnalysisLogger.getLogger().debug("Found " + numberOfEP + " endpoints");
		return numberOfEP;
	}

	private Map<String, Object> generateInput(List<String> filenames, List<String> fileurls, String outputDir, String script, String argum, int i, String scope, String serviceClass, String serviceName, String owner, String remoteDir, String session, boolean deletefiles) {
		// build the input map handed to the ExecutorScript plugin on the remote node
		Map<String, Object> inputs = new HashMap<String, Object>();
		inputs.put("FILE_NAMES", filenames);
		inputs.put("FILE_URLS", fileurls);
		inputs.put("OUTPUTDIR", ScriptIOWorker.toInputString(outputDir));
		inputs.put("SCRIPT", ScriptIOWorker.toInputString(script));
		inputs.put("ARGUMENTS", ScriptIOWorker.toInputString(argum));
		inputs.put("NODE_IDENTIFIER", "" + i);
		inputs.put("SCOPE", ScriptIOWorker.toInputString(scope));
		inputs.put("SERVICE_CLASS", ScriptIOWorker.toInputString(serviceClass));
		inputs.put("SERVICE_NAME", ScriptIOWorker.toInputString(serviceName));
		inputs.put("OWNER", ScriptIOWorker.toInputString(owner));
		inputs.put("REMOTEDIR", ScriptIOWorker.toInputString(remoteDir));
		inputs.put("CLEAN_CACHE", "" + deletefiles);
		// inputs.put("SESSION", ScriptIO.toInputString(session));
		return inputs;
	}
}
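/*
 * Minimal usage sketch, kept outside the class above: the scope string,
 * service coordinates, directories, script name, and per-node arguments
 * below are placeholder values for illustration, not constants defined
 * anywhere in this project.
 */
class RemoteJobManagerUsageExample {
	public static void main(String[] args) throws Exception {
		// ask for at most 4 Executor nodes in the given (hypothetical) scope
		RemoteJobManager manager = new RemoteJobManager("/gcube/devsec", 4);

		// one argument string per node; nodes beyond the list size receive ""
		java.util.List<String> arguments = new java.util.ArrayList<String>();
		for (int i = 0; i < manager.getNumberOfNodes(); i++)
			arguments.add("node-argument-" + i);

		// upload ./input/ once, then run script.sh on every discovered node
		boolean ok = manager.uploadAndExecute("DataAnalysis", "Executor", "someuser",
				"./input/", "remote/dir/", "remote/output/", "script.sh",
				arguments, true);
		System.out.println("All tasks successful: " + ok + ", aborted: " + manager.wasAborted());
	}
}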