package org.gcube.dataanalysis.executor.job.management; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URLDecoder; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.gcube.contentmanagement.graphtools.utils.HttpRequest; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; import org.gcube.dataanalysis.executor.util.InfraRetrieval; public class WPSJobManager { static final int pollingTime = 5000; static final long maxTaskTime= 12*60000; //allowed max 12 hours per task int overallFailures = 0; int overallSuccess = 0; int overallTasks = 0; int nservices = -1; boolean stopThreads = false; boolean hasResentMessages = false; public static String getCallTemplate(){ String call = null; try{ InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("templates/WPSGWTemplate2.xml"); AnalysisLogger.getLogger().debug("WPSJobManager->GW template Input stream is null "+(is==null)); BufferedReader in = new BufferedReader(new InputStreamReader(is)); String line = null; StringBuilder vud = new StringBuilder(); while ((line = in.readLine()) != null) { vud.append(line + "\n"); } in.close(); call = vud.toString(); }catch(Exception e){ e.printStackTrace(); } return call; } final public synchronized void incrementOverallFailures() { overallFailures++; } final public synchronized void hasResentTrue() { if (!hasResentMessages) hasResentMessages=true; } final public synchronized void incrementOverallSuccess() { overallSuccess++; } final public synchronized void stop() { stopThreads=true; } final public synchronized boolean isStopped() { return stopThreads; } public class TasksWatcher implements Runnable { AlgorithmConfiguration configuration; String algorithm; String username; String token; String wpsHost; int wpsPort; int taskNumber; String session; public String exitstatus=GenericWorker.TASK_UNDEFINED; int leftSetIndex; int rightSetIndex; int leftElements; int rightElements; String callTemplate; int maxTrialsPerThread; public TasksWatcher(String algorithm, String username, String token, String wpsHost, int wpsPort, String session, int taskNumber, AlgorithmConfiguration configuration, int leftSetIndex, int rightSetIndex, int leftElements, int rightElements,String callTemplate, int maxTrialsPerThread) { this.algorithm = algorithm; this.token = token; this.wpsHost = wpsHost; this.wpsPort = wpsPort; this.taskNumber = taskNumber; this.session = session; this.username = username; this.configuration = configuration; this.leftSetIndex = leftSetIndex; this.leftElements = leftElements; this.rightSetIndex = rightSetIndex; this.rightElements = rightElements; this.callTemplate=callTemplate; this.maxTrialsPerThread=maxTrialsPerThread; } public void callTask(boolean isduplicate){ String url = "http://" + wpsHost + ":" + wpsPort + "/wps/WebProcessingService"; boolean deleteTemporaryFiles = true; AnalysisLogger.getLogger().debug("WPSJobManager->Task Number : " + taskNumber+" GO!"); try { AnalysisLogger.getLogger().debug("WPSJobManager->Invoking the GW to start"); String algorithmCall = GenericWorkerCaller.getGenericWorkerCall(algorithm, session, configuration, leftSetIndex, rightSetIndex, leftElements, rightElements, isduplicate, deleteTemporaryFiles,callTemplate); String result = HttpRequest.PostXmlString(url, wpsHost, wpsPort, new LinkedHashMap(), username, token, algorithmCall); AnalysisLogger.getLogger().debug("WPSJobManager->GW starting Output " + result.replace("\n", "")); boolean success = false; boolean failure = false; if (result.contains(GenericWorker.TASK_SUCCESS)) success = true; else if (result.contains(GenericWorker.TASK_FAILURE)) failure = true; String statusLocation = ""; long taskTimeCounter = 0; while (!success && !isStopped() && (!failure) ) { //while !success and failure if (result == null || result.contains(GenericWorker.TASK_FAILURE)) failure = true; else if (taskTimeCounter>maxTaskTime){ failure = true; } else if (result.contains(GenericWorker.TASK_SUCCESS)) success = true; else if (result.contains("Process Accepted")) { statusLocation = result.substring(result.indexOf("statusLocation=") + "statusLocation=".length()); statusLocation = statusLocation.substring(0, statusLocation.indexOf(">")); statusLocation = statusLocation.replace("\"", ""); statusLocation = statusLocation + "&gcube-token=" + token; // AnalysisLogger.getLogger().debug("Status Location: " + statusLocation); result= ""; } else { Thread.sleep(pollingTime); taskTimeCounter+=pollingTime; result = HttpRequest.sendGetRequest(statusLocation, ""); // AnalysisLogger.getLogger().debug("Result in location: " + result); } // request = HttpRequest.sendGetRequest(url, ""); // AnalysisLogger.getLogger().debug("Answer for task "+taskNumber+": "+request); }catch(Exception e){ AnalysisLogger.getLogger().debug("Request failure for task "+taskNumber+": "+e.getLocalizedMessage()); } if (request.contains("")) success = true; if (request.contains("")){ failure = true; incrementOverallFailures(); } try { Thread.sleep(pollingTime); } catch (InterruptedException e) { e.printStackTrace(); } } } if (isStopped() && statusLocation!=null && statusLocation.length()>0){ String wpscancel = statusLocation.replace("RetrieveResultServlet", "CancelComputationServlet"); result = HttpRequest.sendGetRequest(wpscancel, ""); } exitstatus = GenericWorker.TASK_SUCCESS; if (failure) { exitstatus = GenericWorker.TASK_FAILURE; AnalysisLogger.getLogger().debug("WPSJobManager->Task Number "+taskNumber+" - Failure cause: " + URLDecoder.decode(result,"UTF-8")); } // AnalysisLogger.getLogger().debug("Process execution finished: " + exitstatus); } catch (Exception e) { e.printStackTrace(); AnalysisLogger.getLogger().error("WPSJobManager->Task Number {} - Process exception ", taskNumber,e); exitstatus = GenericWorker.TASK_FAILURE; }finally{ } } @Override public void run() { int trials = 0; boolean duplicate = false; while (!exitstatus.equals(GenericWorker.TASK_SUCCESS) && trialsTask Number "+taskNumber+" - Retrying n."+trials); } } if (exitstatus.equals(GenericWorker.TASK_SUCCESS)) incrementOverallSuccess(); else incrementOverallFailures(); AnalysisLogger.getLogger().debug("WPSJobManager->Task Number "+taskNumber+" - Finished: " + exitstatus); } } public int getNumberOfNodes() { return 1; } public int getActiveNodes() { return 1; } public float getStatus() { return (float)(overallFailures+overallSuccess)/(float)overallTasks; } public boolean wasAborted() { return stopThreads; } public boolean hasResentMessages() { return hasResentMessages; } public int estimateNumberOfServices(String scope) throws Exception{ List wpsservices = InfraRetrieval.retrieveService("DataMiner", scope); if (wpsservices==null || wpsservices.size()==0){ AnalysisLogger.getLogger().debug("WPSJobManager->Error: No DataMiner GCore Endpoints found!"); throw new Exception ("No DataMinerWorkers GCore Endpoint found in the VRE "+scope); } List differentServices = new ArrayList(); for (String service:wpsservices){ service = service.substring(service.indexOf("/")+2); service = service.substring(0,service.indexOf(":")); if (!differentServices.contains(service)) differentServices.add(service); } int numberofservices = differentServices.size(); AnalysisLogger.getLogger().debug("WPSJobManager->Number of found services "+numberofservices); nservices = Math.max(1,numberofservices-1); return nservices; } public void uploadAndExecuteChunkized(AlgorithmConfiguration configuration, String algorithmClass, List arguments, String session) throws Exception{ ExecutorService executor = null; try{ int numberofservices = 1; String callTemplate = getCallTemplate(); AnalysisLogger.getLogger().debug("WPSJobManager->Estimating the number of services"); if (nservices>0) numberofservices = nservices; else numberofservices = estimateNumberOfServices(configuration.getGcubeScope()); AnalysisLogger.getLogger().debug("WPSJobManager->Number of dataminer services "+numberofservices); int parallelisation = numberofservices*2; AnalysisLogger.getLogger().debug("WPSJobManager->Number of parallel processes (parallelisation) : "+parallelisation); //List wpshosts = InfraRetrieval.retrieveAddresses("DataAnalysis",configuration.getGcubeScope(),"-----"); List wpshosts = InfraRetrieval.retrieveServiceAddress("DataAnalysis","DataMinerWorkers",configuration.getGcubeScope(),"noexclusion"); if (wpshosts==null || wpshosts.size()==0){ AnalysisLogger.getLogger().debug("WPSJobManager->Error: No DataMinerWorkers Service Endpoints found at all!"); throw new Exception ("WPSJobManager->No Dataminer Workers Service Endpoint found in the VRE - DataMinerWorkers Resource is required in the VRE"+configuration.getGcubeScope()); } String wpshost = wpshosts.get(0); wpshost = wpshost.substring(wpshost.indexOf("/")+2); //String wpshostAddress = wpshost.substring(0,wpshost.indexOf(":")); String wpshostAddress = wpshost.substring(0,wpshost.indexOf("/")); //String wpshostPort = wpshost.substring(wpshost.indexOf(":")+1,wpshost.indexOf("/")); //http://dataminer1-devnext.d4science.org:80/wps/gcube/resourc wpshost=wpshostAddress; int wpsport = 80; overallTasks=arguments.size(); AnalysisLogger.getLogger().debug("WPSJobManager->Workers WPS host "+wpshost); executor = Executors.newFixedThreadPool(nservices); // executor = Executors.newFixedThreadPool(2); int taskNumber = 0; AnalysisLogger.getLogger().debug("WPSJobManager->Executing algorithm class:"+algorithmClass); for (String argument:arguments) { String[] lfnlnr = argument.split(" "); int leftOff = Integer.parseInt(lfnlnr[0]); int leftNum = Integer.parseInt(lfnlnr[1]); int rightOff = Integer.parseInt(lfnlnr[2]); int rightNum = Integer.parseInt(lfnlnr[3]); int maxTrials = parallelisation; TasksWatcher watcher = new TasksWatcher(algorithmClass, configuration.getGcubeUserName(), configuration.getGcubeToken(), wpshost,wpsport,session,taskNumber,configuration, leftOff, rightOff,leftNum,rightNum,callTemplate, maxTrials); executor.execute(watcher); AnalysisLogger.getLogger().debug("WPSJobManager->Task number "+taskNumber+" launched with arguments: "+argument); taskNumber++; Thread.sleep(1000); } int njobs = overallFailures+overallSuccess; int pnjobs =njobs; while (njobs0.5) if (overallFailures>0) stop(); njobs = overallFailures+overallSuccess; if (pnjobsNumber of finished jobs "+njobs+" of "+overallTasks); AnalysisLogger.getLogger().debug("WPSJobManager->Number of errors "+overallFailures+" - perc failure "+percFailure); pnjobs=njobs; } } AnalysisLogger.getLogger().debug("WPSJobManager->Overall computation finished"); }catch(Exception e){ e.printStackTrace(); throw e; } finally{ if (executor!=null){ AnalysisLogger.getLogger().debug("WPSJobManager->Shutting down the executions"); executor.shutdown(); AnalysisLogger.getLogger().debug("WPSJobManager->Shut down completed"); } } } }