2016-09-19 19:03:48 +02:00
package org.gcube.dataanalysis.executor.job.management ;
2016-09-27 16:27:28 +02:00
import java.io.BufferedReader ;
import java.io.InputStream ;
import java.io.InputStreamReader ;
2016-09-29 12:49:16 +02:00
import java.net.URLDecoder ;
2016-09-19 19:03:48 +02:00
import java.util.ArrayList ;
import java.util.LinkedHashMap ;
import java.util.List ;
import java.util.concurrent.ExecutorService ;
import java.util.concurrent.Executors ;
import org.gcube.contentmanagement.graphtools.utils.HttpRequest ;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger ;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration ;
import org.gcube.dataanalysis.executor.util.InfraRetrieval ;
public class WPSJobManager {
static final int pollingTime = 5000 ;
2016-09-27 16:27:28 +02:00
static final long maxTaskTime = 12 * 60000 ; //allowed max 12 hours per task
2016-09-19 19:03:48 +02:00
int overallFailures = 0 ;
int overallSuccess = 0 ;
int overallTasks = 0 ;
2016-10-05 19:05:34 +02:00
int nservices = - 1 ;
2016-09-19 19:03:48 +02:00
boolean stopThreads = false ;
boolean hasResentMessages = false ;
2016-09-27 16:27:28 +02:00
public static String getCallTemplate ( ) {
String call = null ;
try {
InputStream is = Thread . currentThread ( ) . getContextClassLoader ( ) . getResourceAsStream ( " templates/WPSGWTemplate2.xml " ) ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->GW template Input stream is null " + ( is = = null ) ) ;
BufferedReader in = new BufferedReader ( new InputStreamReader ( is ) ) ;
String line = null ;
StringBuilder vud = new StringBuilder ( ) ;
while ( ( line = in . readLine ( ) ) ! = null ) {
vud . append ( line + " \ n " ) ;
}
in . close ( ) ;
call = vud . toString ( ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
return call ;
}
2016-09-19 19:03:48 +02:00
final public synchronized void incrementOverallFailures ( ) {
overallFailures + + ;
}
/** Records that at least one task had to be resubmitted; the flag is never reset here. */
public final synchronized void hasResentTrue() {
    if (hasResentMessages) {
        return;
    }
    hasResentMessages = true;
}
final public synchronized void incrementOverallSuccess ( ) {
overallSuccess + + ;
}
/** Raises the stop flag that the task watchers check through {@code isStopped()}. */
public final synchronized void stop() {
    this.stopThreads = true;
}
/**
 * Reads the stop flag under the manager's monitor.
 *
 * @return {@code true} once {@code stop()} has been called
 */
public final synchronized boolean isStopped() {
    final boolean stopRequested = this.stopThreads;
    return stopRequested;
}
public class TasksWatcher implements Runnable {
AlgorithmConfiguration configuration ;
String algorithm ;
String username ;
String token ;
String wpsHost ;
int wpsPort ;
int taskNumber ;
String session ;
public String exitstatus = GenericWorker . TASK_UNDEFINED ;
int leftSetIndex ;
int rightSetIndex ;
int leftElements ;
int rightElements ;
2016-09-27 16:27:28 +02:00
String callTemplate ;
int maxTrialsPerThread ;
2016-09-19 19:03:48 +02:00
2016-09-27 16:27:28 +02:00
public TasksWatcher ( String algorithm , String username , String token , String wpsHost , int wpsPort , String session , int taskNumber , AlgorithmConfiguration configuration , int leftSetIndex , int rightSetIndex , int leftElements , int rightElements , String callTemplate , int maxTrialsPerThread ) {
2016-09-19 19:03:48 +02:00
this . algorithm = algorithm ;
this . token = token ;
this . wpsHost = wpsHost ;
this . wpsPort = wpsPort ;
this . taskNumber = taskNumber ;
this . session = session ;
this . username = username ;
this . configuration = configuration ;
this . leftSetIndex = leftSetIndex ;
this . leftElements = leftElements ;
this . rightSetIndex = rightSetIndex ;
this . rightElements = rightElements ;
2016-09-27 16:27:28 +02:00
this . callTemplate = callTemplate ;
this . maxTrialsPerThread = maxTrialsPerThread ;
2016-09-19 19:03:48 +02:00
}
public void callTask ( boolean isduplicate ) {
String url = " http:// " + wpsHost + " : " + wpsPort + " /wps/WebProcessingService " ;
boolean deleteTemporaryFiles = true ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Task Number : " + taskNumber + " GO! " ) ;
2016-09-19 19:03:48 +02:00
try {
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Invoking the GW to start " ) ;
String algorithmCall = GenericWorkerCaller . getGenericWorkerCall ( algorithm , session , configuration , leftSetIndex , rightSetIndex , leftElements , rightElements , isduplicate , deleteTemporaryFiles , callTemplate ) ;
2016-09-19 19:03:48 +02:00
String result = HttpRequest . PostXmlString ( url , wpsHost , wpsPort , new LinkedHashMap < String , String > ( ) , username , token , algorithmCall ) ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->GW starting Output " + result . replace ( " \ n " , " " ) ) ;
2016-09-19 19:03:48 +02:00
boolean success = false ;
boolean failure = false ;
if ( result . contains ( GenericWorker . TASK_SUCCESS ) )
success = true ;
else if ( result . contains ( GenericWorker . TASK_FAILURE ) )
failure = true ;
String statusLocation = " " ;
2016-09-27 16:27:28 +02:00
long taskTimeCounter = 0 ;
2016-09-19 19:03:48 +02:00
while ( ! success & & ! isStopped ( ) & & ( ! failure ) ) { //while !success and failure
2016-09-27 16:27:28 +02:00
if ( result = = null | | result . contains ( GenericWorker . TASK_FAILURE ) )
2016-09-19 19:03:48 +02:00
failure = true ;
2016-09-27 16:27:28 +02:00
else if ( taskTimeCounter > maxTaskTime ) {
failure = true ;
}
2016-09-19 19:03:48 +02:00
else if ( result . contains ( GenericWorker . TASK_SUCCESS ) )
success = true ;
else if ( result . contains ( " <wps:ProcessAccepted>Process Accepted</wps:ProcessAccepted> " ) ) {
statusLocation = result . substring ( result . indexOf ( " statusLocation= " ) + " statusLocation= " . length ( ) ) ;
statusLocation = statusLocation . substring ( 0 , statusLocation . indexOf ( " > " ) ) ;
statusLocation = statusLocation . replace ( " \" " , " " ) ;
statusLocation = statusLocation + " &gcube-token= " + token ;
// AnalysisLogger.getLogger().debug("Status Location: " + statusLocation);
result = " " ;
} else {
Thread . sleep ( pollingTime ) ;
2016-09-27 16:27:28 +02:00
taskTimeCounter + = pollingTime ;
2016-09-19 19:03:48 +02:00
result = HttpRequest . sendGetRequest ( statusLocation , " " ) ;
// AnalysisLogger.getLogger().debug("Result in location: " + result);
}
// request = HttpRequest.sendGetRequest(url, ""); // AnalysisLogger.getLogger().debug("Answer for task "+taskNumber+": "+request); }catch(Exception e){ AnalysisLogger.getLogger().debug("Request failure for task "+taskNumber+": "+e.getLocalizedMessage()); } if (request.contains("<wps:ProcessSucceeded>")) success = true; if (request.contains("<ows:Exception>")){ failure = true; incrementOverallFailures(); } try { Thread.sleep(pollingTime); } catch (InterruptedException e) { e.printStackTrace(); } }
}
if ( isStopped ( ) & & statusLocation ! = null & & statusLocation . length ( ) > 0 ) {
String wpscancel = statusLocation . replace ( " RetrieveResultServlet " , " CancelComputationServlet " ) ;
result = HttpRequest . sendGetRequest ( wpscancel , " " ) ;
}
exitstatus = GenericWorker . TASK_SUCCESS ;
if ( failure )
{
exitstatus = GenericWorker . TASK_FAILURE ;
2016-09-29 12:49:16 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Task Number " + taskNumber + " - Failure cause: " + URLDecoder . decode ( result , " UTF-8 " ) ) ;
2016-09-19 19:03:48 +02:00
}
// AnalysisLogger.getLogger().debug("Process execution finished: " + exitstatus);
} catch ( Exception e ) {
e . printStackTrace ( ) ;
2017-05-08 18:18:18 +02:00
AnalysisLogger . getLogger ( ) . error ( " WPSJobManager->Task Number {} - Process exception " , taskNumber , e ) ;
2016-09-19 19:03:48 +02:00
exitstatus = GenericWorker . TASK_FAILURE ;
} finally {
}
}
@Override
public void run ( ) {
int trials = 0 ;
boolean duplicate = false ;
while ( ! exitstatus . equals ( GenericWorker . TASK_SUCCESS ) & & trials < maxTrialsPerThread ) {
callTask ( duplicate ) ;
if ( exitstatus . equals ( GenericWorker . TASK_FAILURE ) ) {
trials + + ;
hasResentTrue ( ) ;
duplicate = true ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Task Number " + taskNumber + " - Retrying n. " + trials ) ;
2016-09-19 19:03:48 +02:00
}
}
if ( exitstatus . equals ( GenericWorker . TASK_SUCCESS ) )
incrementOverallSuccess ( ) ;
else
incrementOverallFailures ( ) ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Task Number " + taskNumber + " - Finished: " + exitstatus ) ;
2016-09-19 19:03:48 +02:00
}
}
/**
 * Reports the number of nodes; this implementation always answers one.
 *
 * @return the constant 1
 */
public int getNumberOfNodes() {
    final int nodes = 1;
    return nodes;
}
/**
 * Reports the number of active nodes; this implementation always answers one.
 *
 * @return the constant 1
 */
public int getActiveNodes() {
    final int activeNodes = 1;
    return activeNodes;
}
/**
 * Fraction of tasks that have finished, successfully or not.
 *
 * @return (failures + successes) / total tasks, or 0 while no task has been
 *         registered (avoids the NaN produced by a 0/0 float division)
 */
public float getStatus() {
    if (overallTasks <= 0) {
        return 0f;
    }
    return (float) (overallFailures + overallSuccess) / (float) overallTasks;
}
/**
 * Tells whether the computation was stopped.
 *
 * @return the stop flag, read through the synchronized {@code isStopped()} so
 *         that a stop requested by another thread is visible to the caller
 */
public boolean wasAborted() {
    return isStopped();
}
/**
 * Tells whether at least one task had to be resubmitted.
 * Synchronized because the flag is written under the same monitor by
 * {@code hasResentTrue()}.
 *
 * @return {@code true} if any task was retried
 */
public synchronized boolean hasResentMessages() {
    return hasResentMessages;
}
2016-10-05 19:05:34 +02:00
public int estimateNumberOfServices ( String scope ) throws Exception {
List < String > wpsservices = InfraRetrieval . retrieveService ( " DataMiner " , scope ) ;
2016-09-19 19:03:48 +02:00
2016-09-27 16:27:28 +02:00
if ( wpsservices = = null | | wpsservices . size ( ) = = 0 ) {
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Error: No DataMiner GCore Endpoints found! " ) ;
2016-10-05 19:05:34 +02:00
throw new Exception ( " No DataMinerWorkers GCore Endpoint found in the VRE " + scope ) ;
2016-09-27 16:27:28 +02:00
}
2016-09-19 19:03:48 +02:00
List < String > differentServices = new ArrayList < String > ( ) ;
for ( String service : wpsservices ) {
service = service . substring ( service . indexOf ( " / " ) + 2 ) ;
service = service . substring ( 0 , service . indexOf ( " : " ) ) ;
if ( ! differentServices . contains ( service ) )
differentServices . add ( service ) ;
}
2016-09-27 16:27:28 +02:00
2016-10-05 19:05:34 +02:00
int numberofservices = differentServices . size ( ) ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Number of found services " + numberofservices ) ;
2016-10-07 17:19:07 +02:00
nservices = Math . max ( 1 , numberofservices - 1 ) ;
return nservices ;
2016-10-05 19:05:34 +02:00
}
public void uploadAndExecuteChunkized ( AlgorithmConfiguration configuration , String algorithmClass , List < String > arguments , String session ) throws Exception {
ExecutorService executor = null ;
try {
int numberofservices = 1 ;
String callTemplate = getCallTemplate ( ) ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Estimating the number of services " ) ;
if ( nservices > 0 )
numberofservices = nservices ;
else
numberofservices = estimateNumberOfServices ( configuration . getGcubeScope ( ) ) ;
2016-09-19 19:03:48 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Number of dataminer services " + numberofservices ) ;
int parallelisation = numberofservices * 2 ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Number of parallel processes (parallelisation) : " + parallelisation ) ;
2016-09-27 16:27:28 +02:00
//List<String> wpshosts = InfraRetrieval.retrieveAddresses("DataAnalysis",configuration.getGcubeScope(),"-----");
List < String > wpshosts = InfraRetrieval . retrieveServiceAddress ( " DataAnalysis " , " DataMinerWorkers " , configuration . getGcubeScope ( ) , " noexclusion " ) ;
2016-09-19 19:03:48 +02:00
2016-09-27 16:27:28 +02:00
if ( wpshosts = = null | | wpshosts . size ( ) = = 0 ) {
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Error: No DataMinerWorkers Service Endpoints found at all! " ) ;
throw new Exception ( " WPSJobManager->No Dataminer Workers Service Endpoint found in the VRE - DataMinerWorkers Resource is required in the VRE " + configuration . getGcubeScope ( ) ) ;
}
2016-09-19 19:03:48 +02:00
String wpshost = wpshosts . get ( 0 ) ;
2016-09-27 16:27:28 +02:00
2016-09-19 19:03:48 +02:00
wpshost = wpshost . substring ( wpshost . indexOf ( " / " ) + 2 ) ;
//String wpshostAddress = wpshost.substring(0,wpshost.indexOf(":"));
String wpshostAddress = wpshost . substring ( 0 , wpshost . indexOf ( " / " ) ) ;
//String wpshostPort = wpshost.substring(wpshost.indexOf(":")+1,wpshost.indexOf("/"));
//http://dataminer1-devnext.d4science.org:80/wps/gcube/resourc
wpshost = wpshostAddress ;
int wpsport = 80 ;
overallTasks = arguments . size ( ) ;
2016-09-30 17:01:12 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Workers WPS host " + wpshost ) ;
2016-10-07 17:19:07 +02:00
executor = Executors . newFixedThreadPool ( nservices ) ;
// executor = Executors.newFixedThreadPool(2);
2016-09-19 19:03:48 +02:00
int taskNumber = 0 ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Executing algorithm class: " + algorithmClass ) ;
for ( String argument : arguments ) {
String [ ] lfnlnr = argument . split ( " " ) ;
int leftOff = Integer . parseInt ( lfnlnr [ 0 ] ) ;
int leftNum = Integer . parseInt ( lfnlnr [ 1 ] ) ;
int rightOff = Integer . parseInt ( lfnlnr [ 2 ] ) ;
int rightNum = Integer . parseInt ( lfnlnr [ 3 ] ) ;
2016-09-27 16:27:28 +02:00
int maxTrials = parallelisation ;
2016-09-19 19:03:48 +02:00
TasksWatcher watcher = new TasksWatcher ( algorithmClass ,
configuration . getGcubeUserName ( ) ,
2016-09-27 16:27:28 +02:00
configuration . getGcubeToken ( ) ,
wpshost , wpsport , session , taskNumber , configuration , leftOff , rightOff , leftNum , rightNum , callTemplate , maxTrials ) ;
2016-09-19 19:03:48 +02:00
executor . execute ( watcher ) ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Task number " + taskNumber + " launched with arguments: " + argument ) ;
2016-09-19 19:03:48 +02:00
taskNumber + + ;
2016-09-27 16:27:28 +02:00
Thread . sleep ( 1000 ) ;
2016-09-19 19:03:48 +02:00
}
int njobs = overallFailures + overallSuccess ;
int pnjobs = njobs ;
2016-09-27 16:27:28 +02:00
2016-09-19 19:03:48 +02:00
while ( njobs < overallTasks ) {
Thread . sleep ( pollingTime ) ;
float percFailure = ( float ) ( overallFailures ) / ( float ) overallTasks ;
//if (percFailure>0.5)
if ( overallFailures > 0 )
stop ( ) ;
njobs = overallFailures + overallSuccess ;
if ( pnjobs < njobs ) {
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Number of finished jobs " + njobs + " of " + overallTasks ) ;
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Number of errors " + overallFailures + " - perc failure " + percFailure ) ;
2016-09-27 16:27:28 +02:00
pnjobs = njobs ;
2016-09-19 19:03:48 +02:00
}
}
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Overall computation finished " ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
2016-09-27 16:27:28 +02:00
throw e ;
2016-09-19 19:03:48 +02:00
}
finally {
if ( executor ! = null ) {
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Shutting down the executions " ) ;
executor . shutdown ( ) ;
2016-09-27 16:27:28 +02:00
AnalysisLogger . getLogger ( ) . debug ( " WPSJobManager->Shut down completed " ) ;
2016-09-19 19:03:48 +02:00
}
}
}
}