git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineSmartExecutor@154784 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
d9ea81966b
commit
4507c66a84
|
@ -37,6 +37,8 @@ import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Match
|
|||
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters;
|
||||
import org.gcube.dataanalysis.executor.scripts.OSCommand;
|
||||
import org.hibernate.SessionFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
||||
//SM parameters
|
||||
|
@ -50,6 +52,8 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
String destinationTable;
|
||||
String destinationTableLabel;
|
||||
|
||||
private static Logger logger = LoggerFactory.getLogger(BionymFlexibleWorkflowTransducer.class);
|
||||
|
||||
//Table
|
||||
public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID";
|
||||
private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255), TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)";
|
||||
|
@ -233,9 +237,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
status = 0;
|
||||
long t0 = System.currentTimeMillis();
|
||||
// rebuild variables
|
||||
System.out.println("Restoring configuration");
|
||||
logger.trace("Restoring configuration");
|
||||
AlgorithmConfiguration config = Transformations.restoreConfig(new File(sandboxFolder, nodeConfigurationFileObject).getAbsolutePath());
|
||||
config.setConfigPath(sandboxFolder);
|
||||
config.setAlgorithmClassLoader(Thread.currentThread().getContextClassLoader());
|
||||
dbconnection = DatabaseUtils.initDBSession(config);
|
||||
|
||||
String destinationTable = config.getParam(destinationTableParam);
|
||||
|
@ -250,15 +255,15 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
String overallMaxResults= config.getParam(YasmeenGlobalParameters.overallMaxResults);
|
||||
|
||||
|
||||
System.out.println("Destination Table: " + destinationTable);
|
||||
System.out.println("Origin Table: " + originTable);
|
||||
System.out.println("Column of names: " + rawnamesColumn);
|
||||
System.out.println("Parser to use: " + parser);
|
||||
System.out.println("Accuracy vs Speed: " + accuracyvsspeed);
|
||||
System.out.println("Reference Dataset: " + reference);
|
||||
System.out.println("Do Preprocessing: " + doPreprocess);
|
||||
System.out.println("Use Stemming:" + usestemming);
|
||||
System.out.println("Overall MaxResults:" + overallMaxResults);
|
||||
logger.trace("Destination Table: " + destinationTable);
|
||||
logger.trace("Origin Table: " + originTable);
|
||||
logger.trace("Column of names: " + rawnamesColumn);
|
||||
logger.trace("Parser to use: " + parser);
|
||||
logger.trace("Accuracy vs Speed: " + accuracyvsspeed);
|
||||
logger.trace("Reference Dataset: " + reference);
|
||||
logger.trace("Do Preprocessing: " + doPreprocess);
|
||||
logger.trace("Use Stemming:" + usestemming);
|
||||
logger.trace("Overall MaxResults:" + overallMaxResults);
|
||||
|
||||
//prepare the WF
|
||||
HashMap<String,String> globalparameters = new HashMap<String, String>();
|
||||
|
@ -270,36 +275,36 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
globalparameters.put(YasmeenGlobalParameters.performanceParam, accuracyvsspeed);
|
||||
globalparameters.put(YasmeenGlobalParameters.taxaAuthorityFileParam, reference);
|
||||
globalparameters.put(YasmeenGlobalParameters.useStemmedGenusAndSpecies, usestemming);
|
||||
System.out.println("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0));
|
||||
logger.trace("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0));
|
||||
|
||||
// retrieve the list of names to process
|
||||
long t1 = System.currentTimeMillis();
|
||||
System.out.println("Retrieving names to process");
|
||||
logger.trace("Retrieving names to process");
|
||||
String query = DatabaseUtils.getDinstictElements(originTable, rawnamesColumn, "")+" offset "+rightStartIndex+" limit "+numberOfRightElementsToProcess;
|
||||
List<Object> rawnames = DatabaseFactory.executeSQLQuery(query, dbconnection);
|
||||
System.out.println("Retrieved a total of "+rawnames.size()+" species");
|
||||
logger.trace("Retrieved a total of "+rawnames.size()+" species");
|
||||
//modification due to the limit and offset on the query
|
||||
rightStartIndex = 0;
|
||||
|
||||
int end = rightStartIndex + numberOfRightElementsToProcess;
|
||||
|
||||
System.out.println("Processing from "+rightStartIndex+" to "+end);
|
||||
logger.trace("Processing from "+rightStartIndex+" to "+end);
|
||||
List<String> rawnamesFiltered = new ArrayList<String>();
|
||||
for (int i = rightStartIndex; i < end; i++) {
|
||||
String raw = "" + rawnames.get(i);
|
||||
// System.out.println("Taking species:"+raw);
|
||||
// logger.trace("Taking species:"+raw);
|
||||
rawnamesFiltered.add(raw.replaceAll("^'", "").replaceAll("'$", ""));
|
||||
}
|
||||
int rawscounter = rawnamesFiltered.size();
|
||||
System.out.println("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1));
|
||||
logger.trace("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1));
|
||||
|
||||
long t2 = System.currentTimeMillis();
|
||||
System.out.println("Processing " + rawscounter + " species..");
|
||||
logger.trace("Processing " + rawscounter + " species..");
|
||||
// prepare the environment
|
||||
try {
|
||||
OSCommand.ExecuteGetLine("chmod +x *", null);
|
||||
} catch (Exception e) {
|
||||
System.out.println("WARNING: could not change the permissions");
|
||||
logger.trace("WARNING: could not change the permissions");
|
||||
}
|
||||
|
||||
int overallMR = 10;
|
||||
|
@ -312,10 +317,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
List<Matcher> matchers = buildMatcherList(config, sandboxFolder, globalparameters);
|
||||
if (matchers!=null)
|
||||
bionym.resetMatchers(matchers);
|
||||
System.out.println("WF Initialization - Time: "+(System.currentTimeMillis()-t2));
|
||||
logger.trace("WF Initialization - Time: "+(System.currentTimeMillis()-t2));
|
||||
|
||||
MatcherOutput output = bionym.executeChainedWorkflow(rawnamesFiltered);
|
||||
System.out.println("Workflow Executed");
|
||||
logger.trace("Workflow Executed");
|
||||
long t3 = System.currentTimeMillis();
|
||||
int nEntries = output.getEntriesNumber();
|
||||
List<String[]> toWrite = new ArrayList<String[]>();
|
||||
|
@ -333,14 +338,12 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
|
|||
}
|
||||
//write on DB
|
||||
DatabaseUtils.insertChunksIntoTable(destinationTable, headers, toWrite, 5000, dbconnection);
|
||||
System.out.println("Write on DB - Time: "+(System.currentTimeMillis()-t3));
|
||||
logger.trace("Write on DB - Time: "+(System.currentTimeMillis()-t3));
|
||||
|
||||
System.out.println("The procedure finished successfully. Processed " + rawscounter + " species.");
|
||||
System.out.println("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
|
||||
logger.trace("The procedure finished successfully. Processed " + rawscounter + " species.");
|
||||
logger.trace("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
System.out.println("warning: error in node execution " + e.getLocalizedMessage());
|
||||
System.err.println("Error in node execution " + e.getLocalizedMessage());
|
||||
logger.error("warning: error in node execution ", e);
|
||||
return -1;
|
||||
} finally {
|
||||
if (dbconnection != null)
|
||||
|
|
Loading…
Reference in New Issue