Lucio Lelii 7 years ago
parent d9ea81966b
commit 4507c66a84

@ -37,6 +37,8 @@ import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Match
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters;
import org.gcube.dataanalysis.executor.scripts.OSCommand;
import org.hibernate.SessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BionymFlexibleWorkflowTransducer extends ActorNode {
//SM parameters
@ -50,6 +52,8 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
// Destination table name and its label (instance state; how these are populated is not visible in this excerpt).
String destinationTable;
String destinationTableLabel;
/** Class-wide SLF4J logger; declared final so the shared reference cannot be reassigned. */
private static final Logger logger = LoggerFactory.getLogger(BionymFlexibleWorkflowTransducer.class);
//Table
/** Comma-separated column names of the output table, in the same order as the columns of {@link #createOutputTable}. */
public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID";
/** SQL template that creates the output table; {@code %1$s} is the placeholder for the table name. */
private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255), TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)";
@ -233,9 +237,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
status = 0;
long t0 = System.currentTimeMillis();
// rebuild variables
System.out.println("Restoring configuration");
logger.trace("Restoring configuration");
AlgorithmConfiguration config = Transformations.restoreConfig(new File(sandboxFolder, nodeConfigurationFileObject).getAbsolutePath());
config.setConfigPath(sandboxFolder);
config.setAlgorithmClassLoader(Thread.currentThread().getContextClassLoader());
dbconnection = DatabaseUtils.initDBSession(config);
String destinationTable = config.getParam(destinationTableParam);
@ -250,15 +255,15 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
String overallMaxResults= config.getParam(YasmeenGlobalParameters.overallMaxResults);
System.out.println("Destination Table: " + destinationTable);
System.out.println("Origin Table: " + originTable);
System.out.println("Column of names: " + rawnamesColumn);
System.out.println("Parser to use: " + parser);
System.out.println("Accuracy vs Speed: " + accuracyvsspeed);
System.out.println("Reference Dataset: " + reference);
System.out.println("Do Preprocessing: " + doPreprocess);
System.out.println("Use Stemming:" + usestemming);
System.out.println("Overall MaxResults:" + overallMaxResults);
logger.trace("Destination Table: " + destinationTable);
logger.trace("Origin Table: " + originTable);
logger.trace("Column of names: " + rawnamesColumn);
logger.trace("Parser to use: " + parser);
logger.trace("Accuracy vs Speed: " + accuracyvsspeed);
logger.trace("Reference Dataset: " + reference);
logger.trace("Do Preprocessing: " + doPreprocess);
logger.trace("Use Stemming:" + usestemming);
logger.trace("Overall MaxResults:" + overallMaxResults);
//prepare the WF
HashMap<String,String> globalparameters = new HashMap<String, String>();
@ -270,36 +275,36 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
globalparameters.put(YasmeenGlobalParameters.performanceParam, accuracyvsspeed);
globalparameters.put(YasmeenGlobalParameters.taxaAuthorityFileParam, reference);
globalparameters.put(YasmeenGlobalParameters.useStemmedGenusAndSpecies, usestemming);
System.out.println("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0));
logger.trace("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0));
// retrieve the list of names to process
long t1 = System.currentTimeMillis();
System.out.println("Retrieving names to process");
logger.trace("Retrieving names to process");
String query = DatabaseUtils.getDinstictElements(originTable, rawnamesColumn, "")+" offset "+rightStartIndex+" limit "+numberOfRightElementsToProcess;
List<Object> rawnames = DatabaseFactory.executeSQLQuery(query, dbconnection);
System.out.println("Retrieved a total of "+rawnames.size()+" species");
logger.trace("Retrieved a total of "+rawnames.size()+" species");
//modification due to the limit and offset on the query
rightStartIndex = 0;
int end = rightStartIndex + numberOfRightElementsToProcess;
System.out.println("Processing from "+rightStartIndex+" to "+end);
logger.trace("Processing from "+rightStartIndex+" to "+end);
List<String> rawnamesFiltered = new ArrayList<String>();
for (int i = rightStartIndex; i < end; i++) {
String raw = "" + rawnames.get(i);
// System.out.println("Taking species:"+raw);
// logger.trace("Taking species:"+raw);
rawnamesFiltered.add(raw.replaceAll("^'", "").replaceAll("'$", ""));
}
int rawscounter = rawnamesFiltered.size();
System.out.println("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1));
logger.trace("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1));
long t2 = System.currentTimeMillis();
System.out.println("Processing " + rawscounter + " species..");
logger.trace("Processing " + rawscounter + " species..");
// prepare the environment
try {
OSCommand.ExecuteGetLine("chmod +x *", null);
} catch (Exception e) {
System.out.println("WARNING: could not change the permissions");
logger.trace("WARNING: could not change the permissions");
}
int overallMR = 10;
@ -312,10 +317,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
List<Matcher> matchers = buildMatcherList(config, sandboxFolder, globalparameters);
if (matchers!=null)
bionym.resetMatchers(matchers);
System.out.println("WF Initialization - Time: "+(System.currentTimeMillis()-t2));
logger.trace("WF Initialization - Time: "+(System.currentTimeMillis()-t2));
MatcherOutput output = bionym.executeChainedWorkflow(rawnamesFiltered);
System.out.println("Workflow Executed");
logger.trace("Workflow Executed");
long t3 = System.currentTimeMillis();
int nEntries = output.getEntriesNumber();
List<String[]> toWrite = new ArrayList<String[]>();
@ -333,14 +338,12 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
}
//write on DB
DatabaseUtils.insertChunksIntoTable(destinationTable, headers, toWrite, 5000, dbconnection);
System.out.println("Write on DB - Time: "+(System.currentTimeMillis()-t3));
logger.trace("Write on DB - Time: "+(System.currentTimeMillis()-t3));
System.out.println("The procedure finished successfully. Processed " + rawscounter + " species.");
System.out.println("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
logger.trace("The procedure finished successfully. Processed " + rawscounter + " species.");
logger.trace("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
} catch (Exception e) {
e.printStackTrace();
System.out.println("warning: error in node execution " + e.getLocalizedMessage());
System.err.println("Error in node execution " + e.getLocalizedMessage());
logger.error("warning: error in node execution ", e);
return -1;
} finally {
if (dbconnection != null)

Loading…
Cancel
Save