Lucio Lelii 2017-10-03 16:12:47 +00:00
parent d9ea81966b
commit 4507c66a84
1 changed file with 29 additions and 26 deletions

View File

@@ -37,6 +37,8 @@ import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Match
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters; import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters;
import org.gcube.dataanalysis.executor.scripts.OSCommand; import org.gcube.dataanalysis.executor.scripts.OSCommand;
import org.hibernate.SessionFactory; import org.hibernate.SessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BionymFlexibleWorkflowTransducer extends ActorNode { public class BionymFlexibleWorkflowTransducer extends ActorNode {
//SM parameters //SM parameters
@@ -50,6 +52,8 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
String destinationTable; String destinationTable;
String destinationTableLabel; String destinationTableLabel;
private static Logger logger = LoggerFactory.getLogger(BionymFlexibleWorkflowTransducer.class);
//Table //Table
public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID"; public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID";
private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255), TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)"; private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255), TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)";
@@ -233,9 +237,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
status = 0; status = 0;
long t0 = System.currentTimeMillis(); long t0 = System.currentTimeMillis();
// rebuild variables // rebuild variables
System.out.println("Restoring configuration"); logger.trace("Restoring configuration");
AlgorithmConfiguration config = Transformations.restoreConfig(new File(sandboxFolder, nodeConfigurationFileObject).getAbsolutePath()); AlgorithmConfiguration config = Transformations.restoreConfig(new File(sandboxFolder, nodeConfigurationFileObject).getAbsolutePath());
config.setConfigPath(sandboxFolder); config.setConfigPath(sandboxFolder);
config.setAlgorithmClassLoader(Thread.currentThread().getContextClassLoader());
dbconnection = DatabaseUtils.initDBSession(config); dbconnection = DatabaseUtils.initDBSession(config);
String destinationTable = config.getParam(destinationTableParam); String destinationTable = config.getParam(destinationTableParam);
@@ -250,15 +255,15 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
String overallMaxResults= config.getParam(YasmeenGlobalParameters.overallMaxResults); String overallMaxResults= config.getParam(YasmeenGlobalParameters.overallMaxResults);
System.out.println("Destination Table: " + destinationTable); logger.trace("Destination Table: " + destinationTable);
System.out.println("Origin Table: " + originTable); logger.trace("Origin Table: " + originTable);
System.out.println("Column of names: " + rawnamesColumn); logger.trace("Column of names: " + rawnamesColumn);
System.out.println("Parser to use: " + parser); logger.trace("Parser to use: " + parser);
System.out.println("Accuracy vs Speed: " + accuracyvsspeed); logger.trace("Accuracy vs Speed: " + accuracyvsspeed);
System.out.println("Reference Dataset: " + reference); logger.trace("Reference Dataset: " + reference);
System.out.println("Do Preprocessing: " + doPreprocess); logger.trace("Do Preprocessing: " + doPreprocess);
System.out.println("Use Stemming:" + usestemming); logger.trace("Use Stemming:" + usestemming);
System.out.println("Overall MaxResults:" + overallMaxResults); logger.trace("Overall MaxResults:" + overallMaxResults);
//prepare the WF //prepare the WF
HashMap<String,String> globalparameters = new HashMap<String, String>(); HashMap<String,String> globalparameters = new HashMap<String, String>();
@@ -270,36 +275,36 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
globalparameters.put(YasmeenGlobalParameters.performanceParam, accuracyvsspeed); globalparameters.put(YasmeenGlobalParameters.performanceParam, accuracyvsspeed);
globalparameters.put(YasmeenGlobalParameters.taxaAuthorityFileParam, reference); globalparameters.put(YasmeenGlobalParameters.taxaAuthorityFileParam, reference);
globalparameters.put(YasmeenGlobalParameters.useStemmedGenusAndSpecies, usestemming); globalparameters.put(YasmeenGlobalParameters.useStemmedGenusAndSpecies, usestemming);
System.out.println("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0)); logger.trace("Configuration Restored! - Time: "+(System.currentTimeMillis()-t0));
// retrieve the list of names to process // retrieve the list of names to process
long t1 = System.currentTimeMillis(); long t1 = System.currentTimeMillis();
System.out.println("Retrieving names to process"); logger.trace("Retrieving names to process");
String query = DatabaseUtils.getDinstictElements(originTable, rawnamesColumn, "")+" offset "+rightStartIndex+" limit "+numberOfRightElementsToProcess; String query = DatabaseUtils.getDinstictElements(originTable, rawnamesColumn, "")+" offset "+rightStartIndex+" limit "+numberOfRightElementsToProcess;
List<Object> rawnames = DatabaseFactory.executeSQLQuery(query, dbconnection); List<Object> rawnames = DatabaseFactory.executeSQLQuery(query, dbconnection);
System.out.println("Retrieved a total of "+rawnames.size()+" species"); logger.trace("Retrieved a total of "+rawnames.size()+" species");
//modification due to the limit and offset on the query //modification due to the limit and offset on the query
rightStartIndex = 0; rightStartIndex = 0;
int end = rightStartIndex + numberOfRightElementsToProcess; int end = rightStartIndex + numberOfRightElementsToProcess;
System.out.println("Processing from "+rightStartIndex+" to "+end); logger.trace("Processing from "+rightStartIndex+" to "+end);
List<String> rawnamesFiltered = new ArrayList<String>(); List<String> rawnamesFiltered = new ArrayList<String>();
for (int i = rightStartIndex; i < end; i++) { for (int i = rightStartIndex; i < end; i++) {
String raw = "" + rawnames.get(i); String raw = "" + rawnames.get(i);
// System.out.println("Taking species:"+raw); // logger.trace("Taking species:"+raw);
rawnamesFiltered.add(raw.replaceAll("^'", "").replaceAll("'$", "")); rawnamesFiltered.add(raw.replaceAll("^'", "").replaceAll("'$", ""));
} }
int rawscounter = rawnamesFiltered.size(); int rawscounter = rawnamesFiltered.size();
System.out.println("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1)); logger.trace("Retrieve from DB - Time: "+(System.currentTimeMillis()-t1));
long t2 = System.currentTimeMillis(); long t2 = System.currentTimeMillis();
System.out.println("Processing " + rawscounter + " species.."); logger.trace("Processing " + rawscounter + " species..");
// prepare the environment // prepare the environment
try { try {
OSCommand.ExecuteGetLine("chmod +x *", null); OSCommand.ExecuteGetLine("chmod +x *", null);
} catch (Exception e) { } catch (Exception e) {
System.out.println("WARNING: could not change the permissions"); logger.trace("WARNING: could not change the permissions");
} }
int overallMR = 10; int overallMR = 10;
@@ -312,10 +317,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
List<Matcher> matchers = buildMatcherList(config, sandboxFolder, globalparameters); List<Matcher> matchers = buildMatcherList(config, sandboxFolder, globalparameters);
if (matchers!=null) if (matchers!=null)
bionym.resetMatchers(matchers); bionym.resetMatchers(matchers);
System.out.println("WF Initialization - Time: "+(System.currentTimeMillis()-t2)); logger.trace("WF Initialization - Time: "+(System.currentTimeMillis()-t2));
MatcherOutput output = bionym.executeChainedWorkflow(rawnamesFiltered); MatcherOutput output = bionym.executeChainedWorkflow(rawnamesFiltered);
System.out.println("Workflow Executed"); logger.trace("Workflow Executed");
long t3 = System.currentTimeMillis(); long t3 = System.currentTimeMillis();
int nEntries = output.getEntriesNumber(); int nEntries = output.getEntriesNumber();
List<String[]> toWrite = new ArrayList<String[]>(); List<String[]> toWrite = new ArrayList<String[]>();
@@ -333,14 +338,12 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
} }
//write on DB //write on DB
DatabaseUtils.insertChunksIntoTable(destinationTable, headers, toWrite, 5000, dbconnection); DatabaseUtils.insertChunksIntoTable(destinationTable, headers, toWrite, 5000, dbconnection);
System.out.println("Write on DB - Time: "+(System.currentTimeMillis()-t3)); logger.trace("Write on DB - Time: "+(System.currentTimeMillis()-t3));
System.out.println("The procedure finished successfully. Processed " + rawscounter + " species."); logger.trace("The procedure finished successfully. Processed " + rawscounter + " species.");
System.out.println("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms"); logger.trace("Elapsed Time " + (System.currentTimeMillis() - t0) + " ms");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("warning: error in node execution ", e);
System.out.println("warning: error in node execution " + e.getLocalizedMessage());
System.err.println("Error in node execution " + e.getLocalizedMessage());
return -1; return -1;
} finally { } finally {
if (dbconnection != null) if (dbconnection != null)