@ -37,6 +37,8 @@ import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Match
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters ;
import org.gcube.dataanalysis.executor.scripts.OSCommand ;
import org.hibernate.SessionFactory ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
public class BionymFlexibleWorkflowTransducer extends ActorNode {
//SM parameters
@ -50,6 +52,8 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
String destinationTable ;
String destinationTableLabel ;
// SLF4J logger bound to this class.
private static Logger logger = LoggerFactory . getLogger ( BionymFlexibleWorkflowTransducer . class ) ;
// Output table layout.
// Comma-separated column list handed to DatabaseUtils.insertChunksIntoTable when the results are written;
// the names match the columns declared in createOutputTable below.
public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID" ;
// SQL template for the output table; %1$s is a positional format argument that follows CREATE TABLE,
// presumably the table name - the call that fills it in is not visible in this excerpt (TODO confirm).
private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255), TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)" ;
@ -233,9 +237,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
status = 0 ;
long t0 = System . currentTimeMillis ( ) ;
// rebuild variables
System. out . println ( "Restoring configuration" ) ;
logger. trace ( "Restoring configuration" ) ;
AlgorithmConfiguration config = Transformations . restoreConfig ( new File ( sandboxFolder , nodeConfigurationFileObject ) . getAbsolutePath ( ) ) ;
config . setConfigPath ( sandboxFolder ) ;
config . setAlgorithmClassLoader ( Thread . currentThread ( ) . getContextClassLoader ( ) ) ;
dbconnection = DatabaseUtils . initDBSession ( config ) ;
String destinationTable = config . getParam ( destinationTableParam ) ;
@ -250,15 +255,15 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
String overallMaxResults = config . getParam ( YasmeenGlobalParameters . overallMaxResults ) ;
System. out . println ( "Destination Table: " + destinationTable ) ;
System. out . println ( "Origin Table: " + originTable ) ;
System. out . println ( "Column of names: " + rawnamesColumn ) ;
System. out . println ( "Parser to use: " + parser ) ;
System. out . println ( "Accuracy vs Speed: " + accuracyvsspeed ) ;
System. out . println ( "Reference Dataset: " + reference ) ;
System. out . println ( "Do Preprocessing: " + doPreprocess ) ;
System. out . println ( "Use Stemming:" + usestemming ) ;
System. out . println ( "Overall MaxResults:" + overallMaxResults ) ;
logger. trace ( "Destination Table: " + destinationTable ) ;
logger. trace ( "Origin Table: " + originTable ) ;
logger. trace ( "Column of names: " + rawnamesColumn ) ;
logger. trace ( "Parser to use: " + parser ) ;
logger. trace ( "Accuracy vs Speed: " + accuracyvsspeed ) ;
logger. trace ( "Reference Dataset: " + reference ) ;
logger. trace ( "Do Preprocessing: " + doPreprocess ) ;
logger. trace ( "Use Stemming:" + usestemming ) ;
logger. trace ( "Overall MaxResults:" + overallMaxResults ) ;
// prepare the global parameters for the matching workflow (WF)
HashMap < String , String > globalparameters = new HashMap < String , String > ( ) ;
@ -270,36 +275,36 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
globalparameters . put ( YasmeenGlobalParameters . performanceParam , accuracyvsspeed ) ;
globalparameters . put ( YasmeenGlobalParameters . taxaAuthorityFileParam , reference ) ;
globalparameters . put ( YasmeenGlobalParameters . useStemmedGenusAndSpecies , usestemming ) ;
System. out . println ( "Configuration Restored! - Time: " + ( System . currentTimeMillis ( ) - t0 ) ) ;
logger. trace ( "Configuration Restored! - Time: " + ( System . currentTimeMillis ( ) - t0 ) ) ;
// retrieve the list of names to process
long t1 = System . currentTimeMillis ( ) ;
System. out . println ( "Retrieving names to process" ) ;
logger. trace ( "Retrieving names to process" ) ;
String query = DatabaseUtils . getDinstictElements ( originTable , rawnamesColumn , "" ) + " offset " + rightStartIndex + " limit " + numberOfRightElementsToProcess ;
List < Object > rawnames = DatabaseFactory . executeSQLQuery ( query , dbconnection ) ;
System. out . println ( "Retrieved a total of " + rawnames . size ( ) + " species" ) ;
logger. trace ( "Retrieved a total of " + rawnames . size ( ) + " species" ) ;
// the query above already applied offset/limit, so indexing into the returned list restarts from 0
rightStartIndex = 0 ;
int end = rightStartIndex + numberOfRightElementsToProcess ;
System. out . println ( "Processing from " + rightStartIndex + " to " + end ) ;
logger. trace ( "Processing from " + rightStartIndex + " to " + end ) ;
List < String > rawnamesFiltered = new ArrayList < String > ( ) ;
for ( int i = rightStartIndex ; i < end ; i + + ) {
String raw = "" + rawnames . get ( i ) ;
// System.out.println ("Taking species:"+raw);
// logger.trace ("Taking species:"+raw);
rawnamesFiltered . add ( raw . replaceAll ( "^'" , "" ) . replaceAll ( "'$" , "" ) ) ;
}
int rawscounter = rawnamesFiltered . size ( ) ;
System. out . println ( "Retrieve from DB - Time: " + ( System . currentTimeMillis ( ) - t1 ) ) ;
logger. trace ( "Retrieve from DB - Time: " + ( System . currentTimeMillis ( ) - t1 ) ) ;
long t2 = System . currentTimeMillis ( ) ;
System. out . println ( "Processing " + rawscounter + " species.." ) ;
logger. trace ( "Processing " + rawscounter + " species.." ) ;
// prepare the environment
try {
OSCommand . ExecuteGetLine ( "chmod +x *" , null ) ;
} catch ( Exception e ) {
System. out . println ( "WARNING: could not change the permissions" ) ;
logger. trace ( "WARNING: could not change the permissions" ) ;
}
int overallMR = 10 ;
@ -312,10 +317,10 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
List < Matcher > matchers = buildMatcherList ( config , sandboxFolder , globalparameters ) ;
if ( matchers ! = null )
bionym . resetMatchers ( matchers ) ;
System. out . println ( "WF Initialization - Time: " + ( System . currentTimeMillis ( ) - t2 ) ) ;
logger. trace ( "WF Initialization - Time: " + ( System . currentTimeMillis ( ) - t2 ) ) ;
MatcherOutput output = bionym . executeChainedWorkflow ( rawnamesFiltered ) ;
System. out . println ( "Workflow Executed" ) ;
logger. trace ( "Workflow Executed" ) ;
long t3 = System . currentTimeMillis ( ) ;
int nEntries = output . getEntriesNumber ( ) ;
List < String [ ] > toWrite = new ArrayList < String [ ] > ( ) ;
@ -333,14 +338,12 @@ public class BionymFlexibleWorkflowTransducer extends ActorNode {
}
//write on DB
DatabaseUtils . insertChunksIntoTable ( destinationTable , headers , toWrite , 5000 , dbconnection ) ;
System. out . println ( "Write on DB - Time: " + ( System . currentTimeMillis ( ) - t3 ) ) ;
logger. trace ( "Write on DB - Time: " + ( System . currentTimeMillis ( ) - t3 ) ) ;
System. out . println ( "The procedure finished successfully. Processed " + rawscounter + " species." ) ;
System. out . println ( "Elapsed Time " + ( System . currentTimeMillis ( ) - t0 ) + " ms" ) ;
logger. trace ( "The procedure finished successfully. Processed " + rawscounter + " species." ) ;
logger. trace ( "Elapsed Time " + ( System . currentTimeMillis ( ) - t0 ) + " ms" ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
System . out . println ( "warning: error in node execution " + e . getLocalizedMessage ( ) ) ;
System . err . println ( "Error in node execution " + e . getLocalizedMessage ( ) ) ;
logger . error ( "warning: error in node execution " , e ) ;
return - 1 ;
} finally {
if ( dbconnection ! = null )