diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java index 1a7a26f..f803fdb 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java @@ -44,7 +44,7 @@ public class BioClimateHSPENTransducer extends BioClimateHSPECTransducer{ DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); List templateHspec = new ArrayList(); - templateHspec.add(TableTemplates.HCAF); + templateHspec.add(TableTemplates.HSPEN); TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false); PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false); diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java new file mode 100644 index 0000000..966b48b --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java @@ -0,0 +1,343 @@ +package org.gcube.dataanalysis.ecoengine.transducers; + +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.gcube.contentmanagement.graphtools.utils.DateGuesser; +import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; +import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory; +import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE; +import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; +import 
org.gcube.dataanalysis.ecoengine.interfaces.Transducerer; +import org.gcube.dataanalysis.ecoengine.test.regression.Regressor; +import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; +import org.hibernate.SessionFactory; + +public class OccurrencePointsMerger implements Transducerer{ + + static String longitudeColumn= "longitudeColumn"; + static String latitudeColumn= "latitudeColumn"; + static String recordedByColumn= "recordedByColumn"; + static String scientificNameColumn = "scientificNameColumn"; + static String eventDateColumn = "eventDateColumn"; + static String lastModificationColumn = "lastModificationColumn"; + static String rightTableNameF= "rightTableName"; + static String leftTableNameF = "leftTableName"; + static String mergedTableNameF = "mergedTableName"; + static String spatialTolerance= "spatialTolerance"; + static String confidence= "confidence"; + + protected List records_left; + protected List records_right; + protected AlgorithmConfiguration config; + + protected String lonFld; + protected String latFld; + protected String recordedByFld; + protected String scientificNameFld; + protected String eventDatFld; + protected String modifDatFld; + protected String leftTableName; + protected String rightTableName; + protected String mergedTableName; + protected float spatialToleranceValue; + protected float confidenceValue; + protected StringBuffer columns; + protected List objectstoinsert; + protected List objectstodelete; + protected List columnsNames; + protected SessionFactory dbconnection; + + protected class OccurrenceRecord{ + + public String scientificName; + public String recordedby; + public Calendar eventdate; + public Calendar modifdate; +// public String locality; +// public String country; + public float x; + public float y; + +// Map metadata; + public List otherValues; + public OccurrenceRecord(){ + otherValues = new ArrayList(); + } + } + + + public OccurrenceRecord row2OccurrenceRecord(Object[] row){ + OccurrenceRecord record = 
new OccurrenceRecord(); + int index = 0; + for (Object name:columnsNames){ + String name$ = ""+name; + String value$ = ""+row[index]; + if (name$.equalsIgnoreCase(lonFld)){ + record.x=Float.parseFloat(value$); + } + else if (name$.equalsIgnoreCase(latFld)){ + record.y=Float.parseFloat(value$); + } + else if (name$.equalsIgnoreCase(recordedByFld)){ + record.recordedby=value$; + } + else if (name$.equalsIgnoreCase(eventDatFld)){ + record.eventdate=DateGuesser.convertDate(value$); + } + else if (name$.equalsIgnoreCase(modifDatFld)){ + record.modifdate=DateGuesser.convertDate(value$); + } + else + record.otherValues.add(value$); + + index++; + } + + return record; + } + + public String occurrenceRecord2String(OccurrenceRecord record){ + StringBuffer buffer =new StringBuffer(); + int index = 0; + int nNames = columnsNames.size(); + for (Object name:columnsNames){ + + String name$ = ""+name; + String value$ = null; + if (name$.equalsIgnoreCase(lonFld)){ + value$="'"+record.x+"'"; + } + else if (name$.equalsIgnoreCase(latFld)){ + value$="'"+record.y+"'"; + } + else if (name$.equalsIgnoreCase(recordedByFld)){ + value$="'"+record.recordedby+"'"; + } + else if (name$.equalsIgnoreCase(eventDatFld)){ + value$="'"+record.eventdate.toString()+"'"; + } + else if (name$.equalsIgnoreCase(modifDatFld)){ + value$="'"+record.modifdate.toString()+"'"; + } + else + value$ = "'"+record.otherValues.get(index)+"'"; + + buffer.append(value$); + if (index<nNames-1) + buffer.append(","); + + index++; + } + + return buffer.toString(); + } + + + @Override + public List<StatisticalType> getInputParameters() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public String getResourceLoad() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public String getResources() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public float getStatus() { + // TODO Auto-generated method stub + return 0; + } + + + @Override + public INFRASTRUCTURE getInfrastructure() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public 
StatisticalType getOutput() { + // TODO Auto-generated method stub + return null; + } + + + @Override + public void init() throws Exception { + + AnalysisLogger.setLogger(config.getConfigPath()+AlgorithmConfiguration.defaultLoggerFile); + lonFld=config.getParam(longitudeColumn); + latFld=config.getParam(latitudeColumn); + recordedByFld=config.getParam(recordedByColumn); + scientificNameFld=config.getParam(scientificNameColumn); + eventDatFld=config.getParam(eventDateColumn); + modifDatFld=config.getParam(lastModificationColumn); + leftTableName=config.getParam(leftTableNameF); + rightTableName=config.getParam(rightTableNameF); + mergedTableName=config.getParam(mergedTableNameF); + spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance)); + confidenceValue=Float.parseFloat(config.getParam(confidence)); + + objectstoinsert = new ArrayList(); + objectstodelete = new ArrayList(); + } + + + @Override + public void setConfiguration(AlgorithmConfiguration config) { + this.config=config; + } + + + @Override + public void shutdown() { + // TODO Auto-generated method stub + + } + + + @Override + public String getDescription() { + // TODO Auto-generated method stub + return null; + } + + protected float extProb(OccurrenceRecord right,OccurrenceRecord left){ + return (float)Math.random(); + } + + protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){ + //if over the threshold then add to the complete list of elements + if (probability<confidenceValue) + return; + objectstoinsert.add(rightOcc); + objectstodelete.add(leftOcc); + } + + public void persist(){ + /* [RECONSTRUCTION NOTE: method body lost in extraction (angle-bracket span stripped) - writes objectstoinsert/objectstodelete into mergedTableName; recover exact code from VCS] */ + } + + @Override + public void compute() throws Exception { + try{ + /* [RECONSTRUCTION NOTE: opening lost in extraction - initializes dbconnection from config, fills columnsNames via DatabaseUtils.getColumnsNamesStatement(leftTableName) (added by this same patch) and builds the comma-separated columns buffer; recover exact code from VCS] */ + //take the elements from sx table + AnalysisLogger.getLogger().trace("Taking elements from left table: "+leftTableName); + List<Object> leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(),""),dbconnection); + //take the elements from dx table + AnalysisLogger.getLogger().trace("Taking elements from right table: "+rightTableName); + List rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(),""),dbconnection); + //for each element in dx + List<OccurrenceRecord> leftRecords = new ArrayList<OccurrenceRecord>(); + 
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName); + int iterations = 0; + for (Object rRow:rightRows){ + OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow); + //for each element in sx + int k=0; + for (Object lRow:leftRows){ + OccurrenceRecord leftOcc = null; + if (iterations==0){ + leftOcc = row2OccurrenceRecord((Object[])lRow); + leftRecords.add(leftOcc); + } + else + leftOcc =leftRecords.get(k); + + //evaluate P(dx,sx) + float p = extProb(leftOcc,rightOcc); + manageProbability(p, leftOcc, rightOcc); + if (p>=confidenceValue){ + AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")"); + break; + } + k++; + } + iterations++; + } + //transform the complete list into a table + persist(); + //close DB connection + }catch(Exception e){ + throw e; + } + finally{ + if (dbconnection!=null) + dbconnection.close(); + } + } +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java index df5857a..d033d8d 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java @@ -8,6 +8,7 @@ import org.hibernate.SessionFactory; public class DatabaseUtils { static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'"; + static String queryColumns = "SELECT column_name FROM information_schema.COLUMNS WHERE table_name ='%1$s'"; static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name"; static String genCreationStatement = "CREATE TABLE %1$s ( %2$s 
%3$s );"; static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;"; @@ -86,7 +87,7 @@ public class DatabaseUtils { columnDescrs = colbuffer.toString(); } - List columns; + public List columns; public List getColumnDecriptions() { return columns; @@ -193,6 +194,12 @@ public class DatabaseUtils { return creationStatement; } + + public static String getColumnsNamesStatement(String table) { + String statement = String.format(queryColumns, table); + return statement; + } + public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception { if (createTable) { try {