diff --git a/cfg/algorithms.properties b/cfg/algorithms.properties index bc0a857..7db92a4 100644 --- a/cfg/algorithms.properties +++ b/cfg/algorithms.properties @@ -2,12 +2,5 @@ AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.Aquamaps AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050 AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050 -REMOTE_AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator -REMOTE_AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator -REMOTE_AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator -REMOTE_AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator -DUMMY=org.gcube.dataanalysis.ecoengine.spatialdistributions.DummyAlgorithm -TEST=org.gcube.dataanalysis.ecoengine.spatialdistributions.TestAlgorithm AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable -AQUAMAPS_NEURAL_NETWORK_NS=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNNS \ No newline at end of file diff --git a/cfg/algorithms1.0.properties b/cfg/algorithms1.0.properties new file mode 100644 index 0000000..bc0a857 --- /dev/null +++ b/cfg/algorithms1.0.properties @@ -0,0 +1,13 @@ +AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable +AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative +AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050 +AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050 
+REMOTE_AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator +REMOTE_AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator +REMOTE_AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator +REMOTE_AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator +DUMMY=org.gcube.dataanalysis.ecoengine.spatialdistributions.DummyAlgorithm +TEST=org.gcube.dataanalysis.ecoengine.spatialdistributions.TestAlgorithm +AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN +AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable +AQUAMAPS_NEURAL_NETWORK_NS=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNNS \ No newline at end of file diff --git a/cfg/transducerers.properties b/cfg/transducerers.properties index d444058..9c0a9f0 100644 --- a/cfg/transducerers.properties +++ b/cfg/transducerers.properties @@ -1,4 +1,7 @@ BIOCLIMATE_HSPEC=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPECTransducer BIOCLIMATE_HCAF=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHCAFTransducer BIOCLIMATE_HSPEN=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPENTransducer -HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer \ No newline at end of file +HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer +OCCURRENCES_MERGER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsMerger +OCCURRENCES_INSEAS_ONEARTH=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsInSeaOnEarth +OCCURRENCES_DUPLICATE_DELETER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsDuplicatesDeleter diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java 
b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java index 56ee8c5..ad53a60 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java @@ -13,7 +13,7 @@ public static void main(String[] args) throws Exception { System.out.println("TEST 1"); List trans = null; - + /* trans = TransducerersFactory.getTransducerers(testConfigLocal()); trans.get(0).init(); Regressor.process(trans.get(0)); @@ -38,6 +38,16 @@ public static void main(String[] args) throws Exception { trans.get(0).init(); Regressor.process(trans.get(0)); trans = null; + + trans = TransducerersFactory.getTransducerers(testConfigLocal6()); + trans.get(0).init(); + Regressor.process(trans.get(0)); + trans = null; + */ + trans = TransducerersFactory.getTransducerers(testConfigLocal7()); + trans.get(0).init(); + Regressor.process(trans.get(0)); + trans = null; } @@ -101,12 +111,47 @@ public static void main(String[] args) throws Exception { config.setParam("lastModificationColumn", "modified"); config.setParam("rightTableName", "whitesharkoccurrences2"); config.setParam("leftTableName", "whitesharkoccurrences1"); - config.setParam("mergedTableName", "whitesharkoccurrencesmerged"); + config.setParam("finalTableName", "whitesharkoccurrencesmerged"); config.setParam("spatialTolerance", "0.5"); config.setParam("confidence", "0.8"); return config; } + + private static AlgorithmConfiguration testConfigLocal6() { + + AlgorithmConfiguration config = Regressor.getConfig(); + config.setAgent("OCCURRENCES_INSEAS_ONEARTH"); + + config.setParam("longitudeColumn", "decimallongitude"); + config.setParam("latitudeColumn", "decimallatitude"); + config.setParam("OccurrencePointsTableName", "whitesharkoccurrences2"); + config.setParam("finalTableName", "whitesharkoccurrencesfilteredseas"); + config.setParam("FilterType", "IN_THE_WATER"); +// 
config.setParam("FilterType", "ON_EARTH"); + + return config; + } + + private static AlgorithmConfiguration testConfigLocal7() { + + AlgorithmConfiguration config = Regressor.getConfig(); + config.setAgent("OCCURRENCES_DUPLICATE_DELETER"); + + config.setParam("longitudeColumn", "decimallongitude"); + config.setParam("latitudeColumn", "decimallatitude"); + config.setParam("recordedByColumn", "recordedby"); + config.setParam("scientificNameColumn", "scientificname"); + config.setParam("eventDateColumn", "eventdate"); + config.setParam("lastModificationColumn", "modified"); + config.setParam("OccurrencePointsTableName", "whitesharkoccurrences2"); + config.setParam("finalTableName", "whitesharkoccurrencesnoduplicates"); + config.setParam("spatialTolerance", "0.5"); + config.setParam("confidence", "0.8"); + + return config; + } + } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsDuplicatesDeleter.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsDuplicatesDeleter.java new file mode 100644 index 0000000..9151ffc --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsDuplicatesDeleter.java @@ -0,0 +1,189 @@ +package org.gcube.dataanalysis.ecoengine.transducers; + +import java.util.ArrayList; +import java.util.List; + +import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; +import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory; +import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; +import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; +import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; +import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; +import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; +import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; +import 
org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; +import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; + +public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{ + static String tableNameF = "OccurrencePointsTableName"; + String tableName; + List records = new ArrayList(); + + public OccurrencePointsDuplicatesDeleter(){ + + } + + + @Override + public List getInputParameters() { + List templatesOccurrence = new ArrayList(); + templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES); + // occurrence points tables + InputTable p1 = new InputTable(templatesOccurrence, tableNameF, "The table containing the occurrence points", ""); + ColumnType p3 = new ColumnType(leftTableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false); + ColumnType p4 = new ColumnType(leftTableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false); + ColumnType p5 = new ColumnType(leftTableNameF, recordedByColumn, "column with RecordedBy values", "recordedby", false); + ColumnType p6 = new ColumnType(leftTableNameF, scientificNameColumn, "column with Scientific Names", "scientificname", false); + ColumnType p7 = new ColumnType(leftTableNameF, eventDateColumn, "column with EventDate values", "eventdate", false); + ColumnType p8 = new ColumnType(leftTableNameF, lastModificationColumn, "column with Modified values", "modified", false); + ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, finalTableNameF, "Name of the resulting table", "processedOccurrences_"); + PrimitiveType p10 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, spatialTolerance, "The tolerance in degree for assessing that two points could be the same", "0.5"); + PrimitiveType p11 = new PrimitiveType(Float.class.getName(), null, 
PrimitiveTypes.NUMBER, confidence, "The overall acceptance similarity threshold over which two points are the same", "0.8"); + + List inputs = new ArrayList(); + inputs.add(p1); + inputs.add(p3); + inputs.add(p4); + inputs.add(p5); + inputs.add(p6); + inputs.add(p7); + inputs.add(p8); + inputs.add(p9); + inputs.add(p10); + inputs.add(p11); + + DatabaseType.addDefaultDBPars(inputs); + return inputs; + } + + @Override + public String getDescription() { + return "An algorithm for deleting similar occurrences in a set of occurrence points of species coming from the Species Discovery Facility of D4Science"; + } + + @Override + public void init() throws Exception { + + AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile); + lonFld = config.getParam(longitudeColumn); + latFld = config.getParam(latitudeColumn); + recordedByFld = config.getParam(recordedByColumn); + scientificNameFld = config.getParam(scientificNameColumn); + eventDatFld = config.getParam(eventDateColumn); + modifDatFld = config.getParam(lastModificationColumn); + tableName = config.getParam(tableNameF); + rightTableName=tableName; + leftTableName=tableName; + finalTableName = config.getParam(finalTableNameF); + spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance)); + confidenceValue = Float.parseFloat(config.getParam(confidence)); + + objectstoinsert = new ArrayList(); + objectstodelete = new ArrayList(); + records = new ArrayList(); + status = 0; + } + + protected boolean isBetterThan(OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) { + if ( + ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate)) + || + (leftOcc.modifdate==null)&&(rightOcc.modifdate!=null) + ) + return false; + else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate) + || + (leftOcc.modifdate!=null)&&(rightOcc.modifdate==null)) + return true; + else + return false; + } + + 
@Override + protected void prepareFinalTable(){ + DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection); + } + + @Override + public void compute() throws Exception { + + try { + // init DB connection + AnalysisLogger.getLogger().trace("Initializing DB Connection"); + dbconnection = DatabaseUtils.initDBSession(config); + AnalysisLogger.getLogger().trace("Taking Table Description"); + AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName); + // create new merged table + DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection); + AnalysisLogger.getLogger().trace("Preparing table: " + finalTableName); + prepareFinalTable(); + AnalysisLogger.getLogger().trace("Extracting columns from: " + finalTableName); + extractColumnNames(); + AnalysisLogger.getLogger().trace("Taken Table Description: " + columns); + // take distinct elements from table + AnalysisLogger.getLogger().trace("Taking elements from table: " + tableName); + List rows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(tableName, columns.toString(), ""), dbconnection); + // for each element in dx + AnalysisLogger.getLogger().trace("Processing"); + status = 10; + int similaritiesCounter = 0; + int allrows = rows.size(); + int rowcounter = 0;; + for (Object row : rows) { + // transform into an occurrence object + OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row); + // for each element in the white list + int k = 0; + int insertedSize = objectstoinsert.size(); + boolean candidate = true; + + while (k= confidenceValue) { + similaritiesCounter++; + if (isBetterThan(testOcc, yetInserted)) { + AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between (" + "\"" + testOcc.scientificName + "\"" + "," + testOcc.x + "\"" + "," + "\"" + testOcc.y + "\"" + "," + "\"" + testOcc.recordedby + "\"" + "," + "\"" + 
convert2conventionalFormat(testOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + yetInserted.scientificName + "\"" + "," + "\"" + yetInserted.x + "\"" + "," + "\"" + yetInserted.y + "\"" + "," + "\"" + yetInserted.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(yetInserted.eventdate) + "\"" + ")"); + objectstoinsert.remove(k); + k--; + insertedSize--; + + } + //if there is yet one better then discard the testOcc + else{ + candidate=false; + break; + } + } + + k++; + } + + if (candidate) + objectstoinsert.add(testOcc); + + status = Math.min(90, 10f + (80 * ((float) rowcounter) / ((float) allrows))); + rowcounter++; + } + + AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + allrows + " distinct elements"); + status = 90; + // transform the complete list into a table + persist(); + // close DB connection + } catch (Exception e) { + throw e; + } finally { + if (dbconnection != null) + dbconnection.close(); + status = 100; + AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed"); + } + } + + +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsInSeaOnEarth.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsInSeaOnEarth.java new file mode 100644 index 0000000..38984e9 --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsInSeaOnEarth.java @@ -0,0 +1,112 @@ +package org.gcube.dataanalysis.ecoengine.transducers; + +import java.util.ArrayList; +import java.util.List; + +import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; +import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory; +import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; +import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; +import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; +import 
org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; +import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; +import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; +import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; + +public class OccurrencePointsInSeaOnEarth extends OccurrencePointsMerger{ + + //NOTE: 0.125 is the diagonal of a csquare, which is the maximum extent to which a point can lie in a csquare + private static String inthesea="select * into %1$s from (select distinct a.* from %2$s as a join hcaf_d as b on ((b.centerlat-a.%3$s)*(b.centerlat-a.%3$s)+(b.centerlong-a.%4$s)*(b.centerlong-a.%4$s)<= 0.125) and b.oceanarea>0) as t"; + private static String onearth="select * into %1$s from (select distinct a.* from %2$s as a join hcaf_d as b on ((b.centerlat-a.%3$s)*(b.centerlat-a.%3$s)+(b.centerlong-a.%4$s)*(b.centerlong-a.%4$s)<= 0.125) and b.landdist<=0.3) as t"; + static String tableNameF = "OccurrencePointsTableName"; + static String filterTypeF = "FilterType"; + String tableName; + public static enum inseasonearth {IN_THE_WATER, ON_EARTH}; + inseasonearth filter; + + @Override + public List getInputParameters() { + List templatesOccurrence = new ArrayList(); + templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES); + // occurrence points tables + InputTable p1 = new InputTable(templatesOccurrence, tableNameF, "The table containing the occurrence points", ""); + // string parameters + ColumnType p3 = new ColumnType(tableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false); + ColumnType p4 = new ColumnType(tableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false); + ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, 
finalTableNameF, "Name of the resulting table", "processedOccurrences_"); + PrimitiveType p10 = new PrimitiveType(Enum.class.getName(), inseasonearth.values(), PrimitiveTypes.ENUMERATED, filterTypeF, "The filter type",""+inseasonearth.IN_THE_WATER); + + List inputs = new ArrayList(); + inputs.add(p1); + inputs.add(p3); + inputs.add(p4); + inputs.add(p9); + inputs.add(p10); + + DatabaseType.addDefaultDBPars(inputs); + return inputs; + } + + + @Override + public void init() throws Exception { + + AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile); + lonFld = config.getParam(longitudeColumn); + latFld = config.getParam(latitudeColumn); + tableName = config.getParam(tableNameF); + finalTableName = config.getParam(finalTableNameF); + filter = inseasonearth.valueOf(config.getParam(filterTypeF)); + status = 0; + } + + @Override + public String getDescription() { + return "An algorithm for filtering only the points lying in the seas or on the earth. It acts on occurrence points of species coming from the Species Discovery Facility of D4Science"; + } + + @Override + protected void prepareFinalTable(){ + DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection); + } + + @Override + public void compute() throws Exception { + + try { + // init DB connection + AnalysisLogger.getLogger().trace("Initializing DB Connection"); + dbconnection = DatabaseUtils.initDBSession(config); + AnalysisLogger.getLogger().trace("Taking Table Description"); + AnalysisLogger.getLogger().trace("Creating merged table: " + finalTableName); + // create new merged table + DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection); +// prepareFinalTable(); + status = 10; + String generationquery = ""; + if (filter==inseasonearth.IN_THE_WATER) + generationquery = String.format(inthesea,finalTableName,tableName,latFld,lonFld); + else + generationquery = 
String.format(onearth,finalTableName,tableName,latFld,lonFld); + + AnalysisLogger.getLogger().trace("Applying filter " + filter.name()); + AnalysisLogger.getLogger().trace("Applying query " + generationquery); + DatabaseFactory.executeSQLUpdate(generationquery, dbconnection); + AnalysisLogger.getLogger().trace("Final Table created!"); + + } catch (Exception e) { + throw e; + } finally { + if (dbconnection != null) + dbconnection.close(); + status = 100; + AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed"); + } + + } + +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsIntersector.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsIntersector.java new file mode 100644 index 0000000..7cde63b --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsIntersector.java @@ -0,0 +1,67 @@ +package org.gcube.dataanalysis.ecoengine.transducers; + +import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory; +import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.test.regression.Regressor; +import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; + +public class OccurrencePointsIntersector extends OccurrencePointsMerger{ + + public OccurrencePointsIntersector(){ + + } + + @Override + public String getDescription() { + return "An algorithm for intersecting two sets of occurrence points of species coming from the Species Discovery Facility of D4Science"; + } + + @Override + protected void prepareFinalTable(){ + DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(leftTableName, finalTableName), dbconnection); + } + + @Override + protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) { + if ( + 
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate)) + || + (leftOcc.modifdate==null)&&(rightOcc.modifdate!=null) + ) + objectstoinsert.add(rightOcc); + else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate) + || + (leftOcc.modifdate!=null)&&(rightOcc.modifdate==null)) + objectstoinsert.add(leftOcc); + else + objectstoinsert.add(leftOcc); + } + + @Override + protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) { + + } + + + public static void main(String[] args) throws Exception { + AlgorithmConfiguration config = Regressor.getConfig(); + config.setNumberOfResources(1); + config.setParam(longitudeColumn, "decimallongitude"); + config.setParam(latitudeColumn, "decimallatitude"); + config.setParam(recordedByColumn, "recordedby"); + config.setParam(scientificNameColumn, "scientificname"); + config.setParam(eventDateColumn, "eventdate"); + config.setParam(lastModificationColumn, "modified"); + config.setParam(rightTableNameF, "whitesharkoccurrences2"); + config.setParam(leftTableNameF, "whitesharkoccurrences1"); + config.setParam(finalTableNameF, "whitesharkoccurrencesintersected"); + config.setParam(spatialTolerance, "0.5"); + config.setParam(confidence, "0.8"); + + OccurrencePointsIntersector occm = new OccurrencePointsIntersector(); + occm.setConfiguration(config); + occm.init(); + occm.compute(); + } + +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java index 2bc2d5e..d5ec2a5 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java @@ -38,7 +38,7 @@ public class OccurrencePointsMerger implements Transducerer { static String 
lastModificationColumn = "lastModificationColumn"; static String rightTableNameF = "rightTableName"; static String leftTableNameF = "leftTableName"; - static String mergedTableNameF = "mergedTableName"; + static String finalTableNameF = "finalTableName"; static String spatialTolerance = "spatialTolerance"; static String confidence = "confidence"; @@ -54,7 +54,7 @@ public class OccurrencePointsMerger implements Transducerer { protected String modifDatFld; protected String leftTableName; protected String rightTableName; - protected String mergedTableName; + protected String finalTableName; protected float spatialToleranceValue; protected float confidenceValue; protected StringBuffer columns; @@ -64,6 +64,11 @@ public class OccurrencePointsMerger implements Transducerer { protected SessionFactory dbconnection; protected float status; + + public OccurrencePointsMerger(){ + + } + protected class OccurrenceRecord { public String scientificName; @@ -179,30 +184,6 @@ public class OccurrencePointsMerger implements Transducerer { return buffer.toString(); } - public static void main(String[] args) throws Exception { - AlgorithmConfiguration config = Regressor.getConfig(); - config.setNumberOfResources(1); - config.setParam(longitudeColumn, "decimallongitude"); - config.setParam(latitudeColumn, "decimallatitude"); - config.setParam(recordedByColumn, "recordedby"); - config.setParam(scientificNameColumn, "scientificname"); - config.setParam(eventDateColumn, "eventdate"); - config.setParam(lastModificationColumn, "modified"); - config.setParam(rightTableNameF, "whitesharkoccurrences2"); - config.setParam(leftTableNameF, "whitesharkoccurrences1"); - // config.setParam(rightTableNameF,"whitesharkoccurrences2"); - // config.setParam(rightTableNameF,"whitesharkoccurrences1"); - config.setParam(mergedTableNameF, "whitesharkoccurrencesmerged"); - config.setParam(spatialTolerance, "0.5"); - config.setParam(confidence, "0.8"); - - List templatesOccurrence = new ArrayList(); - 
templatesOccurrence.add(TableTemplates.OCCURRENCE_AQUAMAPS); - OccurrencePointsMerger occm = new OccurrencePointsMerger(); - occm.setConfiguration(config); - occm.init(); - occm.compute(); - } @Override public List getInputParameters() { @@ -219,7 +200,7 @@ public class OccurrencePointsMerger implements Transducerer { ColumnType p6 = new ColumnType(leftTableNameF, scientificNameColumn, "column with Scientific Names", "scientificname", false); ColumnType p7 = new ColumnType(leftTableNameF, eventDateColumn, "column with EventDate values", "eventdate", false); ColumnType p8 = new ColumnType(leftTableNameF, lastModificationColumn, "column with Modified values", "modified", false); - ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, mergedTableNameF, "Name of the final produced", "mergedoccurrences_"); + ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, finalTableNameF, "Name of the resulting table", "processedOccurrences_"); PrimitiveType p10 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, spatialTolerance, "The tolerance in degree for assessing that two points could be the same", "0.5"); PrimitiveType p11 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, confidence, "The overall acceptance similarity threshold over which two points are the same", "0.8"); @@ -272,7 +253,7 @@ public class OccurrencePointsMerger implements Transducerer { List templatesOccurrence = new ArrayList(); templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES); // occurrence points tables - OutputTable p = new OutputTable(templatesOccurrence, mergedTableName, mergedTableName, "The output table containing the merged points"); + OutputTable p = new OutputTable(templatesOccurrence, finalTableName, finalTableName, "The output table containing the processed points"); return p; } @@ -289,7 +270,7 @@ public class OccurrencePointsMerger implements Transducerer { modifDatFld = config.getParam(lastModificationColumn); 
leftTableName = config.getParam(leftTableNameF); rightTableName = config.getParam(rightTableNameF); - mergedTableName = config.getParam(mergedTableNameF); + finalTableName = config.getParam(finalTableNameF); spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance)); confidenceValue = Float.parseFloat(config.getParam(confidence)); @@ -379,9 +360,21 @@ public class OccurrencePointsMerger implements Transducerer { String sci = scientificNameFld + "='" + record.scientificName.replace("'","")+"'"; String x = lonFld + "='" + record.x+"'"; String y = latFld + "='" + record.y+"'"; - + String event = null; + String modified = null; + if (record.eventdate!=null) + event = eventDatFld + "='" + convert2conventionalFormat(record.eventdate)+"'"; + + if (record.modifdate!=null) + modified = modifDatFld + "='" + convert2conventionalFormat(record.modifdate)+"'"; + buffer.append("("); buffer.append(rec + " AND " + sci + " AND " + x + " AND " + y); + if (event!=null) + buffer.append(" AND "+event); + if (modified!=null) + buffer.append(" AND "+modified); + buffer.append(")"); if (counter < todel - 1) buffer.append(" OR "); @@ -389,7 +382,7 @@ public class OccurrencePointsMerger implements Transducerer { counter++; } - String updateQ = DatabaseUtils.deleteFromBuffer(mergedTableName, buffer); + String updateQ = DatabaseUtils.deleteFromBuffer(finalTableName, buffer); // System.out.println("Update:\n"+updateQ); DatabaseFactory.executeSQLUpdate(updateQ, dbconnection); AnalysisLogger.getLogger().debug("Objects deleted"); @@ -410,14 +403,29 @@ public class OccurrencePointsMerger implements Transducerer { counter++; } - String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName, columns.toString(), buffer); - // System.out.println("Update:\n"+updateQ); + String updateQ = DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer); +// System.out.println("Update:\n"+updateQ); DatabaseFactory.executeSQLUpdate(updateQ, dbconnection); 
AnalysisLogger.getLogger().debug("Objects inserted"); } } + protected void prepareFinalTable(){ + DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, finalTableName), dbconnection); + } + + protected void extractColumnNames(){ + // take the description of the table + columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName), dbconnection); + int nCols = columnsNames.size(); + columns = new StringBuffer(); + for (int i = 0; i < nCols; i++) { + columns.append("\"" + columnsNames.get(i) + "\""); + if (i < nCols - 1) + columns.append(","); + } + } @Override public void compute() throws Exception { @@ -426,22 +434,13 @@ public class OccurrencePointsMerger implements Transducerer { AnalysisLogger.getLogger().trace("Initializing DB Connection"); dbconnection = DatabaseUtils.initDBSession(config); AnalysisLogger.getLogger().trace("Taking Table Description"); - // take the description of the table - columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName), dbconnection); - - int nCols = columnsNames.size(); - columns = new StringBuffer(); - for (int i = 0; i < nCols; i++) { - columns.append("\"" + columnsNames.get(i) + "\""); - if (i < nCols - 1) - columns.append(","); - } + extractColumnNames(); AnalysisLogger.getLogger().trace("Taken Table Description: " + columns); - AnalysisLogger.getLogger().trace("Creating merged table: " + mergedTableName); + AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName); // create new merged table - DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(mergedTableName), dbconnection); - DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, mergedTableName), dbconnection); + DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection); + prepareFinalTable(); // take the elements from sx table 
AnalysisLogger.getLogger().trace("Taking elements from left table: " + leftTableName); List leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(), ""), dbconnection); @@ -511,4 +510,25 @@ public class OccurrencePointsMerger implements Transducerer { AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed"); } } + + public static void main(String[] args) throws Exception { + AlgorithmConfiguration config = Regressor.getConfig(); + config.setNumberOfResources(1); + config.setParam(longitudeColumn, "decimallongitude"); + config.setParam(latitudeColumn, "decimallatitude"); + config.setParam(recordedByColumn, "recordedby"); + config.setParam(scientificNameColumn, "scientificname"); + config.setParam(eventDateColumn, "eventdate"); + config.setParam(lastModificationColumn, "modified"); + config.setParam(rightTableNameF, "whitesharkoccurrences2"); + config.setParam(leftTableNameF, "whitesharkoccurrences1"); + config.setParam(finalTableNameF, "whitesharkoccurrencesmerged"); + config.setParam(spatialTolerance, "0.5"); + config.setParam(confidence, "0.8"); + + OccurrencePointsMerger occm = new OccurrencePointsMerger(); + occm.setConfiguration(config); + occm.init(); + occm.compute(); + } } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java index 71eff8a..ee2ef53 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java @@ -105,12 +105,17 @@ public class DatabaseUtils { return "select * into " + tableTo + " from " + tableFrom; } + public static String createBlankTableFromAnotherStatement(String tableFrom, String tableTo) { + return "select * into "+tableTo+" from (select * from "+tableFrom+" limit 0) a"; + } + + public static String dropTableStatement(String table) { return "drop 
table " + table; } - public static String getDinstictElements(String table, String column,String filter) { - return "select distinct " + column + " from " + table + " " + filter+" order by " + column; + public static String getDinstictElements(String table, String columns,String filter) { + return "select distinct " + columns + " from " + table + " " + filter+" order by " + columns; } public static String getOrderedElements(String table, String key, String column) {