added algebraic operations

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@58802 82a268e6-3cf1-43bd-a215-b396298e98cf
Gianpaolo Coro 2012-09-20 12:47:28 +00:00
parent 00f8dbdb88
commit e49bef4ae2
9 changed files with 505 additions and 58 deletions

View File

@ -2,12 +2,5 @@ AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.Aquamaps
AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050
REMOTE_AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
DUMMY=org.gcube.dataanalysis.ecoengine.spatialdistributions.DummyAlgorithm
TEST=org.gcube.dataanalysis.ecoengine.spatialdistributions.TestAlgorithm
AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN
AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable
AQUAMAPS_NEURAL_NETWORK_NS=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNNS

View File

@ -0,0 +1,13 @@
AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable
AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative
AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNative2050
AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsSuitable2050
REMOTE_AQUAMAPS_SUITABLE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_NATIVE=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_NATIVE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
REMOTE_AQUAMAPS_SUITABLE_2050=org.gcube.dataanalysis.ecoengine.processing.RainyCloudGenerator
DUMMY=org.gcube.dataanalysis.ecoengine.spatialdistributions.DummyAlgorithm
TEST=org.gcube.dataanalysis.ecoengine.spatialdistributions.TestAlgorithm
AQUAMAPS_NATIVE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNN
AQUAMAPS_SUITABLE_NEURALNETWORK=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNSuitable
AQUAMAPS_NEURAL_NETWORK_NS=org.gcube.dataanalysis.ecoengine.spatialdistributions.AquamapsNNNS
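
For orientation: each entry in this registry maps an algorithm name to the fully qualified class that implements it, so the engine can instantiate algorithms by name through reflection. A minimal sketch of that pattern, assuming the registry is available on the classpath; the AlgorithmLoader class and the algorithms.properties file name below are hypothetical illustrations, not the engine's actual factory code:

import java.io.InputStream;
import java.util.Properties;

public class AlgorithmLoader {
	// resolve an algorithm name (e.g. "AQUAMAPS_SUITABLE") to an instance of its configured class
	public static Object load(String algorithmName) throws Exception {
		Properties registry = new Properties();
		try (InputStream is = AlgorithmLoader.class.getResourceAsStream("/algorithms.properties")) {
			if (is == null)
				throw new IllegalStateException("registry file not found on the classpath");
			registry.load(is);
		}
		String className = registry.getProperty(algorithmName);
		if (className == null)
			throw new IllegalArgumentException("unknown algorithm: " + algorithmName);
		// instantiate reflectively; callers cast to the expected interface (e.g. Transducerer)
		return Class.forName(className).getDeclaredConstructor().newInstance();
	}
}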

View File

@ -1,4 +1,7 @@
BIOCLIMATE_HSPEC=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPECTransducer
BIOCLIMATE_HCAF=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHCAFTransducer
BIOCLIMATE_HSPEN=org.gcube.dataanalysis.ecoengine.transducers.BioClimateHSPENTransducer
HCAF_INTERPOLATION=org.gcube.dataanalysis.ecoengine.transducers.InterpolationTransducer
OCCURRENCES_MERGER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsMerger
OCCURRENCES_INSEAS_ONEARTH=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsInSeaOnEarth
OCCURRENCES_DUPLICATE_DELETER=org.gcube.dataanalysis.ecoengine.transducers.OccurrencePointsDuplicatesDeleter

View File

@ -13,7 +13,7 @@ public static void main(String[] args) throws Exception {
System.out.println("TEST 1");
List<Transducerer> trans = null;
/*
trans = TransducerersFactory.getTransducerers(testConfigLocal());
trans.get(0).init();
Regressor.process(trans.get(0));
@ -38,6 +38,16 @@ public static void main(String[] args) throws Exception {
trans.get(0).init();
Regressor.process(trans.get(0));
trans = null;
trans = TransducerersFactory.getTransducerers(testConfigLocal6());
trans.get(0).init();
Regressor.process(trans.get(0));
trans = null;
*/
trans = TransducerersFactory.getTransducerers(testConfigLocal7());
trans.get(0).init();
Regressor.process(trans.get(0));
trans = null;
}
@ -101,12 +111,47 @@ public static void main(String[] args) throws Exception {
config.setParam("lastModificationColumn", "modified");
config.setParam("rightTableName", "whitesharkoccurrences2");
config.setParam("leftTableName", "whitesharkoccurrences1");
config.setParam("mergedTableName", "whitesharkoccurrencesmerged");
config.setParam("finalTableName", "whitesharkoccurrencesmerged");
config.setParam("spatialTolerance", "0.5");
config.setParam("confidence", "0.8");
return config;
}
private static AlgorithmConfiguration testConfigLocal6() {
AlgorithmConfiguration config = Regressor.getConfig();
config.setAgent("OCCURRENCES_INSEAS_ONEARTH");
config.setParam("longitudeColumn", "decimallongitude");
config.setParam("latitudeColumn", "decimallatitude");
config.setParam("OccurrencePointsTableName", "whitesharkoccurrences2");
config.setParam("finalTableName", "whitesharkoccurrencesfilteredseas");
config.setParam("FilterType", "IN_THE_WATER");
// config.setParam("FilterType", "ON_EARTH");
return config;
}
private static AlgorithmConfiguration testConfigLocal7() {
AlgorithmConfiguration config = Regressor.getConfig();
config.setAgent("OCCURRENCES_DUPLICATE_DELETER");
config.setParam("longitudeColumn", "decimallongitude");
config.setParam("latitudeColumn", "decimallatitude");
config.setParam("recordedByColumn", "recordedby");
config.setParam("scientificNameColumn", "scientificname");
config.setParam("eventDateColumn", "eventdate");
config.setParam("lastModificationColumn", "modified");
config.setParam("OccurrencePointsTableName", "whitesharkoccurrences2");
config.setParam("finalTableName", "whitesharkoccurrencesnoduplicates");
config.setParam("spatialTolerance", "0.5");
config.setParam("confidence", "0.8");
return config;
}
}

View File

@ -0,0 +1,189 @@
package org.gcube.dataanalysis.ecoengine.transducers;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
static String tableNameF = "OccurrencePointsTableName";
String tableName;
List<String> records = new ArrayList<String>();
public OccurrencePointsDuplicatesDeleter(){
}
@Override
public List<StatisticalType> getInputParameters() {
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES);
// occurrence points tables
InputTable p1 = new InputTable(templatesOccurrence, tableNameF, "The table containing the occurrence points", "");
ColumnType p3 = new ColumnType(tableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false);
ColumnType p4 = new ColumnType(tableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false);
ColumnType p5 = new ColumnType(tableNameF, recordedByColumn, "column with RecordedBy values", "recordedby", false);
ColumnType p6 = new ColumnType(tableNameF, scientificNameColumn, "column with Scientific Names", "scientificname", false);
ColumnType p7 = new ColumnType(tableNameF, eventDateColumn, "column with EventDate values", "eventdate", false);
ColumnType p8 = new ColumnType(tableNameF, lastModificationColumn, "column with Modified values", "modified", false);
ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, finalTableNameF, "Name of the resulting table", "processedOccurrences_");
PrimitiveType p10 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, spatialTolerance, "The tolerance in degree for assessing that two points could be the same", "0.5");
PrimitiveType p11 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, confidence, "The overall acceptance similarity threshold over which two points are the same", "0.8");
List<StatisticalType> inputs = new ArrayList<StatisticalType>();
inputs.add(p1);
inputs.add(p3);
inputs.add(p4);
inputs.add(p5);
inputs.add(p6);
inputs.add(p7);
inputs.add(p8);
inputs.add(p9);
inputs.add(p10);
inputs.add(p11);
DatabaseType.addDefaultDBPars(inputs);
return inputs;
}
@Override
public String getDescription() {
return "An algorithm for deleting similar occurrences in a sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
}
@Override
public void init() throws Exception {
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
lonFld = config.getParam(longitudeColumn);
latFld = config.getParam(latitudeColumn);
recordedByFld = config.getParam(recordedByColumn);
scientificNameFld = config.getParam(scientificNameColumn);
eventDatFld = config.getParam(eventDateColumn);
modifDatFld = config.getParam(lastModificationColumn);
tableName = config.getParam(tableNameF);
rightTableName=tableName;
leftTableName=tableName;
finalTableName = config.getParam(finalTableNameF);
spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance));
confidenceValue = Float.parseFloat(config.getParam(confidence));
objectstoinsert = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
records = new ArrayList<String>();
status = 0;
}
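// Keep the record with the more recent modification date: returns true only when leftOcc is strictly
// newer than rightOcc (a record with a date beats one without); on ties, or when both dates are missing,
// it returns false, so the caller keeps the record that was already inserted.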
protected boolean isBetterThan(OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
if (
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
||
(leftOcc.modifdate==null)&&(rightOcc.modifdate!=null)
)
return false;
else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate)
||
(leftOcc.modifdate!=null)&&(rightOcc.modifdate==null))
return true;
else
return false;
}
@Override
protected void prepareFinalTable(){
DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection);
}
@Override
public void compute() throws Exception {
try {
// init DB connection
AnalysisLogger.getLogger().trace("Initializing DB Connection");
dbconnection = DatabaseUtils.initDBSession(config);
AnalysisLogger.getLogger().trace("Taking Table Description");
AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName);
// create new merged table
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
AnalysisLogger.getLogger().trace("Preparing table: " + finalTableName);
prepareFinalTable();
AnalysisLogger.getLogger().trace("Extracting columns from: " + finalTableName);
extractColumnNames();
AnalysisLogger.getLogger().trace("Taken Table Description: " + columns);
// take distinct elements from table
AnalysisLogger.getLogger().trace("Taking elements from table: " + tableName);
List<Object> rows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(tableName, columns.toString(), ""), dbconnection);
// for each element in dx
AnalysisLogger.getLogger().trace("Processing");
status = 10;
int similaritiesCounter = 0;
int allrows = rows.size();
int rowcounter = 0;
for (Object row : rows) {
// transform into an occurrence object
OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row);
// for each element in the white list
int k = 0;
int insertedSize = objectstoinsert.size();
boolean candidate = true;
while (k<insertedSize) {
OccurrenceRecord yetInserted = objectstoinsert.get(k);
float prob = extProb(yetInserted, testOcc);
//if the test occurrence is better than the one already inserted, delete the already inserted record and insert the new occurrence at the end
if (prob >= confidenceValue) {
similaritiesCounter++;
if (isBetterThan(testOcc, yetInserted)) {
AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between (" + "\"" + testOcc.scientificName + "\"" + "," + testOcc.x + "\"" + "," + "\"" + testOcc.y + "\"" + "," + "\"" + testOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(testOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + yetInserted.scientificName + "\"" + "," + "\"" + yetInserted.x + "\"" + "," + "\"" + yetInserted.y + "\"" + "," + "\"" + yetInserted.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(yetInserted.eventdate) + "\"" + ")");
objectstoinsert.remove(k);
k--;
insertedSize--;
}
//if an already inserted record is better, discard the testOcc
else{
candidate=false;
break;
}
}
k++;
}
if (candidate)
objectstoinsert.add(testOcc);
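// map row progress onto the 10-90% status range; the remaining 10% covers the final persist step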
status = Math.min(90, 10f + (80 * ((float) rowcounter) / ((float) allrows)));
rowcounter++;
}
AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + allrows + " distinct elements");
status = 90;
// transform the complete list into a table
persist();
// close DB connection
} catch (Exception e) {
throw e;
} finally {
if (dbconnection != null)
dbconnection.close();
status = 100;
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
}
}
}

View File

@ -0,0 +1,112 @@
package org.gcube.dataanalysis.ecoengine.transducers;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
public class OccurrencePointsInSeaOnEarth extends OccurrencePointsMerger{
//NOTE: 0.125 = 0.25^2 + 0.25^2 is the squared half-diagonal of a 0.5-degree csquare; since the queries below compare squared distances, this is the maximum squared distance from a csquare centre at which a point can still lie inside that csquare
private static String inthesea="select * into %1$s from (select distinct a.* from %2$s as a join hcaf_d as b on ((b.centerlat-a.%3$s)*(b.centerlat-a.%3$s)+(b.centerlong-a.%4$s)*(b.centerlong-a.%4$s)<= 0.125) and b.oceanarea>0) as t";
private static String onearth="select * into %1$s from (select distinct a.* from %2$s as a join hcaf_d as b on ((b.centerlat-a.%3$s)*(b.centerlat-a.%3$s)+(b.centerlong-a.%4$s)*(b.centerlong-a.%4$s)<= 0.125) and b.landdist<=0.3) as t";
static String tableNameF = "OccurrencePointsTableName";
static String filterTypeF = "FilterType";
String tableName;
public static enum inseasonearth {IN_THE_WATER, ON_EARTH};
inseasonearth filter;
@Override
public List<StatisticalType> getInputParameters() {
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES);
// occurrence points tables
InputTable p1 = new InputTable(templatesOccurrence, tableNameF, "The table containing the occurrence points", "");
// string parameters
ColumnType p3 = new ColumnType(tableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false);
ColumnType p4 = new ColumnType(tableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false);
ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, finalTableNameF, "Name of the resulting table", "processedOccurrences_");
PrimitiveType p10 = new PrimitiveType(Enum.class.getName(), inseasonearth.values(), PrimitiveTypes.ENUMERATED, filterTypeF, "The filter type",""+inseasonearth.IN_THE_WATER);
List<StatisticalType> inputs = new ArrayList<StatisticalType>();
inputs.add(p1);
inputs.add(p3);
inputs.add(p4);
inputs.add(p9);
inputs.add(p10);
DatabaseType.addDefaultDBPars(inputs);
return inputs;
}
@Override
public void init() throws Exception {
AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
lonFld = config.getParam(longitudeColumn);
latFld = config.getParam(latitudeColumn);
tableName = config.getParam(tableNameF);
finalTableName = config.getParam(finalTableNameF);
filter = inseasonearth.valueOf(config.getParam(filterTypeF));
status = 0;
}
@Override
public String getDescription() {
return "An algorithm for filtering only the points lying in the seas or on the earth. It acts on occurrence points of species coming from the Species Discovery Facility of D4Science";
}
@Override
protected void prepareFinalTable(){
DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection);
}
@Override
public void compute() throws Exception {
try {
// init DB connection
AnalysisLogger.getLogger().trace("Initializing DB Connection");
dbconnection = DatabaseUtils.initDBSession(config);
AnalysisLogger.getLogger().trace("Taking Table Description");
AnalysisLogger.getLogger().trace("Creating merged table: " + finalTableName);
// create new merged table
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
// prepareFinalTable();
status = 10;
String generationquery = "";
if (filter==inseasonearth.IN_THE_WATER)
generationquery = String.format(inthesea,finalTableName,tableName,latFld,lonFld);
else
generationquery = String.format(onearth,finalTableName,tableName,latFld,lonFld);
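// Illustrative rendering of the IN_THE_WATER template with hypothetical values
// (finalTableName=filtered_occ, tableName=occ, latFld=decimallatitude, lonFld=decimallongitude):
// select * into filtered_occ from (select distinct a.* from occ as a join hcaf_d as b
// on ((b.centerlat-a.decimallatitude)*(b.centerlat-a.decimallatitude)+(b.centerlong-a.decimallongitude)*(b.centerlong-a.decimallongitude)<= 0.125) and b.oceanarea>0) as t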
AnalysisLogger.getLogger().trace("Applying filter " + filter.name());
AnalysisLogger.getLogger().trace("Applying query " + generationquery);
DatabaseFactory.executeSQLUpdate(generationquery, dbconnection);
AnalysisLogger.getLogger().trace("Final Table created!");
} catch (Exception e) {
throw e;
} finally {
if (dbconnection != null)
dbconnection.close();
status = 100;
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
}
}
}

View File

@ -0,0 +1,67 @@
package org.gcube.dataanalysis.ecoengine.transducers;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
public class OccurrencePointsIntersector extends OccurrencePointsMerger{
public OccurrencePointsIntersector(){
}
@Override
public String getDescription() {
return "An algorithm for intesecting two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
}
@Override
protected void prepareFinalTable(){
DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(leftTableName, finalTableName), dbconnection);
}
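// On a confident match, keep the more recently modified of the two records (a record with a
// modification date beats one without); when neither dominates, the left record wins by default.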
@Override
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
if (
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
||
(leftOcc.modifdate==null)&&(rightOcc.modifdate!=null)
)
objectstoinsert.add(rightOcc);
else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate)
||
(leftOcc.modifdate!=null)&&(rightOcc.modifdate==null))
objectstoinsert.add(leftOcc);
else
objectstoinsert.add(leftOcc);
}
@Override
protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
}
public static void main(String[] args) throws Exception {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setParam(longitudeColumn, "decimallongitude");
config.setParam(latitudeColumn, "decimallatitude");
config.setParam(recordedByColumn, "recordedby");
config.setParam(scientificNameColumn, "scientificname");
config.setParam(eventDateColumn, "eventdate");
config.setParam(lastModificationColumn, "modified");
config.setParam(rightTableNameF, "whitesharkoccurrences2");
config.setParam(leftTableNameF, "whitesharkoccurrences1");
config.setParam(finalTableNameF, "whitesharkoccurrencesintersected");
config.setParam(spatialTolerance, "0.5");
config.setParam(confidence, "0.8");
OccurrencePointsIntersector occm = new OccurrencePointsIntersector();
occm.setConfiguration(config);
occm.init();
occm.compute();
}
}

View File

@ -38,7 +38,7 @@ public class OccurrencePointsMerger implements Transducerer {
static String lastModificationColumn = "lastModificationColumn";
static String rightTableNameF = "rightTableName";
static String leftTableNameF = "leftTableName";
static String mergedTableNameF = "mergedTableName";
static String finalTableNameF = "finalTableName";
static String spatialTolerance = "spatialTolerance";
static String confidence = "confidence";
@ -54,7 +54,7 @@ public class OccurrencePointsMerger implements Transducerer {
protected String modifDatFld;
protected String leftTableName;
protected String rightTableName;
protected String mergedTableName;
protected String finalTableName;
protected float spatialToleranceValue;
protected float confidenceValue;
protected StringBuffer columns;
@ -64,6 +64,11 @@ public class OccurrencePointsMerger implements Transducerer {
protected SessionFactory dbconnection;
protected float status;
public OccurrencePointsMerger(){
}
protected class OccurrenceRecord {
public String scientificName;
@ -179,30 +184,6 @@ public class OccurrencePointsMerger implements Transducerer {
return buffer.toString();
}
public static void main(String[] args) throws Exception {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setParam(longitudeColumn, "decimallongitude");
config.setParam(latitudeColumn, "decimallatitude");
config.setParam(recordedByColumn, "recordedby");
config.setParam(scientificNameColumn, "scientificname");
config.setParam(eventDateColumn, "eventdate");
config.setParam(lastModificationColumn, "modified");
config.setParam(rightTableNameF, "whitesharkoccurrences2");
config.setParam(leftTableNameF, "whitesharkoccurrences1");
// config.setParam(rightTableNameF,"whitesharkoccurrences2");
// config.setParam(rightTableNameF,"whitesharkoccurrences1");
config.setParam(mergedTableNameF, "whitesharkoccurrencesmerged");
config.setParam(spatialTolerance, "0.5");
config.setParam(confidence, "0.8");
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
templatesOccurrence.add(TableTemplates.OCCURRENCE_AQUAMAPS);
OccurrencePointsMerger occm = new OccurrencePointsMerger();
occm.setConfiguration(config);
occm.init();
occm.compute();
}
@Override
public List<StatisticalType> getInputParameters() {
@ -219,7 +200,7 @@ public class OccurrencePointsMerger implements Transducerer {
ColumnType p6 = new ColumnType(leftTableNameF, scientificNameColumn, "column with Scientific Names", "scientificname", false);
ColumnType p7 = new ColumnType(leftTableNameF, eventDateColumn, "column with EventDate values", "eventdate", false);
ColumnType p8 = new ColumnType(leftTableNameF, lastModificationColumn, "column with Modified values", "modified", false);
ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, mergedTableNameF, "Name of the final produced", "mergedoccurrences_");
ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, finalTableNameF, "Name of the resulting table", "processedOccurrences_");
PrimitiveType p10 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, spatialTolerance, "The tolerance in degree for assessing that two points could be the same", "0.5");
PrimitiveType p11 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, confidence, "The overall acceptance similarity threshold over which two points are the same", "0.8");
@ -272,7 +253,7 @@ public class OccurrencePointsMerger implements Transducerer {
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES);
// occurrence points tables
OutputTable p = new OutputTable(templatesOccurrence, mergedTableName, mergedTableName, "The output table containing the merged points");
OutputTable p = new OutputTable(templatesOccurrence, finalTableName, finalTableName, "The output table containing the processed points");
return p;
}
@ -289,7 +270,7 @@ public class OccurrencePointsMerger implements Transducerer {
modifDatFld = config.getParam(lastModificationColumn);
leftTableName = config.getParam(leftTableNameF);
rightTableName = config.getParam(rightTableNameF);
mergedTableName = config.getParam(mergedTableNameF);
finalTableName = config.getParam(finalTableNameF);
spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance));
confidenceValue = Float.parseFloat(config.getParam(confidence));
@ -379,9 +360,21 @@ public class OccurrencePointsMerger implements Transducerer {
String sci = scientificNameFld + "='" + record.scientificName.replace("'","")+"'";
String x = lonFld + "='" + record.x+"'";
String y = latFld + "='" + record.y+"'";
String event = null;
String modified = null;
if (record.eventdate!=null)
event = eventDatFld + "='" + convert2conventionalFormat(record.eventdate)+"'";
if (record.modifdate!=null)
modified = modifDatFld + "='" + convert2conventionalFormat(record.modifdate)+"'";
buffer.append("(");
buffer.append(rec + " AND " + sci + " AND " + x + " AND " + y);
if (event!=null)
buffer.append(" AND "+event);
if (modified!=null)
buffer.append(" AND "+modified);
buffer.append(")");
if (counter < todel - 1)
buffer.append(" OR ");
@ -389,7 +382,7 @@ public class OccurrencePointsMerger implements Transducerer {
counter++;
}
String updateQ = DatabaseUtils.deleteFromBuffer(mergedTableName, buffer);
String updateQ = DatabaseUtils.deleteFromBuffer(finalTableName, buffer);
// System.out.println("Update:\n"+updateQ);
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
AnalysisLogger.getLogger().debug("Objects deleted");
@ -410,14 +403,29 @@ public class OccurrencePointsMerger implements Transducerer {
counter++;
}
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName, columns.toString(), buffer);
// System.out.println("Update:\n"+updateQ);
String updateQ = DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer);
// System.out.println("Update:\n"+updateQ);
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
AnalysisLogger.getLogger().debug("Objects inserted");
}
}
protected void prepareFinalTable(){
DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, finalTableName), dbconnection);
}
protected void extractColumnNames(){
// take the description of the table
columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName), dbconnection);
int nCols = columnsNames.size();
columns = new StringBuffer();
for (int i = 0; i < nCols; i++) {
columns.append("\"" + columnsNames.get(i) + "\"");
if (i < nCols - 1)
columns.append(",");
}
}
@Override
public void compute() throws Exception {
@ -426,22 +434,13 @@ public class OccurrencePointsMerger implements Transducerer {
AnalysisLogger.getLogger().trace("Initializing DB Connection");
dbconnection = DatabaseUtils.initDBSession(config);
AnalysisLogger.getLogger().trace("Taking Table Description");
// take the description of the table
columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName), dbconnection);
int nCols = columnsNames.size();
columns = new StringBuffer();
for (int i = 0; i < nCols; i++) {
columns.append("\"" + columnsNames.get(i) + "\"");
if (i < nCols - 1)
columns.append(",");
}
extractColumnNames();
AnalysisLogger.getLogger().trace("Taken Table Description: " + columns);
AnalysisLogger.getLogger().trace("Creating merged table: " + mergedTableName);
AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName);
// create new merged table
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(mergedTableName), dbconnection);
DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, mergedTableName), dbconnection);
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
prepareFinalTable();
// take the elements from sx table
AnalysisLogger.getLogger().trace("Taking elements from left table: " + leftTableName);
List<Object> leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(), ""), dbconnection);
@ -511,4 +510,25 @@ public class OccurrencePointsMerger implements Transducerer {
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
}
}
public static void main(String[] args) throws Exception {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setParam(longitudeColumn, "decimallongitude");
config.setParam(latitudeColumn, "decimallatitude");
config.setParam(recordedByColumn, "recordedby");
config.setParam(scientificNameColumn, "scientificname");
config.setParam(eventDateColumn, "eventdate");
config.setParam(lastModificationColumn, "modified");
config.setParam(rightTableNameF, "whitesharkoccurrences2");
config.setParam(leftTableNameF, "whitesharkoccurrences1");
config.setParam(finalTableNameF, "whitesharkoccurrencesmerged");
config.setParam(spatialTolerance, "0.5");
config.setParam(confidence, "0.8");
OccurrencePointsMerger occm = new OccurrencePointsMerger();
occm.setConfiguration(config);
occm.init();
occm.compute();
}
}

View File

@ -105,12 +105,17 @@ public class DatabaseUtils {
return "select * into " + tableTo + " from " + tableFrom;
}
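// copies only the schema: 'limit 0' selects no rows, so tableTo is created empty with tableFrom's column structure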
public static String createBlankTableFromAnotherStatement(String tableFrom, String tableTo) {
return "select * into "+tableTo+" from (select * from "+tableFrom+" limit 0) a";
}
public static String dropTableStatement(String table) {
return "drop table " + table;
}
public static String getDinstictElements(String table, String column,String filter) {
return "select distinct " + column + " from " + table + " " + filter+" order by " + column;
public static String getDinstictElements(String table, String columns,String filter) {
return "select distinct " + columns + " from " + table + " " + filter+" order by " + columns;
}
public static String getOrderedElements(String table, String key, String column) {