From 00f8dbdb88f52a2a0cfd6d95a31bab026827b49f Mon Sep 17 00:00:00 2001 From: Gianpaolo Coro Date: Wed, 19 Sep 2012 16:04:35 +0000 Subject: [PATCH] git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@58772 82a268e6-3cf1-43bd-a215-b396298e98cf --- .../ecoengine/clustering/DBScan.java | 29 +- .../ecoengine/clustering/KMeans.java | 19 +- .../ecoengine/clustering/XMeansWrapper.java | 18 +- .../ecoengine/datatypes/ColumnType.java | 24 + .../ecoengine/datatypes/ColumnTypesList.java | 27 + .../ecoengine/datatypes/DatabaseType.java | 18 + .../datatypes/enumtypes/TableTemplates.java | 5 +- .../evaluation/DiscrepancyAnalysis.java | 22 +- .../DistributionQualityAnalysis.java | 27 +- .../evaluation/HabitatRepresentativeness.java | 22 +- .../ecoengine/models/ModelAquamapsNN.java | 7 +- .../ecoengine/models/ModelAquamapsNNNS.java | 2 +- .../ecoengine/models/ModelHSPEN.java | 2 +- .../AquamapsSuitable.java | 16 +- .../regression/RegressionTestTransducers.java | 31 +- .../ecoengine/test/regression/Regressor.java | 4 +- .../transducers/BioClimateHCAFTransducer.java | 15 +- .../BioClimateHSPECTransducer.java | 15 +- .../BioClimateHSPENTransducer.java | 16 +- .../transducers/InterpolationTransducer.java | 21 +- .../transducers/OccurrencePointsMerger.java | 601 ++++++++++-------- .../ecoengine/utils/DatabaseUtils.java | 4 + 22 files changed, 490 insertions(+), 455 deletions(-) create mode 100644 src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnType.java create mode 100644 src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnTypesList.java diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java index 6ba2577..767528d 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java @@ -6,11 +6,11 @@ import java.util.List; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; -import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; @@ -387,35 +387,19 @@ public class DBScan implements Clusterer{ List parameters = new ArrayList(); List templateOccs = new ArrayList(); templateOccs.add(TableTemplates.GENERIC); - InputTable p1 = new InputTable(templateOccs,"OccurrencePointsTable","Occurrence Points Table","occurrences"); -// PrimitiveType p2 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features comma separated","x,y"); - - PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false); - + ColumnTypesList p2 = new ColumnTypesList ("OccurrencePointsTable","FeaturesColumnNames", "Column Names for the features", false); ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_"); PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10"); PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","DBScan minimum points parameter (identifies outliers)","1"); - - - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); parameters.add(p1); parameters.add(p2); parameters.add(p3); parameters.add(p4); parameters.add(p5); - parameters.add(p6); - parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); - parameters.add(p11); + + DatabaseType.addDefaultDBPars(parameters); return parameters; } @@ -436,7 +420,10 @@ public class DBScan implements Clusterer{ @Override public String getResources() { - return ResourceFactory.getResources(100f); + if ((status>0)&&(status<100)) + return ResourceFactory.getResources(100f); + else + return ResourceFactory.getResources(0f); } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java index 34411e9..34e9925 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java @@ -5,10 +5,10 @@ import java.util.List; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; -import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; @@ -114,17 +114,9 @@ public class KMeans extends DBScan{ List templateOccs = new ArrayList(); templateOccs.add(TableTemplates.GENERIC); InputTable p1 = new InputTable(templateOccs,"OccurrencePointsTable","Occurrence Points Table","occurrences"); - PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false); + ColumnTypesList p2 = new ColumnTypesList ("OccurrencePointsTable","FeaturesColumnNames", "Column Names for the features", false); ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_"); - - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","Expected Number of Clusters","3"); PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","Max runs of the clustering procedure","10"); PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","Max number of internal optimization steps","5"); @@ -135,15 +127,10 @@ public class KMeans extends DBScan{ parameters.add(p3); parameters.add(p4); parameters.add(p5); - parameters.add(p6); - parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); - parameters.add(p11); parameters.add(p12); parameters.add(p13); + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java index b51536e..ac9a290 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java @@ -8,10 +8,10 @@ import java.util.List; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; -import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; @@ -120,7 +120,7 @@ public class XMeansWrapper extends DBScan { templateOccs.add(TableTemplates.GENERIC); InputTable p1 = new InputTable(templateOccs, "OccurrencePointsTable", "Occurrence Points Table", "occurrences"); - PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames", "Column Names for the features", false); + ColumnTypesList p2 = new ColumnTypesList ("OccurrencePointsTable","FeaturesColumnNames", "Column Names for the features", false); ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable", "Table name of the distribution", "occCluster_"); PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "maxIterations", "XMeans max number of overall iterations of the clustering learning", "10"); @@ -128,27 +128,15 @@ public class XMeansWrapper extends DBScan { PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "maxClusters", "Maximum number of clusters to produce", "50"); PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points", "Number of points which define an outlier set", "2"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - parameters.add(p1); parameters.add(p2); parameters.add(p3); parameters.add(p4); parameters.add(p5); - parameters.add(p6); - parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); - parameters.add(p11); parameters.add(p12); parameters.add(p13); + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnType.java b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnType.java new file mode 100644 index 0000000..fedd070 --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnType.java @@ -0,0 +1,24 @@ +package org.gcube.dataanalysis.ecoengine.datatypes; + + +//name of columns +public class ColumnType extends StatisticalType{ + + private String tableName; + + public ColumnType(String tableName, String name, String description, String defaultValue, boolean optional) { + super(name, description, defaultValue, optional); + this.tableName=tableName; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + + +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnTypesList.java b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnTypesList.java new file mode 100644 index 0000000..8f11e5c --- /dev/null +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/ColumnTypesList.java @@ -0,0 +1,27 @@ +package org.gcube.dataanalysis.ecoengine.datatypes; + +import java.util.List; + +public class ColumnTypesList extends StatisticalType { + + String tableName; + protected List list; + + public ColumnTypesList(String tableName, String name, String description, boolean optional) { + super(name, description, optional); + this.tableName=tableName; + } + + public void add(ColumnType st){ + list.add(st); + } + + public List getList(){ + return list; + } + + public String getTabelName(){ + return tableName; + } + +} diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/DatabaseType.java b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/DatabaseType.java index a8e8bdb..cc0bc87 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/DatabaseType.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/DatabaseType.java @@ -1,5 +1,7 @@ package org.gcube.dataanalysis.ecoengine.datatypes; +import java.util.List; + import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; public class DatabaseType extends StatisticalType{ @@ -29,4 +31,20 @@ public class DatabaseType extends StatisticalType{ this.databaseParameter = databaseParameters; } + public static void addDefaultDBPars(List parameters){ + DatabaseType p1 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); + DatabaseType p2 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); + DatabaseType p3 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); + DatabaseType p4 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); + DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); + DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); + + parameters.add(p1); + parameters.add(p2); + parameters.add(p3); + parameters.add(p4); + parameters.add(p5); + parameters.add(p6); + } + } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/enumtypes/TableTemplates.java b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/enumtypes/TableTemplates.java index 97f90cc..63da933 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/enumtypes/TableTemplates.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/datatypes/enumtypes/TableTemplates.java @@ -5,10 +5,11 @@ public enum TableTemplates { HSPEN, HCAF, HSPEC, - OCCURRENCE, + OCCURRENCE_AQUAMAPS, MINMAXLAT, TRAININGSET, TESTSET, GENERIC, - CLUSTER + CLUSTER, + OCCURRENCE_SPECIES } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DiscrepancyAnalysis.java b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DiscrepancyAnalysis.java index 2677c03..9b9afe3 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DiscrepancyAnalysis.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DiscrepancyAnalysis.java @@ -3,15 +3,14 @@ package org.gcube.dataanalysis.ecoengine.evaluation; import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import java.util.Map; import org.gcube.contentmanagement.graphtools.utils.MathFunctions; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; -import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.interfaces.DataAnalysis; @@ -54,17 +53,12 @@ public class DiscrepancyAnalysis extends DataAnalysis { templates.add(TableTemplates.TESTSET); InputTable p1 = new InputTable(templates,"FirstTable","First Table"); InputTable p2 = new InputTable(templates,"SecondTable","Second Table"); - PrimitiveType p3 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "FirstTableCsquareColumn","the csquares column name in the first table ","csquarecode"); - PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "SecondTableCsquareColumn","the csquares column name in the second table","csquarecode"); - PrimitiveType p5 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "FirstTableProbabilityColumn","the probability column in the first table","probability"); - PrimitiveType p13 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "SecondTableProbabilityColumn","the probability column in the second table","probability"); + ColumnType p3 = new ColumnType("FirstTable", "FirstTableCsquareColumn", "the csquares column name in the first table", "csquarecode", false); + ColumnType p4 = new ColumnType("SecondTable", "SecondTableCsquareColumn", "the csquares column name in the second table", "csquarecode", false); + ColumnType p5 = new ColumnType("FirstTable", "FirstTableProbabilityColumn", "the probability column in the first table", "probability", false); + ColumnType p13 = new ColumnType("SecondTable", "SecondTableProbabilityColumn", "the probability column in the second table", "probability", false); PrimitiveType p6 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, "ComparisonThreshold","the comparison threshold","0.1"); PrimitiveType p7 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "MaxSamples","the comparison threshold","10000"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p12 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); parameters.add(p1); parameters.add(p2); @@ -74,12 +68,8 @@ public class DiscrepancyAnalysis extends DataAnalysis { parameters.add(p13); parameters.add(p6); parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); - parameters.add(p11); - parameters.add(p12); + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DistributionQualityAnalysis.java b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DistributionQualityAnalysis.java index 79d68ac..4ce2235 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DistributionQualityAnalysis.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/DistributionQualityAnalysis.java @@ -8,6 +8,7 @@ import java.util.Map; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; @@ -50,25 +51,22 @@ public class DistributionQualityAnalysis extends DataAnalysis { templates.add(TableTemplates.TESTSET); List templatesOccurrences = new ArrayList(); - templatesOccurrences.add(TableTemplates.OCCURRENCE); + templatesOccurrences.add(TableTemplates.OCCURRENCE_AQUAMAPS); templatesOccurrences.add(TableTemplates.TRAININGSET); templatesOccurrences.add(TableTemplates.TESTSET); InputTable p1 = new InputTable(templatesOccurrences,"PositiveCasesTable","A Table containing positive cases"); InputTable p2 = new InputTable(templatesOccurrences,"NegativeCasesTable","A Table containing negative cases"); - PrimitiveType p3 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "PositiveCasesTableKeyColumn","Positive Cases Table Key Column","csquarecode"); - PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "NegativeCasesTableKeyColumn","Negative Cases Table Key Column","csquarecode"); InputTable p5 = new InputTable(templates,"DistributionTable","A probability distribution table"); - PrimitiveType p6 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "DistributionTableKeyColumn","Distribution Table Key Column","csquarecode"); - PrimitiveType p7 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "DistributionTableProbabilityColumn","Distribution Table Probability Column","csquarecode"); + + ColumnType p3 = new ColumnType("PositiveCasesTable", "PositiveCasesTableKeyColumn", "Positive Cases Table Key Column", "csquarecode", false); + ColumnType p4 = new ColumnType("NegativeCasesTable", "NegativeCasesTableKeyColumn", "Negative Cases Table Key Column", "csquarecode", false); + ColumnType p6 = new ColumnType("DistributionTable", "DistributionTableKeyColumn", "Distribution Table Key Column", "csquarecode", false); + ColumnType p7 = new ColumnType("DistributionTable", "DistributionTableProbabilityColumn", "Distribution Table Probability Column", "probability", false); + PrimitiveType p8 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "PositiveThreshold","Positive acceptance threshold","0.8"); PrimitiveType p9 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "NegativeThreshold","Negative acceptance threshold","0.3"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p12 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p13 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p14 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - + parameters.add(p1); parameters.add(p2); parameters.add(p3); @@ -78,11 +76,8 @@ public class DistributionQualityAnalysis extends DataAnalysis { parameters.add(p7); parameters.add(p8); parameters.add(p9); - parameters.add(p10); - parameters.add(p11); - parameters.add(p12); - parameters.add(p13); - parameters.add(p14); + + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java index c374a27..966f6d8 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java @@ -8,6 +8,7 @@ import java.util.Map; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; @@ -20,7 +21,6 @@ import org.gcube.dataanalysis.ecoengine.models.cores.pca.PrincipalComponentAnaly import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory; import org.gcube.dataanalysis.ecoengine.utils.Operations; import org.gcube.dataanalysis.ecoengine.utils.Transformations; -import org.hibernate.SessionFactory; public class HabitatRepresentativeness extends DataAnalysis { @@ -45,7 +45,7 @@ public class HabitatRepresentativeness extends DataAnalysis { templates.add(TableTemplates.TESTSET); List templatesOccurrences = new ArrayList(); - templatesOccurrences.add(TableTemplates.OCCURRENCE); + templatesOccurrences.add(TableTemplates.OCCURRENCE_AQUAMAPS); templatesOccurrences.add(TableTemplates.TRAININGSET); templatesOccurrences.add(TableTemplates.TESTSET); @@ -53,23 +53,17 @@ public class HabitatRepresentativeness extends DataAnalysis { PrimitiveType p2 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "ProjectingAreaFeaturesOptionalCondition","optional filter for taking area rows","oceanarea>0",true); InputTable p3 = new InputTable(templatesOccurrences,"PositiveCasesTable","A Table containing positive cases"); InputTable p4 = new InputTable(templatesOccurrences,"NegativeCasesTable","A Table containing negative cases"); - PrimitiveType p5 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "FeaturesColumns","fetures columns names separated by comma","depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); + +// PrimitiveType p5 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "FeaturesColumns","fetures columns names separated by comma","depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea"); + ColumnTypesList p5 = new ColumnTypesList ("PositiveCasesTable","FeaturesColumns", "Features columns", false); parameters.add(p1); parameters.add(p2); parameters.add(p3); parameters.add(p4); parameters.add(p5); - parameters.add(p6); - parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); + + DatabaseType.addDefaultDBPars(parameters); return parameters; } @@ -203,7 +197,7 @@ public class HabitatRepresentativeness extends DataAnalysis { status = 0; String projectingAreaTable = config.getParam("ProjectingAreaTable"); String projectingAreaFeaturesOptionalCondition = config.getParam("ProjectingAreaFeaturesOptionalCondition"); - String FeaturesColumns = config.getParam("FeaturesColumns"); + String FeaturesColumns = config.getParam("FeaturesColumns").replace(AlgorithmConfiguration.getListSeparator(), ","); String positiveCasesTable = config.getParam("PositiveCasesTable"); String negativeCasesTable = config.getParam("NegativeCasesTable"); connection = AlgorithmConfiguration.getConnectionFromConfig(config); diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelAquamapsNN.java b/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelAquamapsNN.java index aeda7cb..f5eddea 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelAquamapsNN.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelAquamapsNN.java @@ -15,6 +15,7 @@ import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; +import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList; import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; @@ -48,12 +49,12 @@ public class ModelAquamapsNN implements Model { public List getInputParameters() { List parameters = new ArrayList(); List templatesOccurrences = new ArrayList(); - templatesOccurrences.add(TableTemplates.OCCURRENCE); + templatesOccurrences.add(TableTemplates.OCCURRENCE_AQUAMAPS); InputTable p1 = new InputTable(templatesOccurrences,"AbsenceDataTable","A Table containing absence points"); InputTable p2 = new InputTable(templatesOccurrences,"PresenceDataTable","A Table containing positive occurrences"); PrimitiveType p3 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "SpeciesName","Species Code of the fish the NN will correspond to","Fis-10407"); - PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "LayersNeurons","a list of neurons number for each inner layer separated by comma","100,2"); + PrimitiveTypesList p4 = new PrimitiveTypesList(PrimitiveTypes.NUMBER,"LayersNeurons","a list of neurons number for each inner layer separated by comma",false); DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); @@ -118,7 +119,7 @@ public class ModelAquamapsNN implements Model { String layersNeurons$ = Input.getParam("LayersNeurons"); if ((layersNeurons$!=null)&&(layersNeurons$.length()>0)) { - String [] split = layersNeurons$.split(","); + String [] split = layersNeurons$.split(AlgorithmConfiguration.getListSeparator()); layersNeurons = new int[split.length]; for (int i = 0;i getInputParameters() { List parameters = new ArrayList(); List templatesOccurrences = new ArrayList(); - templatesOccurrences.add(TableTemplates.OCCURRENCE); + templatesOccurrences.add(TableTemplates.OCCURRENCE_AQUAMAPS); InputTable p1 = new InputTable(templatesOccurrences,"AbsenceDataTable","A Table containing absence points"); InputTable p2 = new InputTable(templatesOccurrences,"PresenceDataTable","A Table containing positive occurrences"); diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelHSPEN.java b/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelHSPEN.java index 0a71249..455b77d 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelHSPEN.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/models/ModelHSPEN.java @@ -432,7 +432,7 @@ public class ModelHSPEN implements Model { public List getInputParameters() { List parameters = new ArrayList(); List templatesOccurrences = new ArrayList(); - templatesOccurrences.add(TableTemplates.OCCURRENCE); + templatesOccurrences.add(TableTemplates.OCCURRENCE_AQUAMAPS); List templateHspen = new ArrayList(); templateHspen.add(TableTemplates.HSPEN); List templateHcaf = new ArrayList(); diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/spatialdistributions/AquamapsSuitable.java b/src/main/java/org/gcube/dataanalysis/ecoengine/spatialdistributions/AquamapsSuitable.java index 75fc25a..c2bbb0b 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/spatialdistributions/AquamapsSuitable.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/spatialdistributions/AquamapsSuitable.java @@ -251,7 +251,7 @@ public class AquamapsSuitable implements SpatialProbabilityDistributionTable{ public List getInputParameters() { List parameters = new ArrayList(); List templatesOccurrence = new ArrayList(); - templatesOccurrence.add(TableTemplates.OCCURRENCE); + templatesOccurrence.add(TableTemplates.OCCURRENCE_AQUAMAPS); List templateHspen = new ArrayList(); templateHspen.add(TableTemplates.HSPEN); List templateHcaf = new ArrayList(); @@ -263,13 +263,6 @@ public class AquamapsSuitable implements SpatialProbabilityDistributionTable{ PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "DistributionTableLabel","Name of the HSPEC probability distribution","hspec"); InputTable p5 = new InputTable(templatesOccurrence,"OccurrencePointsTable","The Occurrence points table for calculating the bounding box","occurrencecells"); PrimitiveType p6 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.CONSTANT, "CreateTable","Create New Table for each computation","true"); - - DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p12 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db tablespace"); parameters.add(p1); parameters.add(p2); @@ -277,13 +270,8 @@ public class AquamapsSuitable implements SpatialProbabilityDistributionTable{ parameters.add(p4); parameters.add(p5); parameters.add(p6); - parameters.add(p7); - parameters.add(p8); - parameters.add(p9); - parameters.add(p10); - parameters.add(p11); - parameters.add(p12); + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java index d4b1845..56ee8c5 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestTransducers.java @@ -12,8 +12,9 @@ public class RegressionTestTransducers { public static void main(String[] args) throws Exception { System.out.println("TEST 1"); - - List trans = TransducerersFactory.getTransducerers(testConfigLocal()); + List trans = null; + + trans = TransducerersFactory.getTransducerers(testConfigLocal()); trans.get(0).init(); Regressor.process(trans.get(0)); trans = null; @@ -32,6 +33,11 @@ public static void main(String[] args) throws Exception { trans.get(0).init(); Regressor.process(trans.get(0)); trans = null; + + trans = TransducerersFactory.getTransducerers(testConfigLocal5()); + trans.get(0).init(); + Regressor.process(trans.get(0)); + trans = null; } @@ -81,5 +87,26 @@ public static void main(String[] args) throws Exception { return config; } + + private static AlgorithmConfiguration testConfigLocal5() { + + AlgorithmConfiguration config = Regressor.getConfig(); + config.setAgent("OCCURRENCES_MERGER"); + + config.setParam("longitudeColumn", "decimallongitude"); + config.setParam("latitudeColumn", "decimallatitude"); + config.setParam("recordedByColumn", "recordedby"); + config.setParam("scientificNameColumn", "scientificname"); + config.setParam("eventDateColumn", "eventdate"); + config.setParam("lastModificationColumn", "modified"); + config.setParam("rightTableName", "whitesharkoccurrences2"); + config.setParam("leftTableName", "whitesharkoccurrences1"); + config.setParam("mergedTableName", "whitesharkoccurrencesmerged"); + config.setParam("spatialTolerance", "0.5"); + config.setParam("confidence", "0.8"); + + + return config; + } } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/Regressor.java b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/Regressor.java index e2f3907..d1f0a4a 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/Regressor.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/Regressor.java @@ -1,6 +1,5 @@ package org.gcube.dataanalysis.ecoengine.test.regression; -import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; import org.gcube.dataanalysis.ecoengine.interfaces.ComputationalAgent; @@ -24,8 +23,9 @@ public class Regressor { Thread.sleep(1000); } } else - AnalysisLogger.getLogger().trace("Generator Algorithm Not Supported"); + System.out.println("Generator Algorithm Not Supported" ); + System.out.println("-|"+agent.getStatus()); } public class ThreadCalculator implements Runnable { diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHCAFTransducer.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHCAFTransducer.java index 10204c0..4a127ef 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHCAFTransducer.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHCAFTransducer.java @@ -35,28 +35,15 @@ public class BioClimateHCAFTransducer extends BioClimateHSPECTransducer{ @Override public List getInputParameters() { List parameters = new ArrayList(); - - DatabaseType p1 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p2 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p3 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p4 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - List templateHspec = new ArrayList(); templateHspec.add(TableTemplates.HCAF); TablesList p7 = new TablesList(templateHspec, "HCAF_TABLE_LIST", "List of HCAF tables to analyze", false); PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HCAF_TABLE_NAMES", "List of HCAF table names to be used as labels", false); - parameters.add(p1); - parameters.add(p2); - parameters.add(p3); - parameters.add(p4); - parameters.add(p5); - parameters.add(p6); parameters.add(p7); parameters.add(p8); + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPECTransducer.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPECTransducer.java index 6140a08..847b08d 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPECTransducer.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPECTransducer.java @@ -72,13 +72,6 @@ public class BioClimateHSPECTransducer implements Transducerer{ public List getInputParameters() { List parameters = new ArrayList(); - DatabaseType p1 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p2 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p3 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p4 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - List templateHspec = new ArrayList(); templateHspec.add(TableTemplates.HSPEC); TablesList p7 = new TablesList(templateHspec, "HSPEC_TABLE_LIST", "List of HSPEC tables to analyze", false); @@ -86,16 +79,10 @@ public class BioClimateHSPECTransducer implements Transducerer{ PrimitiveType p9 = new PrimitiveType(Double.class.getName(), null, PrimitiveTypes.NUMBER, "Threshold", "A threshold of probability over which the abundancy per species will be calculated","0.5"); - parameters.add(p1); - parameters.add(p2); - parameters.add(p3); - parameters.add(p4); - parameters.add(p5); - parameters.add(p6); parameters.add(p7); parameters.add(p8); parameters.add(p9); - + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java index f803fdb..3feac9d 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/BioClimateHSPENTransducer.java @@ -35,28 +35,14 @@ public class BioClimateHSPENTransducer extends BioClimateHSPECTransducer{ @Override public List getInputParameters() { List parameters = new ArrayList(); - - DatabaseType p1 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p2 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p3 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p4 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - List templateHspec = new ArrayList(); templateHspec.add(TableTemplates.HSPEN); TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false); PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false); - parameters.add(p1); - parameters.add(p2); - parameters.add(p3); - parameters.add(p4); - parameters.add(p5); - parameters.add(p6); parameters.add(p7); parameters.add(p8); - + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/InterpolationTransducer.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/InterpolationTransducer.java index c251a57..9f59afb 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/InterpolationTransducer.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/InterpolationTransducer.java @@ -1,8 +1,6 @@ package org.gcube.dataanalysis.ecoengine.transducers; -import java.awt.Image; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; @@ -10,10 +8,9 @@ import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE; import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable; -import org.gcube.dataanalysis.ecoengine.datatypes.TablesList; import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; -import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters; +import org.gcube.dataanalysis.ecoengine.datatypes.TablesList; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.evaluation.bioclimate.InterpolateTables; @@ -67,14 +64,6 @@ public class InterpolationTransducer implements Transducerer{ @Override public List getInputParameters() { List parameters = new ArrayList(); - - DatabaseType p1 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); - DatabaseType p2 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password"); - DatabaseType p3 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver"); - DatabaseType p4 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url"); - DatabaseType p5 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect"); - DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); - List templates = new ArrayList(); templates.add(TableTemplates.HCAF); @@ -86,12 +75,6 @@ public class InterpolationTransducer implements Transducerer{ PrimitiveType p12 = new PrimitiveType(Enum.class.getName(), InterpolateTables.INTERPOLATIONFUNCTIONS.values(), PrimitiveTypes.ENUMERATED, "InterpolationFunction", "The interpolation Function to use",""+InterpolateTables.INTERPOLATIONFUNCTIONS.LINEAR); - parameters.add(p1); - parameters.add(p2); - parameters.add(p3); - parameters.add(p4); - parameters.add(p5); - parameters.add(p6); parameters.add(p7); parameters.add(p8); parameters.add(p9); @@ -99,7 +82,7 @@ public class InterpolationTransducer implements Transducerer{ parameters.add(p11); parameters.add(p12); - + DatabaseType.addDefaultDBPars(parameters); return parameters; } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java index 49da996..2bc2d5e 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/transducers/OccurrencePointsMerger.java @@ -1,12 +1,10 @@ package org.gcube.dataanalysis.ecoengine.transducers; -import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; -import java.util.Locale; import org.gcube.contentmanagement.graphtools.utils.DateGuesser; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; @@ -14,30 +12,40 @@ import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory; import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator; import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration; import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE; +import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType; +import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType; +import org.gcube.dataanalysis.ecoengine.datatypes.InputTable; +import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable; +import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType; +import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType; import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters; +import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.interfaces.Transducerer; import org.gcube.dataanalysis.ecoengine.test.regression.Regressor; import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; +import org.gcube.dataanalysis.ecoengine.utils.ResourceFactory; import org.hibernate.SessionFactory; -public class OccurrencePointsMerger implements Transducerer{ +public class OccurrencePointsMerger implements Transducerer { - static String longitudeColumn= "longitudeColumn"; - static String latitudeColumn= "latitudeColumn"; - static String recordedByColumn= "recordedByColumn"; + static String longitudeColumn = "longitudeColumn"; + static String latitudeColumn = "latitudeColumn"; + static String recordedByColumn = "recordedByColumn"; static String scientificNameColumn = "scientificNameColumn"; static String eventDateColumn = "eventDateColumn"; static String lastModificationColumn = "lastModificationColumn"; - static String rightTableNameF= "rightTableName"; + static String rightTableNameF = "rightTableName"; static String leftTableNameF = "leftTableName"; static String mergedTableNameF = "mergedTableName"; - static String spatialTolerance= "spatialTolerance"; - static String confidence= "confidence"; - + static String spatialTolerance = "spatialTolerance"; + static String confidence = "confidence"; + protected List records_left; protected List records_right; protected AlgorithmConfiguration config; - + protected String lonFld; protected String latFld; protected String recordedByFld; @@ -54,377 +62,428 @@ public class OccurrencePointsMerger implements Transducerer{ protected List objectstodelete; protected List columnsNames; protected SessionFactory dbconnection; - - protected class OccurrenceRecord{ - + protected float status; + + protected class OccurrenceRecord { + public String scientificName; public String recordedby; public Calendar eventdate; public Calendar modifdate; -// public String locality; -// public String country; + // public String locality; + // public String country; public float x; public float y; - -// Map metadata; + + // Map metadata; public List otherValues; - public OccurrenceRecord(){ + + public OccurrenceRecord() { otherValues = new ArrayList(); } } - public static String convert2conventionalFormat(Calendar date){ - if (date==null) - return ""; + public static String convert2conventionalFormat(Calendar date) { + if (date == null) + return ""; SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm:ss a"); String formattedDate = formatter.format(new Date(date.getTimeInMillis())); return formattedDate; - -} + } + boolean displaydateconvert = true; - public OccurrenceRecord row2OccurrenceRecord(Object[] row){ + + public OccurrenceRecord row2OccurrenceRecord(Object[] row) { OccurrenceRecord record = new OccurrenceRecord(); int index = 0; - - for (Object name:columnsNames){ - String name$ = ""+name; - String value$ = ""+row[index]; - if (name$.equalsIgnoreCase(lonFld)){ - record.x=Float.parseFloat(value$); - } - else if (name$.equalsIgnoreCase(latFld)){ - record.y=Float.parseFloat(value$); - } - else if (name$.equalsIgnoreCase(recordedByFld)){ - record.recordedby=value$; - } - else if (name$.equalsIgnoreCase(scientificNameFld)){ - record.scientificName=value$; - } - else if (name$.equalsIgnoreCase(eventDatFld)){ - if ((value$==null) || (value$.length()==0)){ - record.eventdate=null; - } - else{ - /* - SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm a",Locale.UK); - try { - Date d = (Date) formatter.parse(value$); - Calendar cal = Calendar.getInstance(); - cal.setTime(d); - System.out.println("From "+value$+"->"+(cal.get(Calendar.MONTH)+1)+" "+cal.get(Calendar.DAY_OF_MONTH)+" "+cal.get(Calendar.YEAR)+" "+cal.get(Calendar.HOUR)+" "+cal.get(Calendar.MINUTE)); -// System.out.println("->"+cal.toString()); - } catch (ParseException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - */ - record.eventdate=DateGuesser.convertDate(value$); - if (displaydateconvert) - { AnalysisLogger.getLogger().trace("From "+value$+"->"+convert2conventionalFormat(record.eventdate)+" pattern "+DateGuesser.getPattern(value$)); - displaydateconvert=false; + + for (Object name : columnsNames) { + String name$ = "" + name; + String value$ = "" + row[index]; + if (name$.equalsIgnoreCase(lonFld)) { + record.x = Float.parseFloat(value$); + } else if (name$.equalsIgnoreCase(latFld)) { + record.y = Float.parseFloat(value$); + } else if (name$.equalsIgnoreCase(recordedByFld)) { + record.recordedby = value$; + } else if (name$.equalsIgnoreCase(scientificNameFld)) { + record.scientificName = value$; + } else if (name$.equalsIgnoreCase(eventDatFld)) { + if ((value$ == null) || (value$.length() == 0)) { + record.eventdate = null; + } else { + /* + * SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm a",Locale.UK); try { Date d = (Date) formatter.parse(value$); Calendar cal = Calendar.getInstance(); cal.setTime(d); System.out.println("From "+value$+"->"+(cal.get(Calendar.MONTH)+1)+" "+cal.get(Calendar.DAY_OF_MONTH)+" "+cal.get(Calendar.YEAR)+" "+cal.get(Calendar.HOUR)+" "+cal.get(Calendar.MINUTE)); // System.out.println("->"+cal.toString()); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } + */ + record.eventdate = DateGuesser.convertDate(value$); + if (displaydateconvert) { + AnalysisLogger.getLogger().trace("From " + value$ + "->" + convert2conventionalFormat(record.eventdate) + " pattern " + DateGuesser.getPattern(value$)); + displaydateconvert = false; } - - + } - } - else if (name$.equalsIgnoreCase(modifDatFld)){ - record.modifdate=DateGuesser.convertDate(value$); - } - else + } else if (name$.equalsIgnoreCase(modifDatFld)) { + record.modifdate = DateGuesser.convertDate(value$); + } else record.otherValues.add(value$); - + index++; } - + return record; } - - public String occurrenceRecord2String(OccurrenceRecord record){ - StringBuffer buffer =new StringBuffer(); + + public String occurrenceRecord2String(OccurrenceRecord record) { + StringBuffer buffer = new StringBuffer(); int index = 0; - int k=0; + int k = 0; int nNames = columnsNames.size(); - for (Object name:columnsNames){ - - String name$ = ""+name; + for (Object name : columnsNames) { + + String name$ = "" + name; String value$ = "''"; - if (name$.equalsIgnoreCase(lonFld)){ - value$="'"+record.x+"'"; - } - else if (name$.equalsIgnoreCase(latFld)){ - value$="'"+record.y+"'"; - } - else if (name$.equalsIgnoreCase(recordedByFld)){ - if (record.recordedby!=null) - value$="'"+record.recordedby+"'"; - } - else if (name$.equalsIgnoreCase(scientificNameFld)){ - if (record.scientificName!=null) - value$="'"+record.scientificName+"'"; - } - else if (name$.equalsIgnoreCase(eventDatFld)){ - if (record.eventdate!=null){ - value$="'"+convert2conventionalFormat(record.eventdate)+"'"; -// value$="'"+record.eventdate.getTimeInMillis()+"'"; + if (name$.equalsIgnoreCase(lonFld)) { + value$ = "'" + record.x + "'"; + } else if (name$.equalsIgnoreCase(latFld)) { + value$ = "'" + record.y + "'"; + } else if (name$.equalsIgnoreCase(recordedByFld)) { + if (record.recordedby != null) + value$ = "'" + record.recordedby + "'"; + } else if (name$.equalsIgnoreCase(scientificNameFld)) { + if (record.scientificName != null) + value$ = "'" + record.scientificName + "'"; + } else if (name$.equalsIgnoreCase(eventDatFld)) { + if (record.eventdate != null) { + value$ = "'" + convert2conventionalFormat(record.eventdate) + "'"; + // value$="'"+record.eventdate.getTimeInMillis()+"'"; } - } - else if (name$.equalsIgnoreCase(modifDatFld)){ - if (record.modifdate!=null){ - value$="'"+convert2conventionalFormat(record.modifdate)+"'"; -// value$="'"+record.modifdate.getTimeInMillis()+"'"; + } else if (name$.equalsIgnoreCase(modifDatFld)) { + if (record.modifdate != null) { + value$ = "'" + convert2conventionalFormat(record.modifdate) + "'"; + // value$="'"+record.modifdate.getTimeInMillis()+"'"; } - } - else{ - if (record.otherValues!=null){ - value$ = "'"+record.otherValues.get(k)+"'"; + } else { + if (record.otherValues != null) { + value$ = "'" + record.otherValues.get(k) + "'"; k++; } } buffer.append(value$); - if (index templatesOccurrence = new ArrayList(); + templatesOccurrence.add(TableTemplates.OCCURRENCE_AQUAMAPS); OccurrencePointsMerger occm = new OccurrencePointsMerger(); occm.setConfiguration(config); occm.init(); occm.compute(); } - + @Override public List getInputParameters() { - // TODO Auto-generated method stub - return null; + List templatesOccurrence = new ArrayList(); + templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES); + // occurrence points tables + InputTable p1 = new InputTable(templatesOccurrence, leftTableNameF, "The First table containing the occurrence points", ""); + InputTable p2 = new InputTable(templatesOccurrence, rightTableNameF, "The Second table containing the occurrence points", ""); + + // string parameters + ColumnType p3 = new ColumnType(leftTableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false); + ColumnType p4 = new ColumnType(leftTableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false); + ColumnType p5 = new ColumnType(leftTableNameF, recordedByColumn, "column with RecordedBy values", "recordedby", false); + ColumnType p6 = new ColumnType(leftTableNameF, scientificNameColumn, "column with Scientific Names", "scientificname", false); + ColumnType p7 = new ColumnType(leftTableNameF, eventDateColumn, "column with EventDate values", "eventdate", false); + ColumnType p8 = new ColumnType(leftTableNameF, lastModificationColumn, "column with Modified values", "modified", false); + ServiceType p9 = new ServiceType(ServiceParameters.RANDOMSTRING, mergedTableNameF, "Name of the final produced", "mergedoccurrences_"); + PrimitiveType p10 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, spatialTolerance, "The tolerance in degree for assessing that two points could be the same", "0.5"); + PrimitiveType p11 = new PrimitiveType(Float.class.getName(), null, PrimitiveTypes.NUMBER, confidence, "The overall acceptance similarity threshold over which two points are the same", "0.8"); + + List inputs = new ArrayList(); + inputs.add(p1); + inputs.add(p2); + inputs.add(p3); + inputs.add(p4); + inputs.add(p5); + inputs.add(p6); + inputs.add(p7); + inputs.add(p8); + inputs.add(p9); + inputs.add(p10); + inputs.add(p11); + + DatabaseType.addDefaultDBPars(inputs); + return inputs; } - - @Override - public String getResourceLoad() { - // TODO Auto-generated method stub - return null; - } - - @Override public String getResources() { - // TODO Auto-generated method stub - return null; + if ((status > 0) && (status < 100)) + return ResourceFactory.getResources(100f); + else + return ResourceFactory.getResources(0f); } + ResourceFactory resourceManager; + + @Override + public String getResourceLoad() { + if (resourceManager == null) + resourceManager = new ResourceFactory(); + return resourceManager.getResourceLoad(1); + } @Override public float getStatus() { - // TODO Auto-generated method stub - return 0; + return status; } - @Override public INFRASTRUCTURE getInfrastructure() { - // TODO Auto-generated method stub - return null; + return INFRASTRUCTURE.LOCAL; } - @Override public StatisticalType getOutput() { - // TODO Auto-generated method stub - return null; - } + List templatesOccurrence = new ArrayList(); + templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES); + // occurrence points tables + OutputTable p = new OutputTable(templatesOccurrence, mergedTableName, mergedTableName, "The output table containing the merged points"); + return p; + } @Override public void init() throws Exception { - - AnalysisLogger.setLogger(config.getConfigPath()+AlgorithmConfiguration.defaultLoggerFile); - lonFld=config.getParam(longitudeColumn); - latFld=config.getParam(latitudeColumn); - recordedByFld=config.getParam(recordedByColumn); - scientificNameFld=config.getParam(scientificNameColumn); - eventDatFld=config.getParam(eventDateColumn); - modifDatFld=config.getParam(lastModificationColumn); - leftTableName=config.getParam(leftTableNameF); - rightTableName=config.getParam(rightTableNameF); - mergedTableName=config.getParam(mergedTableNameF); - spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance)); - confidenceValue=Float.parseFloat(config.getParam(confidence)); - + + AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile); + lonFld = config.getParam(longitudeColumn); + latFld = config.getParam(latitudeColumn); + recordedByFld = config.getParam(recordedByColumn); + scientificNameFld = config.getParam(scientificNameColumn); + eventDatFld = config.getParam(eventDateColumn); + modifDatFld = config.getParam(lastModificationColumn); + leftTableName = config.getParam(leftTableNameF); + rightTableName = config.getParam(rightTableNameF); + mergedTableName = config.getParam(mergedTableNameF); + spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance)); + confidenceValue = Float.parseFloat(config.getParam(confidence)); + objectstoinsert = new ArrayList(); objectstodelete = new ArrayList(); + status = 0; } - @Override public void setConfiguration(AlgorithmConfiguration config) { - this.config=config; + this.config = config; } - @Override public void shutdown() { - // TODO Auto-generated method stub - - } + } @Override public String getDescription() { - // TODO Auto-generated method stub - return null; + return "An algorithm for merging two sets of occurrence points of species coming from the Species Discovery Facility of D4Science"; } - protected float probabilityStrings(String first, String second){ - if ((first==null) ||(second==null)) + protected float probabilityStrings(String first, String second) { + if ((first == null) || (second == null)) return 1; - + return (float) new DistanceCalculator().CD(false, first, second); } - - protected float probabilityDates(Calendar first, Calendar second){ - if ((first==null) ||(second==null)) + + protected float probabilityDates(Calendar first, Calendar second) { + if ((first == null) || (second == null)) return 1; - if (first.compareTo(second)==0) + if (first.compareTo(second) == 0) return 1; else return 0; } - - protected float extProb(OccurrenceRecord right,OccurrenceRecord left){ - float probability = 0; - float distance = (float)Math.sqrt(Math.abs(left.x-right.x)+Math.abs(left.y-right.y)); - if (distance>spatialToleranceValue) - probability=0; - else{ - float pSpecies = probabilityStrings(right.scientificName, left.scientificName); - float pRecordedBy= probabilityStrings(right.recordedby, left.recordedby); - float pDates = probabilityDates(right.eventdate, left.eventdate); - probability = pSpecies*pRecordedBy*pDates; - } - - return probability*100; - } - - protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){ - //if over the threshold then don't add - } - - protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){ - //if over the threshold then add to the element - objectstoinsert.add(rightOcc); - } - - protected void persist(){ - StringBuffer buffer = new StringBuffer(); - int toins = objectstoinsert.size(); - int counter = 0; - if (toins>0){ - for (OccurrenceRecord record:objectstoinsert){ - buffer.append("("); - buffer.append(occurrenceRecord2String(record)); - buffer.append(")"); - if (counter spatialToleranceValue) + probability = 0; + else { + float pSpecies = probabilityStrings(right.scientificName, left.scientificName); + float pRecordedBy = probabilityStrings(right.recordedby, left.recordedby); + float pDates = probabilityDates(right.eventdate, left.eventdate); + probability = pSpecies * pRecordedBy * pDates; } + + return probability * 100; + } + + protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) { + // insert the most recent: + // if it is the left then leave it as is + // otherwise put the left in the deletion list and the right in the insertion list - String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer); -// System.out.println("Update:\n"+updateQ); - DatabaseFactory.executeSQLUpdate(updateQ, dbconnection); + if ( + ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate)) + || + (leftOcc.modifdate==null)&&(rightOcc.modifdate!=null) + ) + { + + objectstodelete.add(leftOcc); + objectstoinsert.add(rightOcc); } } - + + protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) { + // if over the threshold then add to the element + objectstoinsert.add(rightOcc); + } + + protected void persist() { + + // DELETE ELEMENTS IN THE DELETION LIST + int todel = objectstodelete.size(); + int counter = 0; + StringBuffer buffer = new StringBuffer(); + AnalysisLogger.getLogger().debug("Deleting " + todel + " objects"); + if (todel > 0) { + for (OccurrenceRecord record : objectstodelete) { + String rec = recordedByFld + "='" + record.recordedby.replace("'","")+"'"; + String sci = scientificNameFld + "='" + record.scientificName.replace("'","")+"'"; + String x = lonFld + "='" + record.x+"'"; + String y = latFld + "='" + record.y+"'"; + + buffer.append("("); + buffer.append(rec + " AND " + sci + " AND " + x + " AND " + y); + buffer.append(")"); + if (counter < todel - 1) + buffer.append(" OR "); + + counter++; + } + + String updateQ = DatabaseUtils.deleteFromBuffer(mergedTableName, buffer); +// System.out.println("Update:\n"+updateQ); + DatabaseFactory.executeSQLUpdate(updateQ, dbconnection); + AnalysisLogger.getLogger().debug("Objects deleted"); + } + + buffer = new StringBuffer(); + int toins = objectstoinsert.size(); + AnalysisLogger.getLogger().debug("Inserting " + toins + " objects"); + counter = 0; + if (toins > 0) { + for (OccurrenceRecord record : objectstoinsert) { + buffer.append("("); + buffer.append(occurrenceRecord2String(record)); + buffer.append(")"); + if (counter < toins - 1) + buffer.append(","); + + counter++; + } + + String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName, columns.toString(), buffer); + // System.out.println("Update:\n"+updateQ); + DatabaseFactory.executeSQLUpdate(updateQ, dbconnection); + AnalysisLogger.getLogger().debug("Objects inserted"); + } + + } + @Override public void compute() throws Exception { - - try{ - //init DB connection + + try { + // init DB connection AnalysisLogger.getLogger().trace("Initializing DB Connection"); dbconnection = DatabaseUtils.initDBSession(config); AnalysisLogger.getLogger().trace("Taking Table Description"); - //take the description of the table - columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName),dbconnection); - + // take the description of the table + columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName), dbconnection); + int nCols = columnsNames.size(); columns = new StringBuffer(); - for (int i=0;i leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(),""),dbconnection); - //take the elements from dx table - AnalysisLogger.getLogger().trace("Taking elements from right table: "+rightTableName); - List rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(),""),dbconnection); - //for each element in dx + // take the elements from sx table + AnalysisLogger.getLogger().trace("Taking elements from left table: " + leftTableName); + List leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(), ""), dbconnection); + // take the elements from dx table + AnalysisLogger.getLogger().trace("Taking elements from right table: " + rightTableName); + List rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(), ""), dbconnection); + // for each element in dx List leftRecords = new ArrayList(); - AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName); + AnalysisLogger.getLogger().trace("Processing " + leftTableName + " vs " + rightTableName); + status = 10; int rightCounter = 0; int similaritiesCounter = 0; - for (Object rRow:rightRows){ - //transform into an occurrence object - OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow); - //for each element in sx - int k=0; + int allrightrows = rightRows.size(); + for (Object rRow : rightRows) { + // transform into an occurrence object + OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[]) rRow); + // for each element in sx + int k = 0; int leftrecordsSize = 0; boolean found = false; float p = 0; OccurrenceRecord bestleftOcc = null; - for (Object lRow:leftRows){ + for (Object lRow : leftRows) { OccurrenceRecord leftOcc = null; - //only for the first iteration on the left occurrences perform the transformation - if (leftrecordsSize<=k){ - //transform into an occurrence object - leftOcc = row2OccurrenceRecord((Object[])lRow); + // only for the first iteration on the left occurrences perform the transformation + if (leftrecordsSize <= k) { + // transform into an occurrence object + leftOcc = row2OccurrenceRecord((Object[]) lRow); leftRecords.add(leftOcc); leftrecordsSize++; -// System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size()); - } - else - leftOcc =leftRecords.get(k); + // System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size()); + } else + leftOcc = leftRecords.get(k); - //evaluate P(dx,sx) - p = extProb(leftOcc,rightOcc); - - if (p>=confidenceValue){ - bestleftOcc=leftOcc; - found=true; + // evaluate P(dx,sx) + p = extProb(leftOcc, rightOcc); + + if (p >= confidenceValue) { + bestleftOcc = leftOcc; + found = true; similaritiesCounter++; - AnalysisLogger.getLogger().trace("Found a similarity with P="+p+" between ("+"\""+leftOcc.scientificName+"\""+","+leftOcc.x+"\""+","+"\""+leftOcc.y+"\""+","+"\""+leftOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(leftOcc.eventdate)+"\""+") VS "+ - "("+"\""+rightOcc.scientificName+"\""+","+"\""+rightOcc.x+"\""+","+"\""+rightOcc.y+"\""+","+"\""+rightOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(rightOcc.eventdate)+"\""+")"); + AnalysisLogger.getLogger().trace("Found a similarity with P=" + p + " between (" + "\"" + leftOcc.scientificName + "\"" + "," + leftOcc.x + "\"" + "," + "\"" + leftOcc.y + "\"" + "," + "\"" + leftOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(leftOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + rightOcc.scientificName + "\"" + "," + "\"" + rightOcc.x + "\"" + "," + "\"" + rightOcc.y + "\"" + "," + "\"" + rightOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(rightOcc.eventdate) + "\"" + ")"); break; } k++; @@ -434,19 +493,21 @@ public class OccurrencePointsMerger implements Transducerer{ manageHighProbability(p, bestleftOcc, rightOcc); else manageLowProbability(p, bestleftOcc, rightOcc); + + status = Math.min(90, 10f + (80 * ((float) rightCounter) / ((float) allrightrows))); } - - AnalysisLogger.getLogger().trace("Found "+similaritiesCounter+" similarities on "+rightCounter+" elements"); - - //transform the complete list into a table + + AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + rightCounter + " elements"); + status = 90; + // transform the complete list into a table persist(); - //close DB connection - }catch(Exception e){ + // close DB connection + } catch (Exception e) { throw e; - } - finally{ - if (dbconnection!=null) + } finally { + if (dbconnection != null) dbconnection.close(); + status = 100; AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed"); } } diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java index cf53c73..71eff8a 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/utils/DatabaseUtils.java @@ -158,6 +158,10 @@ public class DatabaseUtils { return "insert into "+table+" ("+columnsNames+") values "+values; } + public static String deleteFromBuffer(String table, StringBuffer couples) { + + return "delete from "+table+" where "+couples; + } public static String copyFileToTableStatement (String file, String table){ return "COPY "+table+" FROM '"+file+"' DELIMITERS ';' WITH NULL AS 'null string'";