git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@61785 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
8441ebc381
commit
e3797794a0
|
@ -58,7 +58,7 @@ public static void main(String[] args) throws Exception {
|
|||
*/
|
||||
// List<Evaluator> trans = null;
|
||||
// trans = EvaluatorsFactory.getEvaluators(testConfigLocal12());
|
||||
List<ComputationalAgent> trans = TransducerersFactory.getTransducerers(testConfigLocal5b());
|
||||
List<ComputationalAgent> trans = TransducerersFactory.getTransducerers(testConfigLocal5c());
|
||||
trans.get(0).init();
|
||||
Regressor.process(trans.get(0));
|
||||
|
||||
|
@ -137,21 +137,21 @@ public static void main(String[] args) throws Exception {
|
|||
config.setParam("scientificNameColumn", "scientificname");
|
||||
config.setParam("eventDateColumn", "eventdate");
|
||||
config.setParam("lastModificationColumn", "modified");
|
||||
// config.setParam("rightTableName", "occurrencetestduplicates2");
|
||||
// config.setParam("leftTableName", "occurrencetestduplicates");
|
||||
config.setParam("rightTableName", "occurrence_species_id7a77d613_c21d_495d_8a04_b9534cf5e159");
|
||||
config.setParam("leftTableName", "processedoccurrences_id_6d416554_7a06_422f_8f4c_a65051025221");
|
||||
|
||||
config.setParam("leftTableName", "speciesset1");
|
||||
config.setParam("rightTableName", "speciesset2");
|
||||
// config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
// config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
|
||||
config.setParam("finalTableName", "occurrencesmerged");
|
||||
config.setParam("spatialTolerance", "0.5");
|
||||
config.setParam("confidence", "90");
|
||||
config.setParam("spatialTolerance", "10.0");
|
||||
config.setParam("confidence", "0");
|
||||
|
||||
/*
|
||||
config.setParam("DatabaseUserName","utente");
|
||||
config.setParam("DatabasePassword","d4science");
|
||||
config.setParam("DatabaseURL","jdbc:postgresql://dbtest.research-infrastructures.eu/testdb");
|
||||
config.setParam("DatabaseDriver","org.postgresql.Driver");
|
||||
|
||||
*/
|
||||
|
||||
return config;
|
||||
}
|
||||
|
@ -168,8 +168,8 @@ public static void main(String[] args) throws Exception {
|
|||
config.setParam("eventDateColumn", "eventdate");
|
||||
config.setParam("lastModificationColumn", "modified");
|
||||
|
||||
config.setParam("rightTableName", "occurrence_species2");
|
||||
config.setParam("leftTableName", "occurrence_species1");
|
||||
// config.setParam("rightTableName", "occurrence_species2");
|
||||
// config.setParam("leftTableName", "occurrence_species1");
|
||||
|
||||
/*
|
||||
config.setParam("rightTableName", "occurrence_species_id1e8f7b48_b99a_48a3_8b52_89976fd79cd4");
|
||||
|
@ -180,11 +180,14 @@ public static void main(String[] args) throws Exception {
|
|||
|
||||
// config.setParam("leftTableName", "processedoccurrences_id_e7b77fc2_f1cf_4a46_b7b7_898b663b65dd");
|
||||
// config.setParam("rightTableName", "processedoccurrences_id_bd3fdae3_a64e_4215_8eb3_c1bd95981dd2");
|
||||
|
||||
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
// config.setParam("leftTableName", "speciesset1");
|
||||
// config.setParam("rightTableName", "speciesset2");
|
||||
|
||||
config.setParam("finalTableName", "occurrencessubtractedarticle3");
|
||||
config.setParam("spatialTolerance", "0.01");
|
||||
config.setParam("confidence", "0");
|
||||
config.setParam("spatialTolerance", "10.0");
|
||||
config.setParam("confidence", "80");
|
||||
|
||||
config.setParam("DatabaseUserName","utente");
|
||||
config.setParam("DatabasePassword","d4science");
|
||||
|
@ -214,12 +217,14 @@ public static void main(String[] args) throws Exception {
|
|||
config.setParam("rightTableName", "occurrence_species_id1e8f7b48_b99a_48a3_8b52_89976fd79cd4");
|
||||
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
*/
|
||||
config.setParam("leftTableName", "occurrence_species_idbb2931ef_af2c_495a_ad5f_4ef81ad16159");
|
||||
config.setParam("rightTableName", "occurrence_species_id7a77d613_c21d_495d_8a04_b9534cf5e159");
|
||||
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
|
||||
// config.setParam("leftTableName", "speciesset1");
|
||||
// config.setParam("rightTableName", "speciesset2");
|
||||
|
||||
config.setParam("finalTableName", "occurrencesintersected");
|
||||
config.setParam("spatialTolerance", "0.000001");
|
||||
config.setParam("confidence", "90");
|
||||
config.setParam("spatialTolerance", "10.0");
|
||||
config.setParam("confidence", "0");
|
||||
|
||||
config.setParam("DatabaseUserName","utente");
|
||||
config.setParam("DatabasePassword","d4science");
|
||||
|
|
|
@ -138,7 +138,7 @@ public static void main(String[] args) throws Exception {
|
|||
private static AlgorithmConfiguration testConfigLocal7() {
|
||||
|
||||
AlgorithmConfiguration config = Regressor.getConfig();
|
||||
config.setAgent("OCCURRENCES_DUPLICATE_DELETER");
|
||||
config.setAgent("OCCURRENCES_DUPLICATES_DELETER");
|
||||
|
||||
config.setParam("longitudeColumn", "decimallongitude");
|
||||
config.setParam("latitudeColumn", "decimallatitude");
|
||||
|
|
|
@ -18,7 +18,7 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
|||
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
||||
|
||||
public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger {
|
||||
static String tableNameF = "OccurrencePointsTableName";
|
||||
|
||||
String tableName;
|
||||
List<String> records = new ArrayList<String>();
|
||||
|
||||
|
@ -26,7 +26,6 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<StatisticalType> getInputParameters() {
|
||||
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
|
||||
|
@ -91,15 +90,9 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
}
|
||||
|
||||
protected boolean isBetterThan(OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
|
||||
if (
|
||||
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
|
||||
||
|
||||
(leftOcc.modifdate==null)&&(rightOcc.modifdate!=null)
|
||||
)
|
||||
if (((leftOcc.modifdate != null) && (rightOcc.modifdate != null) && leftOcc.modifdate.before(rightOcc.modifdate)) || (leftOcc.modifdate == null) && (rightOcc.modifdate != null))
|
||||
return false;
|
||||
else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate)
|
||||
||
|
||||
(leftOcc.modifdate!=null)&&(rightOcc.modifdate==null))
|
||||
else if ((leftOcc.modifdate != null) && (rightOcc.modifdate != null) && leftOcc.modifdate.after(rightOcc.modifdate) || (leftOcc.modifdate != null) && (rightOcc.modifdate == null))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
@ -110,8 +103,86 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void compute() throws Exception {
|
||||
public void takeFullRanges() {
|
||||
// take the elements from sx table
|
||||
AnalysisLogger.getLogger().info("Taking elements from left table: " + leftTableName);
|
||||
leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(tableName, columns.toString(), ""), dbconnection);
|
||||
}
|
||||
|
||||
/**
 * Loads one window of distinct rows into {@code leftRows}.
 *
 * @param offsetLeft  value emitted after the SQL keyword "offset"
 * @param numLeft     value emitted after the SQL keyword "limit"
 * @param offsetRight not read by this implementation
 * @param numRight    not read by this implementation
 */
public void takeRange(int offsetLeft, int numLeft, int offsetRight, int numRight) {
	// NOTE(review): takeFullRanges() queries tableName while this method queries leftTableName - confirm which one is intended.
	AnalysisLogger.getLogger().info("Taking elements from left table: " + leftTableName);
	String window = "offset " + offsetLeft + " limit " + numLeft;
	leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getDinstictElements(leftTableName, columns.toString(), window), dbconnection);
}
|
||||
|
||||
public void computeRange() throws Exception {
|
||||
try {
|
||||
// for each element in dx
|
||||
AnalysisLogger.getLogger().trace("Processing");
|
||||
status = 10;
|
||||
int similaritiesCounter = 0;
|
||||
int allrows = 0;
|
||||
if (leftRows!=null)
|
||||
allrows = leftRows.size();
|
||||
int rowcounter = 0;
|
||||
if (allrows > 0) {
|
||||
for (Object row : leftRows) {
|
||||
// transform into an occurrence object
|
||||
OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row);
|
||||
// for each element in the white list
|
||||
int k = 0;
|
||||
int insertedSize = objectstoinsert.size();
|
||||
boolean candidate = true;
|
||||
|
||||
while (k < insertedSize) {
|
||||
OccurrenceRecord yetInserted = objectstoinsert.get(k);
|
||||
float prob = extProb(yetInserted, testOcc);
|
||||
// if the occurrence is better than the the yet inserted then delete the yet inserted and in the end insert the new occ
|
||||
if (prob >= confidenceValue) {
|
||||
similaritiesCounter++;
|
||||
if (isBetterThan(testOcc, yetInserted)) {
|
||||
AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between (" + "\"" + testOcc.scientificName + "\"" + "," + testOcc.x + "\"" + "," + "\"" + testOcc.y + "\"" + "," + "\"" + testOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(testOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + yetInserted.scientificName + "\"" + "," + "\"" + yetInserted.x + "\"" + "," + "\"" + yetInserted.y + "\"" + "," + "\"" + yetInserted.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(yetInserted.eventdate) + "\"" + ")");
|
||||
objectstoinsert.remove(k);
|
||||
k--;
|
||||
insertedSize--;
|
||||
|
||||
}
|
||||
// if there is yet one better then discard the testOcc
|
||||
else {
|
||||
candidate = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
k++;
|
||||
}
|
||||
|
||||
if (candidate)
|
||||
objectstoinsert.add(testOcc);
|
||||
|
||||
status = Math.min(90, 10f + (80 * ((float) rowcounter) / ((float) allrows)));
|
||||
rowcounter++;
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + allrows + " distinct elements");
|
||||
status = 90;
|
||||
// transform the complete list into a table
|
||||
persist();
|
||||
// close DB connection
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error in computation");
|
||||
AnalysisLogger.getLogger().info(e);
|
||||
throw e;
|
||||
} finally {
|
||||
shutdown();
|
||||
status = 100;
|
||||
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void computeOLD() throws Exception {
|
||||
|
||||
try {
|
||||
// init DB connection
|
||||
|
@ -122,7 +193,8 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
// create new merged table
|
||||
try {
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
|
||||
}catch(Exception e1){}
|
||||
} catch (Exception e1) {
|
||||
}
|
||||
AnalysisLogger.getLogger().trace("Preparing table: " + finalTableName);
|
||||
prepareFinalTable();
|
||||
AnalysisLogger.getLogger().trace("Extracting columns from: " + finalTableName);
|
||||
|
@ -136,7 +208,8 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
status = 10;
|
||||
int similaritiesCounter = 0;
|
||||
int allrows = rows.size();
|
||||
int rowcounter = 0;;
|
||||
int rowcounter = 0;
|
||||
;
|
||||
for (Object row : rows) {
|
||||
// transform into an occurrence object
|
||||
OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row);
|
||||
|
@ -191,5 +264,7 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
|
|||
}
|
||||
}
|
||||
|
||||
public void postProcess() throws Exception {
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,12 +8,12 @@ import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
|||
public class OccurrencePointsIntersector extends OccurrencePointsMerger{
|
||||
|
||||
public OccurrencePointsIntersector(){
|
||||
firstbest=false;
|
||||
firstbest=true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "An algorithm for intesecting two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
|
||||
return "Between two Ocurrence Sets, keeps the elements of the Right Set that are not in the Left Set.";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -23,6 +23,8 @@ public class OccurrencePointsIntersector extends OccurrencePointsMerger{
|
|||
|
||||
@Override
|
||||
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
|
||||
objectstoinsert.add(rightOcc);
|
||||
/*
|
||||
if (
|
||||
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
|
||||
||
|
||||
|
@ -35,6 +37,7 @@ public class OccurrencePointsIntersector extends OccurrencePointsMerger{
|
|||
objectstoinsert.add(leftOcc);
|
||||
else
|
||||
objectstoinsert.add(leftOcc);
|
||||
*/
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -42,8 +42,10 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
static protected String finalTableNameF = "finalTableName";
|
||||
static protected String spatialTolerance = "spatialTolerance";
|
||||
static protected String confidence = "confidence";
|
||||
// NOTE: on a local computer the session date style should be set with: SET datestyle = "ISO, MDY";
|
||||
static protected String sqlDateFormat = "MM/DD/YYYY HH24:MI:SS";
|
||||
static protected String javaDateFormat = "MM/dd/yyyy HH:mm:ss";
|
||||
static protected String tableNameF = "OccurrencePointsTableName";
|
||||
|
||||
protected List<OccurrenceRecord> records_left;
|
||||
protected List<OccurrenceRecord> records_right;
|
||||
|
@ -295,6 +297,8 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance));
|
||||
confidenceValue = Float.parseFloat(config.getParam(confidence));
|
||||
|
||||
config.setParam(tableNameF,finalTableName);
|
||||
|
||||
objectstoinsert = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||
objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||
status = 0;
|
||||
|
@ -307,7 +311,7 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "An algorithm for merging two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
|
||||
return "Between two Ocurrence Sets, enrichs the Left Set with the elements of the Right Set that are not in the Left Set. Updates the elements of the Left Set with more recent elements in the Right Set.";
|
||||
}
|
||||
|
||||
protected float probabilityStrings(String first, String second) {
|
||||
|
@ -415,7 +419,7 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
counter++;
|
||||
}
|
||||
|
||||
String updateQ = DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer);
|
||||
String updateQ = "SET datestyle = \"ISO, MDY\"; "+DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer);
|
||||
// System.out.println("Update:\n"+updateQ);
|
||||
// AnalysisLogger.getLogger().debug("Update:\n"+updateQ);
|
||||
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
||||
|
@ -546,8 +550,10 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
manageHighProbability(p, bestleftOcc, rightOcc);
|
||||
else
|
||||
break;
|
||||
} else if (!firstbest)
|
||||
manageLowProbability(p, bestleftOcc, rightOcc);
|
||||
}
|
||||
//else if (!firstbest)
|
||||
//manageLowProbability(p, bestleftOcc, rightOcc);
|
||||
|
||||
k++;
|
||||
}
|
||||
rightCounter++;
|
||||
|
@ -558,6 +564,9 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
else
|
||||
manageLowProbability(p, bestleftOcc, rightOcc);
|
||||
}
|
||||
else
|
||||
if (!found)
|
||||
manageLowProbability(p, bestleftOcc, rightOcc);
|
||||
|
||||
status = Math.min(90, 10f + (80 * ((float) rightCounter) / ((float) allrightrows)));
|
||||
|
||||
|
@ -589,7 +598,23 @@ public class OccurrencePointsMerger implements Transducerer {
|
|||
initDB(true);
|
||||
takeFullRanges();
|
||||
computeRange();
|
||||
postProcess();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void postProcess() throws Exception{
|
||||
/*
|
||||
AnalysisLogger.getLogger().info("Post processing ... Deleting duplicates");
|
||||
|
||||
OccurrencePointsDuplicatesDeleter opdd = new OccurrencePointsDuplicatesDeleter();
|
||||
opdd.setConfiguration(config);
|
||||
opdd.init();
|
||||
opdd.initDB(false);
|
||||
opdd.takeFullRanges();
|
||||
opdd.computeRange();
|
||||
AnalysisLogger.getLogger().info("Post processing ... Finished");
|
||||
*/
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
|
|
@ -8,7 +8,7 @@ public class OccurrencePointsSubtraction extends OccurrencePointsMerger{
|
|||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "An algorithm for subtracting a sets of occurrence points from another. Sets refer to species coming from the Species Discovery Facility of D4Science";
|
||||
return "Between two Ocurrence Sets, keeps the elements of the Left Set that are not in the Right Set";
|
||||
}
|
||||
|
||||
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
|
||||
|
|
Loading…
Reference in New Issue