This commit is contained in:
Gianpaolo Coro 2012-12-01 12:35:43 +00:00
parent 8441ebc381
commit e3797794a0
6 changed files with 189 additions and 81 deletions

View File

@ -58,7 +58,7 @@ public static void main(String[] args) throws Exception {
*/
// List<Evaluator> trans = null;
// trans = EvaluatorsFactory.getEvaluators(testConfigLocal12());
List<ComputationalAgent> trans = TransducerersFactory.getTransducerers(testConfigLocal5b());
List<ComputationalAgent> trans = TransducerersFactory.getTransducerers(testConfigLocal5c());
trans.get(0).init();
Regressor.process(trans.get(0));
@ -137,21 +137,21 @@ public static void main(String[] args) throws Exception {
config.setParam("scientificNameColumn", "scientificname");
config.setParam("eventDateColumn", "eventdate");
config.setParam("lastModificationColumn", "modified");
// config.setParam("rightTableName", "occurrencetestduplicates2");
// config.setParam("leftTableName", "occurrencetestduplicates");
config.setParam("rightTableName", "occurrence_species_id7a77d613_c21d_495d_8a04_b9534cf5e159");
config.setParam("leftTableName", "processedoccurrences_id_6d416554_7a06_422f_8f4c_a65051025221");
config.setParam("leftTableName", "speciesset1");
config.setParam("rightTableName", "speciesset2");
// config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
// config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
config.setParam("finalTableName", "occurrencesmerged");
config.setParam("spatialTolerance", "0.5");
config.setParam("confidence", "90");
config.setParam("spatialTolerance", "10.0");
config.setParam("confidence", "0");
/*
config.setParam("DatabaseUserName","utente");
config.setParam("DatabasePassword","d4science");
config.setParam("DatabaseURL","jdbc:postgresql://dbtest.research-infrastructures.eu/testdb");
config.setParam("DatabaseDriver","org.postgresql.Driver");
*/
return config;
}
@ -168,8 +168,8 @@ public static void main(String[] args) throws Exception {
config.setParam("eventDateColumn", "eventdate");
config.setParam("lastModificationColumn", "modified");
config.setParam("rightTableName", "occurrence_species2");
config.setParam("leftTableName", "occurrence_species1");
// config.setParam("rightTableName", "occurrence_species2");
// config.setParam("leftTableName", "occurrence_species1");
/*
config.setParam("rightTableName", "occurrence_species_id1e8f7b48_b99a_48a3_8b52_89976fd79cd4");
@ -180,11 +180,14 @@ public static void main(String[] args) throws Exception {
// config.setParam("leftTableName", "processedoccurrences_id_e7b77fc2_f1cf_4a46_b7b7_898b663b65dd");
// config.setParam("rightTableName", "processedoccurrences_id_bd3fdae3_a64e_4215_8eb3_c1bd95981dd2");
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
// config.setParam("leftTableName", "speciesset1");
// config.setParam("rightTableName", "speciesset2");
config.setParam("finalTableName", "occurrencessubtractedarticle3");
config.setParam("spatialTolerance", "0.01");
config.setParam("confidence", "0");
config.setParam("spatialTolerance", "10.0");
config.setParam("confidence", "80");
config.setParam("DatabaseUserName","utente");
config.setParam("DatabasePassword","d4science");
@ -214,12 +217,14 @@ public static void main(String[] args) throws Exception {
config.setParam("rightTableName", "occurrence_species_id1e8f7b48_b99a_48a3_8b52_89976fd79cd4");
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
*/
config.setParam("leftTableName", "occurrence_species_idbb2931ef_af2c_495a_ad5f_4ef81ad16159");
config.setParam("rightTableName", "occurrence_species_id7a77d613_c21d_495d_8a04_b9534cf5e159");
config.setParam("leftTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
config.setParam("rightTableName", "occurrence_species_id0045886b_2a7c_4ede_afc4_3157c694b893");
// config.setParam("leftTableName", "speciesset1");
// config.setParam("rightTableName", "speciesset2");
config.setParam("finalTableName", "occurrencesintersected");
config.setParam("spatialTolerance", "0.000001");
config.setParam("confidence", "90");
config.setParam("spatialTolerance", "10.0");
config.setParam("confidence", "0");
config.setParam("DatabaseUserName","utente");
config.setParam("DatabasePassword","d4science");

View File

@ -138,7 +138,7 @@ public static void main(String[] args) throws Exception {
private static AlgorithmConfiguration testConfigLocal7() {
AlgorithmConfiguration config = Regressor.getConfig();
config.setAgent("OCCURRENCES_DUPLICATE_DELETER");
config.setAgent("OCCURRENCES_DUPLICATES_DELETER");
config.setParam("longitudeColumn", "decimallongitude");
config.setParam("latitudeColumn", "decimallatitude");

View File

@ -17,22 +17,21 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
static String tableNameF = "OccurrencePointsTableName";
public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger {
String tableName;
List<String> records = new ArrayList<String>();
public OccurrencePointsDuplicatesDeleter(){
public OccurrencePointsDuplicatesDeleter() {
}
@Override
public List<StatisticalType> getInputParameters() {
List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
templatesOccurrence.add(TableTemplates.OCCURRENCE_SPECIES);
// occurrence points tables
PrimitiveType p0 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, finalTableNameL,"the name of the produced table", "DeletedOcc_");
PrimitiveType p0 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, finalTableNameL, "the name of the produced table", "DeletedOcc_");
InputTable p1 = new InputTable(templatesOccurrence, tableNameF, "the table containing the occurrence points", "");
ColumnType p3 = new ColumnType(tableNameF, longitudeColumn, "column with longitude values", "decimallongitude", false);
ColumnType p4 = new ColumnType(tableNameF, latitudeColumn, "column with latitude values", "decimallatitude", false);
@ -56,16 +55,16 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
inputs.add(p9);
inputs.add(p10);
inputs.add(p11);
DatabaseType.addDefaultDBPars(inputs);
return inputs;
}
@Override
public String getDescription() {
return "An algorithm for deleting similar occurrences in a sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
}
@Override
public void init() throws Exception {
@ -77,8 +76,8 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
eventDatFld = config.getParam(eventDateColumn);
modifDatFld = config.getParam(lastModificationColumn);
tableName = config.getParam(tableNameF);
rightTableName=tableName;
leftTableName=tableName;
rightTableName = tableName;
leftTableName = tableName;
finalTableName = config.getParam(finalTableNameF);
finalTableLabel = config.getParam(finalTableNameL);
spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance));
@ -89,29 +88,101 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
records = new ArrayList<String>();
status = 0;
}
protected boolean isBetterThan(OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
if (
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
||
(leftOcc.modifdate==null)&&(rightOcc.modifdate!=null)
)
return false;
else if ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.after(rightOcc.modifdate)
||
(leftOcc.modifdate!=null)&&(rightOcc.modifdate==null))
if (((leftOcc.modifdate != null) && (rightOcc.modifdate != null) && leftOcc.modifdate.before(rightOcc.modifdate)) || (leftOcc.modifdate == null) && (rightOcc.modifdate != null))
return false;
else if ((leftOcc.modifdate != null) && (rightOcc.modifdate != null) && leftOcc.modifdate.after(rightOcc.modifdate) || (leftOcc.modifdate != null) && (rightOcc.modifdate == null))
return true;
else
return false;
}
@Override
protected void prepareFinalTable() throws Exception{
protected void prepareFinalTable() throws Exception {
DatabaseFactory.executeSQLUpdate(DatabaseUtils.createBlankTableFromAnotherStatement(tableName, finalTableName), dbconnection);
}
@Override
public void compute() throws Exception {
/**
 * Loads the rows for a full (non-paged) run into {@code leftRows}.
 * The query is produced by {@code DatabaseUtils.getDinstictElements} from {@code tableName},
 * the configured {@code columns} and an empty trailing clause.
 */
public void takeFullRanges() {
    // The log line reports leftTableName, while the query itself reads from tableName.
    AnalysisLogger.getLogger().info("Taking elements from left table: " + leftTableName);
    final String columnList = columns.toString();
    leftRows = DatabaseFactory.executeSQLQuery(
            DatabaseUtils.getDinstictElements(tableName, columnList, ""),
            dbconnection);
}
/**
 * Loads one page of rows from {@code leftTableName} into {@code leftRows}.
 *
 * @param offsetLeft  value written after {@code offset} in the paging clause
 * @param numLeft     value written after {@code limit} in the paging clause
 * @param offsetRight not used by this implementation
 * @param numRight    not used by this implementation
 */
public void takeRange(int offsetLeft, int numLeft, int offsetRight, int numRight) {
    AnalysisLogger.getLogger().info("Taking elements from left table: " + leftTableName);
    final String columnList = columns.toString();
    final String pagingClause = "offset " + offsetLeft + " limit " + numLeft;
    leftRows = DatabaseFactory.executeSQLQuery(
            DatabaseUtils.getDinstictElements(leftTableName, columnList, pagingClause),
            dbconnection);
}
/**
 * Filters the rows held in {@code leftRows} so that, among records judged similar, only the
 * preferred one is kept, then persists the surviving records.
 *
 * <p>Each row is converted to an {@code OccurrenceRecord} and compared with every record already
 * accepted in {@code objectstoinsert}. Two records count as similar when {@code extProb} is at
 * least {@code confidenceValue}. If the new record {@code isBetterThan} an accepted one, the
 * accepted one is removed; otherwise the new record is discarded. {@code persist()} is called
 * only when {@code leftRows} contains at least one row.
 *
 * <p>{@code status} is set to 10 at the start, is capped at 90 while rows are processed and is
 * set to 100 in the {@code finally} block, where {@code shutdown()} is always invoked.
 *
 * @throws Exception any exception raised during processing, rethrown after being logged
 */
public void computeRange() throws Exception {
    try {
        AnalysisLogger.getLogger().trace("Processing");
        status = 10;
        int similaritiesCounter = 0;
        int allrows = 0;
        if (leftRows != null) {
            allrows = leftRows.size();
        }
        int rowcounter = 0;
        if (allrows > 0) {
            for (Object row : leftRows) {
                // transform the raw row into an occurrence object
                OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row);
                // compare against every record accepted so far
                int k = 0;
                int insertedSize = objectstoinsert.size();
                boolean candidate = true;
                while (k < insertedSize) {
                    OccurrenceRecord yetInserted = objectstoinsert.get(k);
                    float prob = extProb(yetInserted, testOcc);
                    if (prob >= confidenceValue) {
                        similaritiesCounter++;
                        if (isBetterThan(testOcc, yetInserted)) {
                            // the new record wins: drop the accepted one; the new record is added after the scan
                            AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between " + quotedOccurrenceFields(testOcc) + " VS " + quotedOccurrenceFields(yetInserted));
                            objectstoinsert.remove(k);
                            // stay on the same index, which now holds the next element
                            k--;
                            insertedSize--;
                        } else {
                            // an accepted record is at least as good: discard the new one
                            candidate = false;
                            break;
                        }
                    }
                    k++;
                }

                if (candidate) {
                    objectstoinsert.add(testOcc);
                }

                status = Math.min(90, 10f + (80 * ((float) rowcounter) / ((float) allrows)));
                rowcounter++;
            }

            AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + allrows + " distinct elements");
            status = 90;
            // write the accepted records to the final table
            persist();
        }
    } catch (Exception e) {
        System.err.println("Error in computation");
        AnalysisLogger.getLogger().info(e);
        throw e;
    } finally {
        shutdown();
        status = 100;
        AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
    }
}

/** Renders an occurrence as ("name","x","y","recordedby","eventdate") for trace messages. */
private String quotedOccurrenceFields(OccurrenceRecord occ) {
    return "(" + "\"" + occ.scientificName + "\"" + "," + "\"" + occ.x + "\"" + "," + "\"" + occ.y + "\"" + "," + "\"" + occ.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(occ.eventdate) + "\"" + ")";
}
public void computeOLD() throws Exception {
try {
// init DB connection
@ -120,9 +191,10 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
AnalysisLogger.getLogger().trace("Taking Table Description");
AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName);
// create new merged table
try{
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
}catch(Exception e1){}
try {
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
} catch (Exception e1) {
}
AnalysisLogger.getLogger().trace("Preparing table: " + finalTableName);
prepareFinalTable();
AnalysisLogger.getLogger().trace("Extracting columns from: " + finalTableName);
@ -136,7 +208,8 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
status = 10;
int similaritiesCounter = 0;
int allrows = rows.size();
int rowcounter = 0;;
int rowcounter = 0;
;
for (Object row : rows) {
// transform into an occurrence object
OccurrenceRecord testOcc = row2OccurrenceRecord((Object[]) row);
@ -144,30 +217,30 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
int k = 0;
int insertedSize = objectstoinsert.size();
boolean candidate = true;
while (k<insertedSize) {
while (k < insertedSize) {
OccurrenceRecord yetInserted = objectstoinsert.get(k);
float prob = extProb(yetInserted, testOcc);
//if the occurrence is better than the the yet inserted then delete the yet inserted and in the end insert the new occ
// if the occurrence is better than the the yet inserted then delete the yet inserted and in the end insert the new occ
if (prob >= confidenceValue) {
similaritiesCounter++;
if (isBetterThan(testOcc, yetInserted)) {
AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between (" + "\"" + testOcc.scientificName + "\"" + "," + testOcc.x + "\"" + "," + "\"" + testOcc.y + "\"" + "," + "\"" + testOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(testOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + yetInserted.scientificName + "\"" + "," + "\"" + yetInserted.x + "\"" + "," + "\"" + yetInserted.y + "\"" + "," + "\"" + yetInserted.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(yetInserted.eventdate) + "\"" + ")");
objectstoinsert.remove(k);
k--;
insertedSize--;
AnalysisLogger.getLogger().trace("Found a similarity with P=" + prob + " between (" + "\"" + testOcc.scientificName + "\"" + "," + testOcc.x + "\"" + "," + "\"" + testOcc.y + "\"" + "," + "\"" + testOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(testOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + yetInserted.scientificName + "\"" + "," + "\"" + yetInserted.x + "\"" + "," + "\"" + yetInserted.y + "\"" + "," + "\"" + yetInserted.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(yetInserted.eventdate) + "\"" + ")");
objectstoinsert.remove(k);
k--;
insertedSize--;
}
//if there is yet one better then discard the testOcc
else{
candidate=false;
// if there is yet one better then discard the testOcc
else {
candidate = false;
break;
}
}
k++;
}
if (candidate)
objectstoinsert.add(testOcc);
@ -181,7 +254,7 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
persist();
// close DB connection
} catch (Exception e) {
AnalysisLogger.getLogger().trace("An error occurred "+e.getLocalizedMessage());
AnalysisLogger.getLogger().trace("An error occurred " + e.getLocalizedMessage());
throw e;
} finally {
if (dbconnection != null)
@ -190,6 +263,8 @@ public class OccurrencePointsDuplicatesDeleter extends OccurrencePointsMerger{
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
}
}
// Empty implementation: this class performs no work in its post-processing step.
// NOTE(review): presumably meant to override the postProcess() of the OccurrencePointsMerger
// superclass so that no further pass runs after this algorithm - confirm.
public void postProcess() throws Exception {
}
}

View File

@ -8,12 +8,12 @@ import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
public class OccurrencePointsIntersector extends OccurrencePointsMerger{
public OccurrencePointsIntersector(){
firstbest=false;
firstbest=true;
}
@Override
public String getDescription() {
return "An algorithm for intesecting two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
return "Between two Ocurrence Sets, keeps the elements of the Right Set that are not in the Left Set.";
}
@Override
@ -23,6 +23,8 @@ public class OccurrencePointsIntersector extends OccurrencePointsMerger{
@Override
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {
objectstoinsert.add(rightOcc);
/*
if (
((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
||
@ -35,6 +37,7 @@ public class OccurrencePointsIntersector extends OccurrencePointsMerger{
objectstoinsert.add(leftOcc);
else
objectstoinsert.add(leftOcc);
*/
}
@Override

View File

@ -42,9 +42,11 @@ public class OccurrencePointsMerger implements Transducerer {
static protected String finalTableNameF = "finalTableName";
static protected String spatialTolerance = "spatialTolerance";
static protected String confidence = "confidence";
// NOTE: on a local database the date style must be set explicitly with: SET datestyle = "ISO, MDY";
static protected String sqlDateFormat = "MM/DD/YYYY HH24:MI:SS";
static protected String javaDateFormat = "MM/dd/yyyy HH:mm:ss";
static protected String tableNameF = "OccurrencePointsTableName";
protected List<OccurrenceRecord> records_left;
protected List<OccurrenceRecord> records_right;
protected AlgorithmConfiguration config;
@ -294,7 +296,9 @@ public class OccurrencePointsMerger implements Transducerer {
finalTableLabel = config.getParam(finalTableNameL);
spatialToleranceValue = Float.parseFloat(config.getParam(spatialTolerance));
confidenceValue = Float.parseFloat(config.getParam(confidence));
config.setParam(tableNameF,finalTableName);
objectstoinsert = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
status = 0;
@ -307,7 +311,7 @@ public class OccurrencePointsMerger implements Transducerer {
@Override
public String getDescription() {
return "An algorithm for merging two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
return "Between two Ocurrence Sets, enrichs the Left Set with the elements of the Right Set that are not in the Left Set. Updates the elements of the Left Set with more recent elements in the Right Set.";
}
protected float probabilityStrings(String first, String second) {
@ -415,9 +419,9 @@ public class OccurrencePointsMerger implements Transducerer {
counter++;
}
String updateQ = DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer);
String updateQ = "SET datestyle = \"ISO, MDY\"; "+DatabaseUtils.insertFromBuffer(finalTableName, columns.toString(), buffer);
// System.out.println("Update:\n"+updateQ);
// AnalysisLogger.getLogger().debug("Update:\n"+updateQ);
// AnalysisLogger.getLogger().debug("Update:\n"+updateQ);
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
AnalysisLogger.getLogger().info("Objects inserted");
}
@ -546,8 +550,10 @@ public class OccurrencePointsMerger implements Transducerer {
manageHighProbability(p, bestleftOcc, rightOcc);
else
break;
} else if (!firstbest)
manageLowProbability(p, bestleftOcc, rightOcc);
}
//else if (!firstbest)
//manageLowProbability(p, bestleftOcc, rightOcc);
k++;
}
rightCounter++;
@ -558,7 +564,10 @@ public class OccurrencePointsMerger implements Transducerer {
else
manageLowProbability(p, bestleftOcc, rightOcc);
}
else
if (!found)
manageLowProbability(p, bestleftOcc, rightOcc);
status = Math.min(90, 10f + (80 * ((float) rightCounter) / ((float) allrightrows)));
if (rightCounter % 500 == 0) {
@ -589,9 +598,25 @@ public class OccurrencePointsMerger implements Transducerer {
initDB(true);
takeFullRanges();
computeRange();
postProcess();
}
/**
 * Post-processing step. It currently does nothing: the whole body is commented out.
 * The disabled code would configure an OccurrencePointsDuplicatesDeleter with this object's
 * {@code config} and run it (init, initDB(false), takeFullRanges, computeRange).
 *
 * @throws Exception declared in the signature; the current empty body throws nothing
 */
public void postProcess() throws Exception{
/*
AnalysisLogger.getLogger().info("Post processing ... Deleting duplicates");
OccurrencePointsDuplicatesDeleter opdd = new OccurrencePointsDuplicatesDeleter();
opdd.setConfiguration(config);
opdd.init();
opdd.initDB(false);
opdd.takeFullRanges();
opdd.computeRange();
AnalysisLogger.getLogger().info("Post processing ... Finished");
*/
}
public static void main(String[] args) throws Exception {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);

View File

@ -8,7 +8,7 @@ public class OccurrencePointsSubtraction extends OccurrencePointsMerger{
@Override
public String getDescription() {
return "An algorithm for subtracting a sets of occurrence points from another. Sets refer to species coming from the Species Discovery Facility of D4Science";
return "Between two Ocurrence Sets, keeps the elements of the Left Set that are not in the Right Set";
}
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc) {