|
|
|
@ -1,12 +1,17 @@
|
|
|
|
|
package org.gcube.dataanalysis.ecoengine.transducers;
|
|
|
|
|
|
|
|
|
|
import java.text.ParseException;
|
|
|
|
|
import java.text.SimpleDateFormat;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Calendar;
|
|
|
|
|
import java.util.Date;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Locale;
|
|
|
|
|
|
|
|
|
|
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
|
|
|
|
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
|
|
|
|
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
|
|
|
|
import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator;
|
|
|
|
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
|
|
|
|
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
|
|
|
|
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
|
|
|
@ -68,10 +73,19 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String convert2conventionalFormat(Calendar date){
|
|
|
|
|
if (date==null)
|
|
|
|
|
return "";
|
|
|
|
|
SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm:ss a");
|
|
|
|
|
String formattedDate = formatter.format(new Date(date.getTimeInMillis()));
|
|
|
|
|
return formattedDate;
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
boolean displaydateconvert = true;
|
|
|
|
|
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
|
|
|
|
|
OccurrenceRecord record = new OccurrenceRecord();
|
|
|
|
|
int index = 0;
|
|
|
|
|
|
|
|
|
|
for (Object name:columnsNames){
|
|
|
|
|
String name$ = ""+name;
|
|
|
|
|
String value$ = ""+row[index];
|
|
|
|
@ -84,8 +98,35 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
|
|
|
|
record.recordedby=value$;
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(scientificNameFld)){
|
|
|
|
|
record.scientificName=value$;
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
|
|
|
|
record.eventdate=DateGuesser.convertDate(value$);
|
|
|
|
|
if ((value$==null) || (value$.length()==0)){
|
|
|
|
|
record.eventdate=null;
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
/*
|
|
|
|
|
SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm a",Locale.UK);
|
|
|
|
|
try {
|
|
|
|
|
Date d = (Date) formatter.parse(value$);
|
|
|
|
|
Calendar cal = Calendar.getInstance();
|
|
|
|
|
cal.setTime(d);
|
|
|
|
|
System.out.println("From "+value$+"->"+(cal.get(Calendar.MONTH)+1)+" "+cal.get(Calendar.DAY_OF_MONTH)+" "+cal.get(Calendar.YEAR)+" "+cal.get(Calendar.HOUR)+" "+cal.get(Calendar.MINUTE));
|
|
|
|
|
// System.out.println("->"+cal.toString());
|
|
|
|
|
} catch (ParseException e) {
|
|
|
|
|
// TODO Auto-generated catch block
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
record.eventdate=DateGuesser.convertDate(value$);
|
|
|
|
|
if (displaydateconvert)
|
|
|
|
|
{ AnalysisLogger.getLogger().trace("From "+value$+"->"+convert2conventionalFormat(record.eventdate)+" pattern "+DateGuesser.getPattern(value$));
|
|
|
|
|
displaydateconvert=false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
|
|
|
|
record.modifdate=DateGuesser.convertDate(value$);
|
|
|
|
@ -102,11 +143,12 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
public String occurrenceRecord2String(OccurrenceRecord record){
|
|
|
|
|
StringBuffer buffer =new StringBuffer();
|
|
|
|
|
int index = 0;
|
|
|
|
|
int k=0;
|
|
|
|
|
int nNames = columnsNames.size();
|
|
|
|
|
for (Object name:columnsNames){
|
|
|
|
|
|
|
|
|
|
String name$ = ""+name;
|
|
|
|
|
String value$ = null;
|
|
|
|
|
String value$ = "''";
|
|
|
|
|
if (name$.equalsIgnoreCase(lonFld)){
|
|
|
|
|
value$="'"+record.x+"'";
|
|
|
|
|
}
|
|
|
|
@ -114,17 +156,31 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
value$="'"+record.y+"'";
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
|
|
|
|
value$="'"+record.recordedby+"'";
|
|
|
|
|
if (record.recordedby!=null)
|
|
|
|
|
value$="'"+record.recordedby+"'";
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(scientificNameFld)){
|
|
|
|
|
if (record.scientificName!=null)
|
|
|
|
|
value$="'"+record.scientificName+"'";
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
|
|
|
|
value$="'"+record.eventdate.toString()+"'";
|
|
|
|
|
if (record.eventdate!=null){
|
|
|
|
|
value$="'"+convert2conventionalFormat(record.eventdate)+"'";
|
|
|
|
|
// value$="'"+record.eventdate.getTimeInMillis()+"'";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
|
|
|
|
value$="'"+record.modifdate.toString()+"'";
|
|
|
|
|
if (record.modifdate!=null){
|
|
|
|
|
value$="'"+convert2conventionalFormat(record.modifdate)+"'";
|
|
|
|
|
// value$="'"+record.modifdate.getTimeInMillis()+"'";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
if (record.otherValues!=null){
|
|
|
|
|
value$ = "'"+record.otherValues.get(k)+"'";
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
value$ = "'"+record.otherValues.get(index)+"'";
|
|
|
|
|
|
|
|
|
|
buffer.append(value$);
|
|
|
|
|
if (index<nNames-1){
|
|
|
|
|
buffer.append(",");
|
|
|
|
@ -137,29 +193,27 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
public static void main(String[] args) throws Exception{
|
|
|
|
|
AlgorithmConfiguration config = Regressor.getConfig();
|
|
|
|
|
config.setNumberOfResources(1);
|
|
|
|
|
config.setParam(longitudeColumn,"presence_basking_cluster");
|
|
|
|
|
config.setParam(latitudeColumn,"centerlong"+AlgorithmConfiguration.getListSeparator()+"centerlat");
|
|
|
|
|
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
|
|
|
|
config.setParam("maxIterations","1000");
|
|
|
|
|
config.setParam("minClusters","20");
|
|
|
|
|
config.setParam("maxClusters","30");
|
|
|
|
|
config.setParam("min_points","1");
|
|
|
|
|
/*
|
|
|
|
|
lonFld=config.getParam(longitudeColumn);
|
|
|
|
|
latFld=config.getParam(latitudeColumn);
|
|
|
|
|
recordedByFld=config.getParam(recordedByColumn);
|
|
|
|
|
scientificNameFld=config.getParam(scientificNameColumn);
|
|
|
|
|
eventDatFld=config.getParam(eventDateColumn);
|
|
|
|
|
modifDatFld=config.getParam(lastModificationColumn);
|
|
|
|
|
leftTableName=config.getParam(leftTableNameF);
|
|
|
|
|
rightTableName=config.getParam(rightTableNameF);
|
|
|
|
|
mergedTableName=config.getParam(mergedTableNameF);
|
|
|
|
|
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
|
|
|
|
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
|
|
|
|
*/
|
|
|
|
|
config.setParam(longitudeColumn,"decimallongitude");
|
|
|
|
|
config.setParam(latitudeColumn,"decimallatitude");
|
|
|
|
|
config.setParam(recordedByColumn,"recordedby");
|
|
|
|
|
config.setParam(scientificNameColumn,"scientificname");
|
|
|
|
|
config.setParam(eventDateColumn,"eventdate");
|
|
|
|
|
config.setParam(lastModificationColumn,"modified");
|
|
|
|
|
config.setParam(rightTableNameF,"whitesharkoccurrences2");
|
|
|
|
|
config.setParam(leftTableNameF,"whitesharkoccurrences1");
|
|
|
|
|
// config.setParam(rightTableNameF,"whitesharkoccurrences2");
|
|
|
|
|
// config.setParam(rightTableNameF,"whitesharkoccurrences1");
|
|
|
|
|
config.setParam(mergedTableNameF,"whitesharkoccurrencesmerged");
|
|
|
|
|
config.setParam(spatialTolerance,"0.5");
|
|
|
|
|
config.setParam(confidence,"0.8");
|
|
|
|
|
|
|
|
|
|
OccurrencePointsMerger occm = new OccurrencePointsMerger();
|
|
|
|
|
occm.setConfiguration(config);
|
|
|
|
|
occm.init();
|
|
|
|
|
occm.compute();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
@ -244,20 +298,51 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
|
|
|
|
return (float)Math.random();
|
|
|
|
|
protected float probabilityStrings(String first, String second){
|
|
|
|
|
if ((first==null) ||(second==null))
|
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
return (float) new DistanceCalculator().CD(false, first, second);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
|
|
|
|
//if over the threshold then add to the complete list of elements
|
|
|
|
|
if (probability<confidenceValue)
|
|
|
|
|
objectstoinsert.add(rightOcc);
|
|
|
|
|
protected float probabilityDates(Calendar first, Calendar second){
|
|
|
|
|
if ((first==null) ||(second==null))
|
|
|
|
|
return 1;
|
|
|
|
|
if (first.compareTo(second)==0)
|
|
|
|
|
return 1;
|
|
|
|
|
else
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
|
|
|
|
float probability = 0;
|
|
|
|
|
float distance = (float)Math.sqrt(Math.abs(left.x-right.x)+Math.abs(left.y-right.y));
|
|
|
|
|
if (distance>spatialToleranceValue)
|
|
|
|
|
probability=0;
|
|
|
|
|
else{
|
|
|
|
|
float pSpecies = probabilityStrings(right.scientificName, left.scientificName);
|
|
|
|
|
float pRecordedBy= probabilityStrings(right.recordedby, left.recordedby);
|
|
|
|
|
float pDates = probabilityDates(right.eventdate, left.eventdate);
|
|
|
|
|
probability = pSpecies*pRecordedBy*pDates;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return probability*100;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
|
|
|
|
//if over the threshold then don't add
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
|
|
|
|
//if over the threshold then add to the element
|
|
|
|
|
objectstoinsert.add(rightOcc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected void persist(){
|
|
|
|
|
StringBuffer buffer = new StringBuffer();
|
|
|
|
|
int toins = objectstoinsert.size();
|
|
|
|
|
int counter = 0;
|
|
|
|
|
if (toins>0){
|
|
|
|
|
for (OccurrenceRecord record:objectstoinsert){
|
|
|
|
|
buffer.append("(");
|
|
|
|
|
buffer.append(occurrenceRecord2String(record));
|
|
|
|
@ -267,8 +352,11 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
|
|
|
|
|
counter++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
|
|
|
|
|
// System.out.println("Update:\n"+updateQ);
|
|
|
|
|
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
@ -285,7 +373,7 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
int nCols = columnsNames.size();
|
|
|
|
|
columns = new StringBuffer();
|
|
|
|
|
for (int i=0;i<nCols;i++){
|
|
|
|
|
columns.append(columnsNames);
|
|
|
|
|
columns.append("\""+columnsNames.get(i)+"\"");
|
|
|
|
|
if (i<nCols-1)
|
|
|
|
|
columns.append(",");
|
|
|
|
|
}
|
|
|
|
@ -304,31 +392,52 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
//for each element in dx
|
|
|
|
|
List<OccurrenceRecord> leftRecords = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
|
|
|
|
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
|
|
|
|
|
int iterations = 0;
|
|
|
|
|
int rightCounter = 0;
|
|
|
|
|
int similaritiesCounter = 0;
|
|
|
|
|
for (Object rRow:rightRows){
|
|
|
|
|
//transform into an occurrence object
|
|
|
|
|
OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
|
|
|
|
|
//for each element in sx
|
|
|
|
|
int k=0;
|
|
|
|
|
int leftrecordsSize = 0;
|
|
|
|
|
boolean found = false;
|
|
|
|
|
float p = 0;
|
|
|
|
|
OccurrenceRecord bestleftOcc = null;
|
|
|
|
|
for (Object lRow:leftRows){
|
|
|
|
|
OccurrenceRecord leftOcc = null;
|
|
|
|
|
if (iterations==0){
|
|
|
|
|
//only for the first iteration on the left occurrences perform the transformation
|
|
|
|
|
if (leftrecordsSize<=k){
|
|
|
|
|
//transform into an occurrence object
|
|
|
|
|
leftOcc = row2OccurrenceRecord((Object[])lRow);
|
|
|
|
|
leftRecords.add(leftOcc);
|
|
|
|
|
leftrecordsSize++;
|
|
|
|
|
// System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size());
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
leftOcc =leftRecords.get(k);
|
|
|
|
|
|
|
|
|
|
//evaluate P(dx,sx)
|
|
|
|
|
float p = extProb(leftOcc,rightOcc);
|
|
|
|
|
manageProbability(p, leftOcc, rightOcc);
|
|
|
|
|
p = extProb(leftOcc,rightOcc);
|
|
|
|
|
|
|
|
|
|
if (p>=confidenceValue){
|
|
|
|
|
AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")");
|
|
|
|
|
bestleftOcc=leftOcc;
|
|
|
|
|
found=true;
|
|
|
|
|
similaritiesCounter++;
|
|
|
|
|
AnalysisLogger.getLogger().trace("Found a similarity with P="+p+" between ("+"\""+leftOcc.scientificName+"\""+","+leftOcc.x+"\""+","+"\""+leftOcc.y+"\""+","+"\""+leftOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(leftOcc.eventdate)+"\""+") VS "+
|
|
|
|
|
"("+"\""+rightOcc.scientificName+"\""+","+"\""+rightOcc.x+"\""+","+"\""+rightOcc.y+"\""+","+"\""+rightOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(rightOcc.eventdate)+"\""+")");
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
k++;
|
|
|
|
|
}
|
|
|
|
|
iterations++;
|
|
|
|
|
rightCounter++;
|
|
|
|
|
if (found)
|
|
|
|
|
manageHighProbability(p, bestleftOcc, rightOcc);
|
|
|
|
|
else
|
|
|
|
|
manageLowProbability(p, bestleftOcc, rightOcc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
AnalysisLogger.getLogger().trace("Found "+similaritiesCounter+" similarities on "+rightCounter+" elements");
|
|
|
|
|
|
|
|
|
|
//transform the complete list into a table
|
|
|
|
|
persist();
|
|
|
|
|
//close DB connection
|
|
|
|
@ -338,6 +447,7 @@ public class OccurrencePointsMerger implements Transducerer{
|
|
|
|
|
finally{
|
|
|
|
|
if (dbconnection!=null)
|
|
|
|
|
dbconnection.close();
|
|
|
|
|
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|