git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@58627 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
9fd65becdf
commit
9b2b34b3de
|
@ -32,7 +32,7 @@ public class DateGuesser {
|
||||||
}
|
}
|
||||||
|
|
||||||
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
|
// private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss","MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
|
||||||
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss","MM/dd/yyyy HH:mm:ss aaa","dd/MM/yyyy HH:mm:ss","EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss", "h:mm a", "yyyy"};
|
private static final String[] formats = { "MM\\dd\\yyyy", "MM\\dd\\yy", "MM/dd/yy", "MM/dd/yyyy", "MM/yy","MM/yyyy", "yyyy.MM.dd G 'at' HH:mm:ss z", "MM/dd/yyyy HH:mm:ss","dd/MM/yyyy HH:mm:ss","EEE, MMM d, ''yy", "h:mm a", "hh 'o''clock' a, zzzz", "K:mm a, z", "MM-dd-yy","MM-dd-yyyy", "dd-MMM-yy", "yyyy.MM.dd.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd HH:mm","yyyy-MM-dd","yyyy-MM-dd HH:mm:ss","MM/dd/yy KK:mm a","MM/dd/yy KK:mm:ss a","h:mm a", "yyyy"};
|
||||||
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy","dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy","MM/yyyy","dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy"};
|
private static final String[] formatiITA = { "dd\\MM\\yyyy", "dd\\MM\\yy", "dd/MM/yy", "dd/MM/yyyy", "dd/MM/yy", "dd/MM/yyyy","dd/MM/yyyy HH:mm:ss", "dd/MM/yy HH:mm:ss", "dd/MM/yyyy HH:mm:ss", "MM/yy","MM/yyyy","dd.MM.yyyy G 'alle' HH:mm:ss z", "EEE, MMM d, ''yy", "h:mm a", "hh a, zzzz", "K:mm a, z", "dd-MMM-yy", "dd.MM.yyyy.HH.mm.ss", "E, dd MMM yyyy HH:mm:ss Z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyMMddHHmmssZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "dd-MMM-yyyy HH:mm", "h:mm a", "yyyy"};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.gcube.dataanalysis.ecoengine.modeling;
|
package org.gcube.dataanalysis.ecoengine.modeling;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
|
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
|
||||||
|
@ -65,7 +66,8 @@ public class SimpleModeler implements Modeler{
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<StatisticalType> getInputParameters() {
|
public List<StatisticalType> getInputParameters() {
|
||||||
return innermodel.getInputParameters();
|
return new ArrayList<StatisticalType>();
|
||||||
|
// return innermodel.getInputParameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.gcube.dataanalysis.ecoengine.processing;
|
package org.gcube.dataanalysis.ecoengine.processing;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
@ -328,7 +329,8 @@ public class LocalSimpleSplitGenerator implements Generator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<StatisticalType> getInputParameters() {
|
public List<StatisticalType> getInputParameters() {
|
||||||
return distributionModel.getInputParameters();
|
// return distributionModel.getInputParameters();
|
||||||
|
return new ArrayList<StatisticalType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.gcube.dataanalysis.ecoengine.processing;
|
package org.gcube.dataanalysis.ecoengine.processing;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
|
@ -497,7 +498,8 @@ public class LocalSplitGenerator implements Generator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<StatisticalType> getInputParameters() {
|
public List<StatisticalType> getInputParameters() {
|
||||||
return distributionModel.getInputParameters();
|
return new ArrayList<StatisticalType>();
|
||||||
|
// return distributionModel.getInputParameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
package org.gcube.dataanalysis.ecoengine.transducers;
|
package org.gcube.dataanalysis.ecoengine.transducers;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
|
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
|
||||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||||
|
import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator;
|
||||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||||
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
||||||
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||||
|
@ -68,10 +73,19 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String convert2conventionalFormat(Calendar date){
|
||||||
|
if (date==null)
|
||||||
|
return "";
|
||||||
|
SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm:ss a");
|
||||||
|
String formattedDate = formatter.format(new Date(date.getTimeInMillis()));
|
||||||
|
return formattedDate;
|
||||||
|
|
||||||
|
}
|
||||||
|
boolean displaydateconvert = true;
|
||||||
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
|
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
|
||||||
OccurrenceRecord record = new OccurrenceRecord();
|
OccurrenceRecord record = new OccurrenceRecord();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
for (Object name:columnsNames){
|
for (Object name:columnsNames){
|
||||||
String name$ = ""+name;
|
String name$ = ""+name;
|
||||||
String value$ = ""+row[index];
|
String value$ = ""+row[index];
|
||||||
|
@ -84,8 +98,35 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
else if (name$.equalsIgnoreCase(recordedByFld)){
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||||
record.recordedby=value$;
|
record.recordedby=value$;
|
||||||
}
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(scientificNameFld)){
|
||||||
|
record.scientificName=value$;
|
||||||
|
}
|
||||||
else if (name$.equalsIgnoreCase(eventDatFld)){
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||||
record.eventdate=DateGuesser.convertDate(value$);
|
if ((value$==null) || (value$.length()==0)){
|
||||||
|
record.eventdate=null;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
/*
|
||||||
|
SimpleDateFormat formatter = new SimpleDateFormat("MM/dd/yy KK:mm a",Locale.UK);
|
||||||
|
try {
|
||||||
|
Date d = (Date) formatter.parse(value$);
|
||||||
|
Calendar cal = Calendar.getInstance();
|
||||||
|
cal.setTime(d);
|
||||||
|
System.out.println("From "+value$+"->"+(cal.get(Calendar.MONTH)+1)+" "+cal.get(Calendar.DAY_OF_MONTH)+" "+cal.get(Calendar.YEAR)+" "+cal.get(Calendar.HOUR)+" "+cal.get(Calendar.MINUTE));
|
||||||
|
// System.out.println("->"+cal.toString());
|
||||||
|
} catch (ParseException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
record.eventdate=DateGuesser.convertDate(value$);
|
||||||
|
if (displaydateconvert)
|
||||||
|
{ AnalysisLogger.getLogger().trace("From "+value$+"->"+convert2conventionalFormat(record.eventdate)+" pattern "+DateGuesser.getPattern(value$));
|
||||||
|
displaydateconvert=false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (name$.equalsIgnoreCase(modifDatFld)){
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||||
record.modifdate=DateGuesser.convertDate(value$);
|
record.modifdate=DateGuesser.convertDate(value$);
|
||||||
|
@ -102,11 +143,12 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
public String occurrenceRecord2String(OccurrenceRecord record){
|
public String occurrenceRecord2String(OccurrenceRecord record){
|
||||||
StringBuffer buffer =new StringBuffer();
|
StringBuffer buffer =new StringBuffer();
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
int k=0;
|
||||||
int nNames = columnsNames.size();
|
int nNames = columnsNames.size();
|
||||||
for (Object name:columnsNames){
|
for (Object name:columnsNames){
|
||||||
|
|
||||||
String name$ = ""+name;
|
String name$ = ""+name;
|
||||||
String value$ = null;
|
String value$ = "''";
|
||||||
if (name$.equalsIgnoreCase(lonFld)){
|
if (name$.equalsIgnoreCase(lonFld)){
|
||||||
value$="'"+record.x+"'";
|
value$="'"+record.x+"'";
|
||||||
}
|
}
|
||||||
|
@ -114,17 +156,31 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
value$="'"+record.y+"'";
|
value$="'"+record.y+"'";
|
||||||
}
|
}
|
||||||
else if (name$.equalsIgnoreCase(recordedByFld)){
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||||
value$="'"+record.recordedby+"'";
|
if (record.recordedby!=null)
|
||||||
|
value$="'"+record.recordedby+"'";
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(scientificNameFld)){
|
||||||
|
if (record.scientificName!=null)
|
||||||
|
value$="'"+record.scientificName+"'";
|
||||||
}
|
}
|
||||||
else if (name$.equalsIgnoreCase(eventDatFld)){
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||||
value$="'"+record.eventdate.toString()+"'";
|
if (record.eventdate!=null){
|
||||||
|
value$="'"+convert2conventionalFormat(record.eventdate)+"'";
|
||||||
|
// value$="'"+record.eventdate.getTimeInMillis()+"'";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (name$.equalsIgnoreCase(modifDatFld)){
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||||
value$="'"+record.modifdate.toString()+"'";
|
if (record.modifdate!=null){
|
||||||
|
value$="'"+convert2conventionalFormat(record.modifdate)+"'";
|
||||||
|
// value$="'"+record.modifdate.getTimeInMillis()+"'";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
if (record.otherValues!=null){
|
||||||
|
value$ = "'"+record.otherValues.get(k)+"'";
|
||||||
|
k++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
value$ = "'"+record.otherValues.get(index)+"'";
|
|
||||||
|
|
||||||
buffer.append(value$);
|
buffer.append(value$);
|
||||||
if (index<nNames-1){
|
if (index<nNames-1){
|
||||||
buffer.append(",");
|
buffer.append(",");
|
||||||
|
@ -137,29 +193,27 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) throws Exception{
|
||||||
AlgorithmConfiguration config = Regressor.getConfig();
|
AlgorithmConfiguration config = Regressor.getConfig();
|
||||||
config.setNumberOfResources(1);
|
config.setNumberOfResources(1);
|
||||||
config.setParam(longitudeColumn,"presence_basking_cluster");
|
config.setParam(longitudeColumn,"decimallongitude");
|
||||||
config.setParam(latitudeColumn,"centerlong"+AlgorithmConfiguration.getListSeparator()+"centerlat");
|
config.setParam(latitudeColumn,"decimallatitude");
|
||||||
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
config.setParam(recordedByColumn,"recordedby");
|
||||||
config.setParam("maxIterations","1000");
|
config.setParam(scientificNameColumn,"scientificname");
|
||||||
config.setParam("minClusters","20");
|
config.setParam(eventDateColumn,"eventdate");
|
||||||
config.setParam("maxClusters","30");
|
config.setParam(lastModificationColumn,"modified");
|
||||||
config.setParam("min_points","1");
|
config.setParam(rightTableNameF,"whitesharkoccurrences2");
|
||||||
/*
|
config.setParam(leftTableNameF,"whitesharkoccurrences1");
|
||||||
lonFld=config.getParam(longitudeColumn);
|
// config.setParam(rightTableNameF,"whitesharkoccurrences2");
|
||||||
latFld=config.getParam(latitudeColumn);
|
// config.setParam(rightTableNameF,"whitesharkoccurrences1");
|
||||||
recordedByFld=config.getParam(recordedByColumn);
|
config.setParam(mergedTableNameF,"whitesharkoccurrencesmerged");
|
||||||
scientificNameFld=config.getParam(scientificNameColumn);
|
config.setParam(spatialTolerance,"0.5");
|
||||||
eventDatFld=config.getParam(eventDateColumn);
|
config.setParam(confidence,"0.8");
|
||||||
modifDatFld=config.getParam(lastModificationColumn);
|
|
||||||
leftTableName=config.getParam(leftTableNameF);
|
OccurrencePointsMerger occm = new OccurrencePointsMerger();
|
||||||
rightTableName=config.getParam(rightTableNameF);
|
occm.setConfiguration(config);
|
||||||
mergedTableName=config.getParam(mergedTableNameF);
|
occm.init();
|
||||||
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
occm.compute();
|
||||||
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -244,20 +298,51 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
protected float probabilityStrings(String first, String second){
|
||||||
return (float)Math.random();
|
if ((first==null) ||(second==null))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return (float) new DistanceCalculator().CD(false, first, second);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
protected float probabilityDates(Calendar first, Calendar second){
|
||||||
//if over the threshold then add to the complete list of elements
|
if ((first==null) ||(second==null))
|
||||||
if (probability<confidenceValue)
|
return 1;
|
||||||
objectstoinsert.add(rightOcc);
|
if (first.compareTo(second)==0)
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
||||||
|
float probability = 0;
|
||||||
|
float distance = (float)Math.sqrt(Math.abs(left.x-right.x)+Math.abs(left.y-right.y));
|
||||||
|
if (distance>spatialToleranceValue)
|
||||||
|
probability=0;
|
||||||
|
else{
|
||||||
|
float pSpecies = probabilityStrings(right.scientificName, left.scientificName);
|
||||||
|
float pRecordedBy= probabilityStrings(right.recordedby, left.recordedby);
|
||||||
|
float pDates = probabilityDates(right.eventdate, left.eventdate);
|
||||||
|
probability = pSpecies*pRecordedBy*pDates;
|
||||||
|
}
|
||||||
|
|
||||||
|
return probability*100;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void manageHighProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
||||||
|
//if over the threshold then don't add
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void manageLowProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
||||||
|
//if over the threshold then add to the element
|
||||||
|
objectstoinsert.add(rightOcc);
|
||||||
|
}
|
||||||
|
|
||||||
protected void persist(){
|
protected void persist(){
|
||||||
StringBuffer buffer = new StringBuffer();
|
StringBuffer buffer = new StringBuffer();
|
||||||
int toins = objectstoinsert.size();
|
int toins = objectstoinsert.size();
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
|
if (toins>0){
|
||||||
for (OccurrenceRecord record:objectstoinsert){
|
for (OccurrenceRecord record:objectstoinsert){
|
||||||
buffer.append("(");
|
buffer.append("(");
|
||||||
buffer.append(occurrenceRecord2String(record));
|
buffer.append(occurrenceRecord2String(record));
|
||||||
|
@ -267,8 +352,11 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
|
|
||||||
counter++;
|
counter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
|
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
|
||||||
|
// System.out.println("Update:\n"+updateQ);
|
||||||
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -285,7 +373,7 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
int nCols = columnsNames.size();
|
int nCols = columnsNames.size();
|
||||||
columns = new StringBuffer();
|
columns = new StringBuffer();
|
||||||
for (int i=0;i<nCols;i++){
|
for (int i=0;i<nCols;i++){
|
||||||
columns.append(columnsNames);
|
columns.append("\""+columnsNames.get(i)+"\"");
|
||||||
if (i<nCols-1)
|
if (i<nCols-1)
|
||||||
columns.append(",");
|
columns.append(",");
|
||||||
}
|
}
|
||||||
|
@ -304,31 +392,52 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
//for each element in dx
|
//for each element in dx
|
||||||
List<OccurrenceRecord> leftRecords = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
List<OccurrenceRecord> leftRecords = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||||
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
|
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
|
||||||
int iterations = 0;
|
int rightCounter = 0;
|
||||||
|
int similaritiesCounter = 0;
|
||||||
for (Object rRow:rightRows){
|
for (Object rRow:rightRows){
|
||||||
|
//transform into an occurrence object
|
||||||
OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
|
OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
|
||||||
//for each element in sx
|
//for each element in sx
|
||||||
int k=0;
|
int k=0;
|
||||||
|
int leftrecordsSize = 0;
|
||||||
|
boolean found = false;
|
||||||
|
float p = 0;
|
||||||
|
OccurrenceRecord bestleftOcc = null;
|
||||||
for (Object lRow:leftRows){
|
for (Object lRow:leftRows){
|
||||||
OccurrenceRecord leftOcc = null;
|
OccurrenceRecord leftOcc = null;
|
||||||
if (iterations==0){
|
//only for the first iteration on the left occurrences perform the transformation
|
||||||
|
if (leftrecordsSize<=k){
|
||||||
|
//transform into an occurrence object
|
||||||
leftOcc = row2OccurrenceRecord((Object[])lRow);
|
leftOcc = row2OccurrenceRecord((Object[])lRow);
|
||||||
leftRecords.add(leftOcc);
|
leftRecords.add(leftOcc);
|
||||||
|
leftrecordsSize++;
|
||||||
|
// System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
leftOcc =leftRecords.get(k);
|
leftOcc =leftRecords.get(k);
|
||||||
|
|
||||||
//evaluate P(dx,sx)
|
//evaluate P(dx,sx)
|
||||||
float p = extProb(leftOcc,rightOcc);
|
p = extProb(leftOcc,rightOcc);
|
||||||
manageProbability(p, leftOcc, rightOcc);
|
|
||||||
if (p>=confidenceValue){
|
if (p>=confidenceValue){
|
||||||
AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")");
|
bestleftOcc=leftOcc;
|
||||||
|
found=true;
|
||||||
|
similaritiesCounter++;
|
||||||
|
AnalysisLogger.getLogger().trace("Found a similarity with P="+p+" between ("+"\""+leftOcc.scientificName+"\""+","+leftOcc.x+"\""+","+"\""+leftOcc.y+"\""+","+"\""+leftOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(leftOcc.eventdate)+"\""+") VS "+
|
||||||
|
"("+"\""+rightOcc.scientificName+"\""+","+"\""+rightOcc.x+"\""+","+"\""+rightOcc.y+"\""+","+"\""+rightOcc.recordedby+"\""+","+"\""+ convert2conventionalFormat(rightOcc.eventdate)+"\""+")");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
iterations++;
|
rightCounter++;
|
||||||
|
if (found)
|
||||||
|
manageHighProbability(p, bestleftOcc, rightOcc);
|
||||||
|
else
|
||||||
|
manageLowProbability(p, bestleftOcc, rightOcc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().trace("Found "+similaritiesCounter+" similarities on "+rightCounter+" elements");
|
||||||
|
|
||||||
//transform the complete list into a table
|
//transform the complete list into a table
|
||||||
persist();
|
persist();
|
||||||
//close DB connection
|
//close DB connection
|
||||||
|
@ -338,6 +447,7 @@ public class OccurrencePointsMerger implements Transducerer{
|
||||||
finally{
|
finally{
|
||||||
if (dbconnection!=null)
|
if (dbconnection!=null)
|
||||||
dbconnection.close();
|
dbconnection.close();
|
||||||
|
AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -158,6 +158,7 @@ public class DatabaseUtils {
|
||||||
return "insert into "+table+" ("+columnsNames+") values "+values;
|
return "insert into "+table+" ("+columnsNames+") values "+values;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String copyFileToTableStatement (String file, String table){
|
public static String copyFileToTableStatement (String file, String table){
|
||||||
return "COPY "+table+" FROM '"+file+"' DELIMITERS ';' WITH NULL AS 'null string'";
|
return "COPY "+table+" FROM '"+file+"' DELIMITERS ';' WITH NULL AS 'null string'";
|
||||||
}
|
}
|
||||||
|
|
|
@ -268,13 +268,13 @@ public class Operations {
|
||||||
return a * (x - shift) * (x - shift) + b * (x - shift) + c;
|
return a * (x - shift) * (x - shift) + b * (x - shift) + c;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double[] inverseParabol(double a, double b, double c, double y) {
|
public static double[] inverseParabol(double a, double b, double c, double y) {
|
||||||
|
|
||||||
double[] ret = { (-1d * b + Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a), (-1d * b - Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a) };
|
double[] ret = { (-1d * b + Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a), (-1d * b - Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a) };
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double logaritmicTransformation(double y) {
|
public static double logaritmicTransformation(double y) {
|
||||||
y = Math.abs(y);
|
y = Math.abs(y);
|
||||||
if (y == 0)
|
if (y == 0)
|
||||||
return -Double.MAX_VALUE;
|
return -Double.MAX_VALUE;
|
||||||
|
@ -356,6 +356,7 @@ public class Operations {
|
||||||
System.out.println("OK");
|
System.out.println("OK");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//distributes uniformly elements in parts
|
||||||
public static int[] takeChunks(int numberOfElements, int partitionFactor) {
|
public static int[] takeChunks(int numberOfElements, int partitionFactor) {
|
||||||
int[] partitions = new int[1];
|
int[] partitions = new int[1];
|
||||||
if (partitionFactor <= 0) {
|
if (partitionFactor <= 0) {
|
||||||
|
|
Loading…
Reference in New Issue