git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@58426 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
d6686c91dc
commit
b3bf347f0d
|
@ -44,7 +44,7 @@ public class BioClimateHSPENTransducer extends BioClimateHSPECTransducer{
|
|||
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
|
||||
|
||||
List<TableTemplates> templateHspec = new ArrayList<TableTemplates>();
|
||||
templateHspec.add(TableTemplates.HCAF);
|
||||
templateHspec.add(TableTemplates.HSPEN);
|
||||
TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false);
|
||||
PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false);
|
||||
|
||||
|
|
|
@ -0,0 +1,343 @@
|
|||
package org.gcube.dataanalysis.ecoengine.transducers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.List;
|
||||
|
||||
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||
import org.gcube.dataanalysis.ecoengine.interfaces.Transducerer;
|
||||
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class OccurrencePointsMerger implements Transducerer{
|
||||
|
||||
static String longitudeColumn= "longitudeColumn";
|
||||
static String latitudeColumn= "latitudeColumn";
|
||||
static String recordedByColumn= "recordedByColumn";
|
||||
static String scientificNameColumn = "scientificNameColumn";
|
||||
static String eventDateColumn = "eventDateColumn";
|
||||
static String lastModificationColumn = "lastModificationColumn";
|
||||
static String rightTableNameF= "rightTableName";
|
||||
static String leftTableNameF = "leftTableName";
|
||||
static String mergedTableNameF = "mergedTableName";
|
||||
static String spatialTolerance= "spatialTolerance";
|
||||
static String confidence= "confidence";
|
||||
|
||||
protected List<OccurrenceRecord> records_left;
|
||||
protected List<OccurrenceRecord> records_right;
|
||||
protected AlgorithmConfiguration config;
|
||||
|
||||
protected String lonFld;
|
||||
protected String latFld;
|
||||
protected String recordedByFld;
|
||||
protected String scientificNameFld;
|
||||
protected String eventDatFld;
|
||||
protected String modifDatFld;
|
||||
protected String leftTableName;
|
||||
protected String rightTableName;
|
||||
protected String mergedTableName;
|
||||
protected float spatialToleranceValue;
|
||||
protected float confidenceValue;
|
||||
protected StringBuffer columns;
|
||||
protected List<OccurrenceRecord> objectstoinsert;
|
||||
protected List<OccurrenceRecord> objectstodelete;
|
||||
protected List<Object> columnsNames;
|
||||
protected SessionFactory dbconnection;
|
||||
|
||||
protected class OccurrenceRecord{
|
||||
|
||||
public String scientificName;
|
||||
public String recordedby;
|
||||
public Calendar eventdate;
|
||||
public Calendar modifdate;
|
||||
// public String locality;
|
||||
// public String country;
|
||||
public float x;
|
||||
public float y;
|
||||
|
||||
// Map<String,String> metadata;
|
||||
public List<String> otherValues;
|
||||
public OccurrenceRecord(){
|
||||
otherValues = new ArrayList<String>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
|
||||
OccurrenceRecord record = new OccurrenceRecord();
|
||||
int index = 0;
|
||||
for (Object name:columnsNames){
|
||||
String name$ = ""+name;
|
||||
String value$ = ""+row[index];
|
||||
if (name$.equalsIgnoreCase(lonFld)){
|
||||
record.x=Float.parseFloat(value$);
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(latFld)){
|
||||
record.y=Float.parseFloat(value$);
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||
record.recordedby=value$;
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||
record.eventdate=DateGuesser.convertDate(value$);
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||
record.modifdate=DateGuesser.convertDate(value$);
|
||||
}
|
||||
else
|
||||
record.otherValues.add(value$);
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
public String occurrenceRecord2String(OccurrenceRecord record){
|
||||
StringBuffer buffer =new StringBuffer();
|
||||
int index = 0;
|
||||
int nNames = columnsNames.size();
|
||||
for (Object name:columnsNames){
|
||||
|
||||
String name$ = ""+name;
|
||||
String value$ = null;
|
||||
if (name$.equalsIgnoreCase(lonFld)){
|
||||
value$="'"+record.x+"'";
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(latFld)){
|
||||
value$="'"+record.y+"'";
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||
value$="'"+record.recordedby+"'";
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||
value$="'"+record.eventdate.toString()+"'";
|
||||
}
|
||||
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||
value$="'"+record.modifdate.toString()+"'";
|
||||
}
|
||||
else
|
||||
value$ = "'"+record.otherValues.get(index)+"'";
|
||||
|
||||
buffer.append(value$);
|
||||
if (index<nNames-1){
|
||||
buffer.append(",");
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
AlgorithmConfiguration config = Regressor.getConfig();
|
||||
config.setNumberOfResources(1);
|
||||
config.setParam(longitudeColumn,"presence_basking_cluster");
|
||||
config.setParam(latitudeColumn,"centerlong"+AlgorithmConfiguration.getListSeparator()+"centerlat");
|
||||
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
||||
config.setParam("maxIterations","1000");
|
||||
config.setParam("minClusters","20");
|
||||
config.setParam("maxClusters","30");
|
||||
config.setParam("min_points","1");
|
||||
|
||||
lonFld=config.getParam(longitudeColumn);
|
||||
latFld=config.getParam(latitudeColumn);
|
||||
recordedByFld=config.getParam(recordedByColumn);
|
||||
scientificNameFld=config.getParam(scientificNameColumn);
|
||||
eventDatFld=config.getParam(eventDateColumn);
|
||||
modifDatFld=config.getParam(lastModificationColumn);
|
||||
leftTableName=config.getParam(leftTableNameF);
|
||||
rightTableName=config.getParam(rightTableNameF);
|
||||
mergedTableName=config.getParam(mergedTableNameF);
|
||||
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
||||
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<StatisticalType> getInputParameters() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getResourceLoad() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getResources() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public float getStatus() {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public INFRASTRUCTURE getInfrastructure() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public StatisticalType getOutput() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
|
||||
AnalysisLogger.setLogger(config.getConfigPath()+AlgorithmConfiguration.defaultLoggerFile);
|
||||
lonFld=config.getParam(longitudeColumn);
|
||||
latFld=config.getParam(latitudeColumn);
|
||||
recordedByFld=config.getParam(recordedByColumn);
|
||||
scientificNameFld=config.getParam(scientificNameColumn);
|
||||
eventDatFld=config.getParam(eventDateColumn);
|
||||
modifDatFld=config.getParam(lastModificationColumn);
|
||||
leftTableName=config.getParam(leftTableNameF);
|
||||
rightTableName=config.getParam(rightTableNameF);
|
||||
mergedTableName=config.getParam(mergedTableNameF);
|
||||
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
||||
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
||||
|
||||
objectstoinsert = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||
objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void setConfiguration(AlgorithmConfiguration config) {
|
||||
this.config=config;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
||||
return (float)Math.random();
|
||||
}
|
||||
|
||||
protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
||||
//if over the threshold then add to the complete list of elements
|
||||
if (probability<confidenceValue)
|
||||
objectstoinsert.add(rightOcc);
|
||||
}
|
||||
|
||||
protected void persist(){
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
int toins = objectstoinsert.size();
|
||||
int counter = 0;
|
||||
for (OccurrenceRecord record:objectstoinsert){
|
||||
buffer.append("(");
|
||||
buffer.append(occurrenceRecord2String(record));
|
||||
buffer.append(")");
|
||||
if (counter<toins-1)
|
||||
buffer.append(",");
|
||||
|
||||
counter++;
|
||||
}
|
||||
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
|
||||
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void compute() throws Exception {
|
||||
|
||||
try{
|
||||
//init DB connection
|
||||
AnalysisLogger.getLogger().trace("Initializing DB Connection");
|
||||
dbconnection = DatabaseUtils.initDBSession(config);
|
||||
AnalysisLogger.getLogger().trace("Taking Table Description");
|
||||
//take the description of the table
|
||||
columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName),dbconnection);
|
||||
|
||||
int nCols = columnsNames.size();
|
||||
columns = new StringBuffer();
|
||||
for (int i=0;i<nCols;i++){
|
||||
columns.append(columnsNames);
|
||||
if (i<nCols-1)
|
||||
columns.append(",");
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().trace("Taken Table Description: "+columns);
|
||||
AnalysisLogger.getLogger().trace("Creating merged table: "+mergedTableName);
|
||||
//create new merged table
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(mergedTableName), dbconnection);
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, mergedTableName), dbconnection);
|
||||
//take the elements from sx table
|
||||
AnalysisLogger.getLogger().trace("Taking elements from left table: "+leftTableName);
|
||||
List<Object> leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(),""),dbconnection);
|
||||
//take the elements from dx table
|
||||
AnalysisLogger.getLogger().trace("Taking elements from right table: "+rightTableName);
|
||||
List<Object> rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(),""),dbconnection);
|
||||
//for each element in dx
|
||||
List<OccurrenceRecord> leftRecords = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
|
||||
int iterations = 0;
|
||||
for (Object rRow:rightRows){
|
||||
OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
|
||||
//for each element in sx
|
||||
int k=0;
|
||||
for (Object lRow:leftRows){
|
||||
OccurrenceRecord leftOcc = null;
|
||||
if (iterations==0){
|
||||
leftOcc = row2OccurrenceRecord((Object[])lRow);
|
||||
leftRecords.add(leftOcc);
|
||||
}
|
||||
else
|
||||
leftOcc =leftRecords.get(k);
|
||||
|
||||
//evaluate P(dx,sx)
|
||||
float p = extProb(leftOcc,rightOcc);
|
||||
manageProbability(p, leftOcc, rightOcc);
|
||||
if (p>=confidenceValue){
|
||||
AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")");
|
||||
break;
|
||||
}
|
||||
k++;
|
||||
}
|
||||
iterations++;
|
||||
}
|
||||
//transform the complete list into a table
|
||||
persist();
|
||||
//close DB connection
|
||||
}catch(Exception e){
|
||||
throw e;
|
||||
}
|
||||
finally{
|
||||
if (dbconnection!=null)
|
||||
dbconnection.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -8,6 +8,7 @@ import org.hibernate.SessionFactory;
|
|||
public class DatabaseUtils {
|
||||
|
||||
static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
|
||||
static String queryColumns = "SELECT column_name FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
|
||||
static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name";
|
||||
static String genCreationStatement = "CREATE TABLE %1$s ( %2$s %3$s );";
|
||||
static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;";
|
||||
|
@ -86,7 +87,7 @@ public class DatabaseUtils {
|
|||
columnDescrs = colbuffer.toString();
|
||||
}
|
||||
|
||||
List<Object> columns;
|
||||
public List<Object> columns;
|
||||
|
||||
public List<Object> getColumnDecriptions() {
|
||||
return columns;
|
||||
|
@ -193,6 +194,12 @@ public class DatabaseUtils {
|
|||
return creationStatement;
|
||||
}
|
||||
|
||||
|
||||
public static String getColumnsNamesStatement(String table) {
|
||||
String statement = String.format(queryColumns, table);
|
||||
return statement;
|
||||
}
|
||||
|
||||
public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception {
|
||||
if (createTable) {
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue