git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@58426 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
d6686c91dc
commit
b3bf347f0d
|
@ -44,7 +44,7 @@ public class BioClimateHSPENTransducer extends BioClimateHSPECTransducer{
|
||||||
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
|
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
|
||||||
|
|
||||||
List<TableTemplates> templateHspec = new ArrayList<TableTemplates>();
|
List<TableTemplates> templateHspec = new ArrayList<TableTemplates>();
|
||||||
templateHspec.add(TableTemplates.HCAF);
|
templateHspec.add(TableTemplates.HSPEN);
|
||||||
TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false);
|
TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false);
|
||||||
PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false);
|
PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,343 @@
|
||||||
|
package org.gcube.dataanalysis.ecoengine.transducers;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
|
||||||
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||||
|
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.interfaces.Transducerer;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
||||||
|
import org.hibernate.SessionFactory;
|
||||||
|
|
||||||
|
public class OccurrencePointsMerger implements Transducerer{
|
||||||
|
|
||||||
|
static String longitudeColumn= "longitudeColumn";
|
||||||
|
static String latitudeColumn= "latitudeColumn";
|
||||||
|
static String recordedByColumn= "recordedByColumn";
|
||||||
|
static String scientificNameColumn = "scientificNameColumn";
|
||||||
|
static String eventDateColumn = "eventDateColumn";
|
||||||
|
static String lastModificationColumn = "lastModificationColumn";
|
||||||
|
static String rightTableNameF= "rightTableName";
|
||||||
|
static String leftTableNameF = "leftTableName";
|
||||||
|
static String mergedTableNameF = "mergedTableName";
|
||||||
|
static String spatialTolerance= "spatialTolerance";
|
||||||
|
static String confidence= "confidence";
|
||||||
|
|
||||||
|
protected List<OccurrenceRecord> records_left;
|
||||||
|
protected List<OccurrenceRecord> records_right;
|
||||||
|
protected AlgorithmConfiguration config;
|
||||||
|
|
||||||
|
protected String lonFld;
|
||||||
|
protected String latFld;
|
||||||
|
protected String recordedByFld;
|
||||||
|
protected String scientificNameFld;
|
||||||
|
protected String eventDatFld;
|
||||||
|
protected String modifDatFld;
|
||||||
|
protected String leftTableName;
|
||||||
|
protected String rightTableName;
|
||||||
|
protected String mergedTableName;
|
||||||
|
protected float spatialToleranceValue;
|
||||||
|
protected float confidenceValue;
|
||||||
|
protected StringBuffer columns;
|
||||||
|
protected List<OccurrenceRecord> objectstoinsert;
|
||||||
|
protected List<OccurrenceRecord> objectstodelete;
|
||||||
|
protected List<Object> columnsNames;
|
||||||
|
protected SessionFactory dbconnection;
|
||||||
|
|
||||||
|
protected class OccurrenceRecord{
|
||||||
|
|
||||||
|
public String scientificName;
|
||||||
|
public String recordedby;
|
||||||
|
public Calendar eventdate;
|
||||||
|
public Calendar modifdate;
|
||||||
|
// public String locality;
|
||||||
|
// public String country;
|
||||||
|
public float x;
|
||||||
|
public float y;
|
||||||
|
|
||||||
|
// Map<String,String> metadata;
|
||||||
|
public List<String> otherValues;
|
||||||
|
public OccurrenceRecord(){
|
||||||
|
otherValues = new ArrayList<String>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
|
||||||
|
OccurrenceRecord record = new OccurrenceRecord();
|
||||||
|
int index = 0;
|
||||||
|
for (Object name:columnsNames){
|
||||||
|
String name$ = ""+name;
|
||||||
|
String value$ = ""+row[index];
|
||||||
|
if (name$.equalsIgnoreCase(lonFld)){
|
||||||
|
record.x=Float.parseFloat(value$);
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(latFld)){
|
||||||
|
record.y=Float.parseFloat(value$);
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||||
|
record.recordedby=value$;
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||||
|
record.eventdate=DateGuesser.convertDate(value$);
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||||
|
record.modifdate=DateGuesser.convertDate(value$);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
record.otherValues.add(value$);
|
||||||
|
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String occurrenceRecord2String(OccurrenceRecord record){
|
||||||
|
StringBuffer buffer =new StringBuffer();
|
||||||
|
int index = 0;
|
||||||
|
int nNames = columnsNames.size();
|
||||||
|
for (Object name:columnsNames){
|
||||||
|
|
||||||
|
String name$ = ""+name;
|
||||||
|
String value$ = null;
|
||||||
|
if (name$.equalsIgnoreCase(lonFld)){
|
||||||
|
value$="'"+record.x+"'";
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(latFld)){
|
||||||
|
value$="'"+record.y+"'";
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(recordedByFld)){
|
||||||
|
value$="'"+record.recordedby+"'";
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(eventDatFld)){
|
||||||
|
value$="'"+record.eventdate.toString()+"'";
|
||||||
|
}
|
||||||
|
else if (name$.equalsIgnoreCase(modifDatFld)){
|
||||||
|
value$="'"+record.modifdate.toString()+"'";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
value$ = "'"+record.otherValues.get(index)+"'";
|
||||||
|
|
||||||
|
buffer.append(value$);
|
||||||
|
if (index<nNames-1){
|
||||||
|
buffer.append(",");
|
||||||
|
}
|
||||||
|
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
AlgorithmConfiguration config = Regressor.getConfig();
|
||||||
|
config.setNumberOfResources(1);
|
||||||
|
config.setParam(longitudeColumn,"presence_basking_cluster");
|
||||||
|
config.setParam(latitudeColumn,"centerlong"+AlgorithmConfiguration.getListSeparator()+"centerlat");
|
||||||
|
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
||||||
|
config.setParam("maxIterations","1000");
|
||||||
|
config.setParam("minClusters","20");
|
||||||
|
config.setParam("maxClusters","30");
|
||||||
|
config.setParam("min_points","1");
|
||||||
|
|
||||||
|
lonFld=config.getParam(longitudeColumn);
|
||||||
|
latFld=config.getParam(latitudeColumn);
|
||||||
|
recordedByFld=config.getParam(recordedByColumn);
|
||||||
|
scientificNameFld=config.getParam(scientificNameColumn);
|
||||||
|
eventDatFld=config.getParam(eventDateColumn);
|
||||||
|
modifDatFld=config.getParam(lastModificationColumn);
|
||||||
|
leftTableName=config.getParam(leftTableNameF);
|
||||||
|
rightTableName=config.getParam(rightTableNameF);
|
||||||
|
mergedTableName=config.getParam(mergedTableNameF);
|
||||||
|
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
||||||
|
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<StatisticalType> getInputParameters() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getResourceLoad() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getResources() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getStatus() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public INFRASTRUCTURE getInfrastructure() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StatisticalType getOutput() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init() throws Exception {
|
||||||
|
|
||||||
|
AnalysisLogger.setLogger(config.getConfigPath()+AlgorithmConfiguration.defaultLoggerFile);
|
||||||
|
lonFld=config.getParam(longitudeColumn);
|
||||||
|
latFld=config.getParam(latitudeColumn);
|
||||||
|
recordedByFld=config.getParam(recordedByColumn);
|
||||||
|
scientificNameFld=config.getParam(scientificNameColumn);
|
||||||
|
eventDatFld=config.getParam(eventDateColumn);
|
||||||
|
modifDatFld=config.getParam(lastModificationColumn);
|
||||||
|
leftTableName=config.getParam(leftTableNameF);
|
||||||
|
rightTableName=config.getParam(rightTableNameF);
|
||||||
|
mergedTableName=config.getParam(mergedTableNameF);
|
||||||
|
spatialToleranceValue=Float.parseFloat(config.getParam(spatialTolerance));
|
||||||
|
confidenceValue=Float.parseFloat(config.getParam(confidence));
|
||||||
|
|
||||||
|
objectstoinsert = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||||
|
objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setConfiguration(AlgorithmConfiguration config) {
|
||||||
|
this.config=config;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void shutdown() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
|
||||||
|
return (float)Math.random();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
|
||||||
|
//if over the threshold then add to the complete list of elements
|
||||||
|
if (probability<confidenceValue)
|
||||||
|
objectstoinsert.add(rightOcc);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void persist(){
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
int toins = objectstoinsert.size();
|
||||||
|
int counter = 0;
|
||||||
|
for (OccurrenceRecord record:objectstoinsert){
|
||||||
|
buffer.append("(");
|
||||||
|
buffer.append(occurrenceRecord2String(record));
|
||||||
|
buffer.append(")");
|
||||||
|
if (counter<toins-1)
|
||||||
|
buffer.append(",");
|
||||||
|
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
|
||||||
|
DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void compute() throws Exception {
|
||||||
|
|
||||||
|
try{
|
||||||
|
//init DB connection
|
||||||
|
AnalysisLogger.getLogger().trace("Initializing DB Connection");
|
||||||
|
dbconnection = DatabaseUtils.initDBSession(config);
|
||||||
|
AnalysisLogger.getLogger().trace("Taking Table Description");
|
||||||
|
//take the description of the table
|
||||||
|
columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName),dbconnection);
|
||||||
|
|
||||||
|
int nCols = columnsNames.size();
|
||||||
|
columns = new StringBuffer();
|
||||||
|
for (int i=0;i<nCols;i++){
|
||||||
|
columns.append(columnsNames);
|
||||||
|
if (i<nCols-1)
|
||||||
|
columns.append(",");
|
||||||
|
}
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().trace("Taken Table Description: "+columns);
|
||||||
|
AnalysisLogger.getLogger().trace("Creating merged table: "+mergedTableName);
|
||||||
|
//create new merged table
|
||||||
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(mergedTableName), dbconnection);
|
||||||
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, mergedTableName), dbconnection);
|
||||||
|
//take the elements from sx table
|
||||||
|
AnalysisLogger.getLogger().trace("Taking elements from left table: "+leftTableName);
|
||||||
|
List<Object> leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(),""),dbconnection);
|
||||||
|
//take the elements from dx table
|
||||||
|
AnalysisLogger.getLogger().trace("Taking elements from right table: "+rightTableName);
|
||||||
|
List<Object> rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(),""),dbconnection);
|
||||||
|
//for each element in dx
|
||||||
|
List<OccurrenceRecord> leftRecords = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
|
||||||
|
AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
|
||||||
|
int iterations = 0;
|
||||||
|
for (Object rRow:rightRows){
|
||||||
|
OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
|
||||||
|
//for each element in sx
|
||||||
|
int k=0;
|
||||||
|
for (Object lRow:leftRows){
|
||||||
|
OccurrenceRecord leftOcc = null;
|
||||||
|
if (iterations==0){
|
||||||
|
leftOcc = row2OccurrenceRecord((Object[])lRow);
|
||||||
|
leftRecords.add(leftOcc);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
leftOcc =leftRecords.get(k);
|
||||||
|
|
||||||
|
//evaluate P(dx,sx)
|
||||||
|
float p = extProb(leftOcc,rightOcc);
|
||||||
|
manageProbability(p, leftOcc, rightOcc);
|
||||||
|
if (p>=confidenceValue){
|
||||||
|
AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
iterations++;
|
||||||
|
}
|
||||||
|
//transform the complete list into a table
|
||||||
|
persist();
|
||||||
|
//close DB connection
|
||||||
|
}catch(Exception e){
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
finally{
|
||||||
|
if (dbconnection!=null)
|
||||||
|
dbconnection.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -8,6 +8,7 @@ import org.hibernate.SessionFactory;
|
||||||
public class DatabaseUtils {
|
public class DatabaseUtils {
|
||||||
|
|
||||||
static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
|
static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
|
||||||
|
static String queryColumns = "SELECT column_name FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
|
||||||
static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name";
|
static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name";
|
||||||
static String genCreationStatement = "CREATE TABLE %1$s ( %2$s %3$s );";
|
static String genCreationStatement = "CREATE TABLE %1$s ( %2$s %3$s );";
|
||||||
static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;";
|
static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;";
|
||||||
|
@ -86,7 +87,7 @@ public class DatabaseUtils {
|
||||||
columnDescrs = colbuffer.toString();
|
columnDescrs = colbuffer.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Object> columns;
|
public List<Object> columns;
|
||||||
|
|
||||||
public List<Object> getColumnDecriptions() {
|
public List<Object> getColumnDecriptions() {
|
||||||
return columns;
|
return columns;
|
||||||
|
@ -193,6 +194,12 @@ public class DatabaseUtils {
|
||||||
return creationStatement;
|
return creationStatement;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static String getColumnsNamesStatement(String table) {
|
||||||
|
String statement = String.format(queryColumns, table);
|
||||||
|
return statement;
|
||||||
|
}
|
||||||
|
|
||||||
public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception {
|
public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception {
|
||||||
if (createTable) {
|
if (createTable) {
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in New Issue