This commit is contained in:
Gianpaolo Coro 2012-09-14 16:10:52 +00:00
parent d6686c91dc
commit b3bf347f0d
3 changed files with 352 additions and 2 deletions

View File

@ -44,7 +44,7 @@ public class BioClimateHSPENTransducer extends BioClimateHSPECTransducer{
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect"); DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
List<TableTemplates> templateHspec = new ArrayList<TableTemplates>(); List<TableTemplates> templateHspec = new ArrayList<TableTemplates>();
templateHspec.add(TableTemplates.HCAF); templateHspec.add(TableTemplates.HSPEN);
TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false); TablesList p7 = new TablesList(templateHspec, "HSPEN_TABLE_LIST", "List of HSPEN tables containing the species for which the salinity will be analyzed", false);
PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false); PrimitiveTypesList p8 = new PrimitiveTypesList(PrimitiveTypes.STRING, "HSPEN_TABLE_NAMES", "List of HSPEN table names to be used as labels", false);

View File

@ -0,0 +1,343 @@
package org.gcube.dataanalysis.ecoengine.transducers;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import org.gcube.contentmanagement.graphtools.utils.DateGuesser;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.configuration.INFRASTRUCTURE;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.interfaces.Transducerer;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.hibernate.SessionFactory;
public class OccurrencePointsMerger implements Transducerer{
// ---- Names of the configuration parameters read by init() ----
// Each constant is the KEY under which the corresponding value is looked up in the
// AlgorithmConfiguration; the value itself is a column name, a table name or a number.
static String longitudeColumn= "longitudeColumn";
static String latitudeColumn= "latitudeColumn";
static String recordedByColumn= "recordedByColumn";
static String scientificNameColumn = "scientificNameColumn";
static String eventDateColumn = "eventDateColumn";
static String lastModificationColumn = "lastModificationColumn";
static String rightTableNameF= "rightTableName";
static String leftTableNameF = "leftTableName";
static String mergedTableNameF = "mergedTableName";
static String spatialTolerance= "spatialTolerance";
static String confidence= "confidence";
// ---- Working state ----
// NOTE(review): records_left / records_right are not assigned anywhere in this class as of this revision.
protected List<OccurrenceRecord> records_left;
protected List<OccurrenceRecord> records_right;
// Configuration injected through setConfiguration().
protected AlgorithmConfiguration config;
// Column names resolved from the configuration in init().
protected String lonFld;
protected String latFld;
protected String recordedByFld;
protected String scientificNameFld;
protected String eventDatFld;
protected String modifDatFld;
// Table names resolved from the configuration in init().
protected String leftTableName;
protected String rightTableName;
protected String mergedTableName;
// Numeric parameters parsed with Float.parseFloat in init().
protected float spatialToleranceValue;
protected float confidenceValue;
// Comma-separated column list assembled in compute() and reused by the select/insert statements.
protected StringBuffer columns;
// Records queued for insertion into the merged table (filled by manageProbability, written by persist).
protected List<OccurrenceRecord> objectstoinsert;
// Allocated in init(); NOTE(review): nothing in this class adds to it yet.
protected List<OccurrenceRecord> objectstodelete;
// Result of the column-names query issued in compute(); entries are compared by their string form.
protected List<Object> columnsNames;
// Database session factory opened and closed by compute().
protected SessionFactory dbconnection;
/**
 * In-memory form of one occurrence row.
 *
 * <p>The recognised columns (coordinates, collector, dates, scientific name) get a dedicated
 * field; every remaining column value is kept, as text, in {@link #otherValues}.
 * Locality, country and a free-form metadata map were sketched by the author but are not
 * part of the record.
 */
protected class OccurrenceRecord{
    public String scientificName;
    public String recordedby;
    public Calendar eventdate;
    public Calendar modifdate;
    /** Longitude value parsed from the configured longitude column. */
    public float x;
    /** Latitude value parsed from the configured latitude column. */
    public float y;
    /** Text values of the columns that have no dedicated field; starts out empty. */
    public List<String> otherValues = new ArrayList<String>();

    public OccurrenceRecord(){
    }
}
/**
 * Converts one raw result row into an {@link OccurrenceRecord}.
 *
 * <p>Values are matched to columns by position: the i-th element of {@code row} is treated as
 * the value of the i-th entry of {@code columnsNames}. Column names are compared
 * case-insensitively against the configured field names.
 *
 * <p>Longitude, latitude, collector and the two dates are moved into their dedicated fields.
 * Every other value is appended, in column order, to {@code otherValues}. The scientific-name
 * column is additionally copied into {@code scientificName}; it deliberately stays in
 * {@code otherValues} too, so the list keeps one entry per non-dedicated column.
 *
 * <p>NOTE(review): every value is stringified with {@code "" + value}, so a {@code null}
 * element becomes the text "null"; for the coordinate columns that makes
 * {@code Float.parseFloat} throw {@link NumberFormatException}.
 *
 * @param row the values of one table row, ordered like {@code columnsNames}
 * @return the populated record
 * @throws NumberFormatException if a coordinate value is not a parsable float
 * @throws ArrayIndexOutOfBoundsException if {@code row} has fewer elements than {@code columnsNames}
 */
public OccurrenceRecord row2OccurrenceRecord(Object[] row){
    OccurrenceRecord record = new OccurrenceRecord();
    int index = 0;
    for (Object name:columnsNames){
        String name$ = ""+name;
        String value$ = ""+row[index];
        if (name$.equalsIgnoreCase(lonFld)){
            record.x=Float.parseFloat(value$);
        }
        else if (name$.equalsIgnoreCase(latFld)){
            record.y=Float.parseFloat(value$);
        }
        else if (name$.equalsIgnoreCase(recordedByFld)){
            record.recordedby=value$;
        }
        else if (name$.equalsIgnoreCase(eventDatFld)){
            record.eventdate=DateGuesser.convertDate(value$);
        }
        else if (name$.equalsIgnoreCase(modifDatFld)){
            record.modifdate=DateGuesser.convertDate(value$);
        }
        else {
            // The scientific name was configured but never captured before; expose it on the
            // record while still keeping it among the pass-through values.
            if (name$.equalsIgnoreCase(scientificNameFld)){
                record.scientificName=value$;
            }
            record.otherValues.add(value$);
        }
        index++;
    }
    return record;
}
/**
 * Serialises a record as a comma-separated list of SQL literals, one per entry of
 * {@code columnsNames} and in the same order, ready to be wrapped in parentheses as a
 * VALUES tuple.
 *
 * <p>Dedicated fields are written for the longitude, latitude, collector and date columns;
 * every other column consumes the next entry of {@code record.otherValues}. A separate
 * cursor is used for that list because it only holds the non-dedicated columns, so the
 * overall column index cannot be used to address it.
 *
 * <p>Text values have embedded single quotes doubled. Dates are written as
 * {@code 'yyyy-MM-dd HH:mm:ss'}; a missing date is written as {@code NULL}.
 *
 * @param record the record to serialise
 * @return the literals joined by commas, without surrounding parentheses
 * @throws IndexOutOfBoundsException if {@code otherValues} holds fewer entries than there are
 *         non-dedicated columns
 */
public String occurrenceRecord2String(OccurrenceRecord record){
    StringBuffer buffer = new StringBuffer();
    int otherIndex = 0;
    boolean first = true;
    for (Object name:columnsNames){
        String name$ = ""+name;
        String value$;
        if (name$.equalsIgnoreCase(lonFld)){
            value$="'"+record.x+"'";
        }
        else if (name$.equalsIgnoreCase(latFld)){
            value$="'"+record.y+"'";
        }
        else if (name$.equalsIgnoreCase(recordedByFld)){
            value$=toSqlTextLiteral(record.recordedby);
        }
        else if (name$.equalsIgnoreCase(eventDatFld)){
            value$=toSqlDateLiteral(record.eventdate);
        }
        else if (name$.equalsIgnoreCase(modifDatFld)){
            value$=toSqlDateLiteral(record.modifdate);
        }
        else {
            value$=toSqlTextLiteral(record.otherValues.get(otherIndex));
            otherIndex++;
        }
        if (!first){
            buffer.append(",");
        }
        buffer.append(value$);
        first = false;
    }
    return buffer.toString();
}

/** Wraps a value in single quotes, doubling any embedded single quote. */
private String toSqlTextLiteral(String value){
    // String.valueOf keeps the previous rendering of a null reference as the text "null".
    return "'"+String.valueOf(value).replace("'", "''")+"'";
}

/** Renders a date as a quoted 'yyyy-MM-dd HH:mm:ss' literal, or NULL when the date is absent. */
private String toSqlDateLiteral(Calendar date){
    if (date==null){
        return "NULL";
    }
    // Calendar.toString() is a debugging dump, not a date; format the instant explicitly,
    // in the calendar's own time zone.
    java.text.SimpleDateFormat format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss", java.util.Locale.ROOT);
    format.setTimeZone(date.getTimeZone());
    return "'"+format.format(date.getTime())+"'";
}
/**
 * Manual smoke-test entry point: builds a configuration and initialises a merger with it.
 *
 * <p>The parameter resolution is delegated to {@link #init()} on a real instance; the
 * merger's fields are instance state and cannot be assigned from this static method.
 *
 * <p>NOTE(review): the parameters set below look like they were carried over from a
 * clustering test. The keys the merger actually reads in {@code init()} (table names,
 * "spatialTolerance", "confidence", ...) are not set here, so {@code init()} presumably
 * fails while parsing the numeric values. Replace them with real values before use.
 *
 * @param args ignored
 * @throws Exception if the initialisation fails
 */
public static void main(String[] args) throws Exception {
    AlgorithmConfiguration config = Regressor.getConfig();
    config.setNumberOfResources(1);
    config.setParam(longitudeColumn,"presence_basking_cluster");
    config.setParam(latitudeColumn,"centerlong"+AlgorithmConfiguration.getListSeparator()+"centerlat");
    config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
    config.setParam("maxIterations","1000");
    config.setParam("minClusters","20");
    config.setParam("maxClusters","30");
    config.setParam("min_points","1");

    OccurrencePointsMerger merger = new OccurrencePointsMerger();
    merger.setConfiguration(config);
    merger.init();
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public List<StatisticalType> getInputParameters() {
// TODO: declare the parameters this transducer accepts; none are exposed for now
return null;
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public String getResourceLoad() {
// TODO: report the resource load; no value is produced for now
return null;
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public String getResources() {
// TODO: report the resources in use; no value is produced for now
return null;
}
/**
 * Not implemented yet: the value does not change while compute() runs.
 *
 * @return always {@code 0}
 */
@Override
public float getStatus() {
// TODO: track progress; a constant is returned for now
return 0;
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public INFRASTRUCTURE getInfrastructure() {
// TODO: state the infrastructure this transducer runs on; none is declared for now
return null;
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public StatisticalType getOutput() {
// TODO: describe the produced output; nothing is returned for now
return null;
}
/**
 * Prepares the merger for a run: points the logger at the configured logger file, resolves
 * the column names, table names and numeric thresholds from the configuration, and
 * allocates empty insert/delete queues.
 *
 * <p>{@link #setConfiguration(AlgorithmConfiguration)} must have been called first, since
 * every value is read from {@code config}.
 *
 * @throws Exception if the configuration cannot be read or a numeric parameter cannot be parsed
 */
@Override
public void init() throws Exception {
    String loggerFile = config.getConfigPath()+AlgorithmConfiguration.defaultLoggerFile;
    AnalysisLogger.setLogger(loggerFile);

    // column names
    this.lonFld = config.getParam(longitudeColumn);
    this.latFld = config.getParam(latitudeColumn);
    this.recordedByFld = config.getParam(recordedByColumn);
    this.scientificNameFld = config.getParam(scientificNameColumn);
    this.eventDatFld = config.getParam(eventDateColumn);
    this.modifDatFld = config.getParam(lastModificationColumn);

    // table names
    this.leftTableName = config.getParam(leftTableNameF);
    this.rightTableName = config.getParam(rightTableNameF);
    this.mergedTableName = config.getParam(mergedTableNameF);

    // numeric thresholds
    String tolerance$ = config.getParam(spatialTolerance);
    this.spatialToleranceValue = Float.parseFloat(tolerance$);
    String confidence$ = config.getParam(confidence);
    this.confidenceValue = Float.parseFloat(confidence$);

    // work queues
    this.objectstoinsert = new ArrayList<OccurrenceRecord>();
    this.objectstodelete = new ArrayList<OccurrenceRecord>();
}
/**
 * Stores the configuration that init() and compute() read their parameters from.
 *
 * @param config the configuration to use; kept by reference, not copied
 */
@Override
public void setConfiguration(AlgorithmConfiguration config) {
this.config=config;
}
/**
 * Currently a no-op: this method releases nothing.
 */
@Override
public void shutdown() {
// TODO: add cleanup here if the transducer ever holds resources beyond a compute() call
}
/**
 * Not implemented yet.
 *
 * @return always {@code null}
 */
@Override
public String getDescription() {
// TODO: provide a human-readable description of the merger
return null;
}
/**
 * Scores how likely two records describe the same occurrence.
 *
 * <p>Placeholder implementation: both arguments are ignored and the result is a
 * pseudo-random number taken from {@link Math#random()} and narrowed to {@code float}.
 *
 * <p>NOTE(review): the parameters are declared as (right, left); make sure call sites pass
 * the records in that order once a real, possibly asymmetric, score is implemented.
 *
 * @param right record from the right table (unused for now)
 * @param left record from the left table (unused for now)
 * @return a pseudo-random score
 */
protected float extProb(OccurrenceRecord right,OccurrenceRecord left){
    double randomScore = Math.random();
    return (float)randomScore;
}
/**
 * Reacts to the similarity score of a left/right pair.
 *
 * <p>When the score is strictly below {@code confidenceValue} the right-table record is
 * appended to {@code objectstoinsert}; for any other score nothing happens.
 *
 * @param probability similarity score of the pair
 * @param leftOcc the left-table record of the pair (not used here)
 * @param rightOcc the right-table record of the pair
 */
protected void manageProbability(float probability, OccurrenceRecord leftOcc, OccurrenceRecord rightOcc){
    // below the threshold the right record is not considered a duplicate, so it is queued
    boolean belowThreshold = probability<confidenceValue;
    if (belowThreshold){
        objectstoinsert.add(rightOcc);
    }
}
/**
 * Writes every queued record of {@code objectstoinsert} into the merged table with a single
 * multi-row insert statement built by {@code DatabaseUtils.insertFromBuffer}.
 *
 * <p>Does nothing when the queue is missing or empty: an insert statement without any value
 * tuple would not be valid SQL.
 */
protected void persist(){
    if (objectstoinsert==null || objectstoinsert.isEmpty()){
        AnalysisLogger.getLogger().trace("No records to insert into "+mergedTableName);
        return;
    }
    StringBuffer buffer = new StringBuffer();
    boolean first = true;
    for (OccurrenceRecord record:objectstoinsert){
        // tuples are separated by commas: (v1,v2,...),(v1,v2,...)
        if (!first){
            buffer.append(",");
        }
        buffer.append("(");
        buffer.append(occurrenceRecord2String(record));
        buffer.append(")");
        first = false;
    }
    String updateQ = DatabaseUtils.insertFromBuffer(mergedTableName,columns.toString(),buffer);
    DatabaseFactory.executeSQLUpdate(updateQ, dbconnection);
}
/**
 * Merges the right table into a copy of the left table.
 *
 * <p>Steps:
 * <ol>
 * <li>open the database session;</li>
 * <li>read the column names of the right table and build the comma-separated column list;</li>
 * <li>drop and re-create the merged table as a duplicate of the left table;</li>
 * <li>load all rows of both tables;</li>
 * <li>compare every right record with the left records until one reaches
 *     {@code confidenceValue}, then hand the outcome to {@code manageProbability} once
 *     per right record;</li>
 * <li>insert the queued records into the merged table.</li>
 * </ol>
 * The session is closed in all cases.
 *
 * <p>NOTE(review): the same column list, taken from the right table, is used to select from
 * the left table as well, which presumes both tables share those columns.
 *
 * @throws IllegalStateException if no column can be found for the right table
 * @throws Exception on any database or conversion failure
 */
@Override
public void compute() throws Exception {
    try{
        //init DB connection
        AnalysisLogger.getLogger().trace("Initializing DB Connection");
        dbconnection = DatabaseUtils.initDBSession(config);
        AnalysisLogger.getLogger().trace("Taking Table Description");
        //take the description of the table
        columnsNames = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsNamesStatement(rightTableName),dbconnection);
        if (columnsNames==null || columnsNames.isEmpty()){
            throw new IllegalStateException("No columns found for table "+rightTableName);
        }
        int nCols = columnsNames.size();
        columns = new StringBuffer();
        for (int i=0;i<nCols;i++){
            // append the i-th name only; appending the list itself would repeat its
            // bracketed toString() once per column
            columns.append(columnsNames.get(i));
            if (i<nCols-1)
                columns.append(",");
        }
        AnalysisLogger.getLogger().trace("Taken Table Description: "+columns);
        AnalysisLogger.getLogger().trace("Creating merged table: "+mergedTableName);
        //create new merged table
        DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(mergedTableName), dbconnection);
        DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, mergedTableName), dbconnection);
        //take the elements from the left table
        AnalysisLogger.getLogger().trace("Taking elements from left table: "+leftTableName);
        List<Object> leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(),""),dbconnection);
        //take the elements from the right table
        AnalysisLogger.getLogger().trace("Taking elements from right table: "+rightTableName);
        List<Object> rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(),""),dbconnection);
        AnalysisLogger.getLogger().trace("Processing "+leftTableName+" vs "+rightTableName);
        if (rightRows!=null && !rightRows.isEmpty()){
            // Convert every left row exactly once, up front. Filling this cache lazily
            // inside the comparison loop leaves it incomplete whenever the first right
            // record matches early and the loop is left with a break.
            List<OccurrenceRecord> leftRecords = new ArrayList<OccurrenceRecord>();
            if (leftRows!=null){
                for (Object lRow:leftRows){
                    leftRecords.add(row2OccurrenceRecord((Object[])lRow));
                }
            }
            //for each element in the right table
            for (Object rRow:rightRows){
                OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[])rRow);
                // score and counterpart of the last comparison; with no left record at
                // all the right record is treated as unmatched (score 0, no counterpart)
                float p = 0f;
                OccurrenceRecord comparedLeft = null;
                //for each element in the left table
                for (OccurrenceRecord leftOcc:leftRecords){
                    comparedLeft = leftOcc;
                    //evaluate P(right,left)
                    // NOTE(review): extProb declares its parameters as (right, left) but is
                    // called with (left, right); harmless while the score is symmetric.
                    p = extProb(leftOcc,rightOcc);
                    if (p>=confidenceValue){
                        AnalysisLogger.getLogger().trace("Found a similarity between ("+leftOcc.x+","+leftOcc.y+","+leftOcc.recordedby+ ") "+"("+rightOcc.x+","+rightOcc.y+","+rightOcc.recordedby+")");
                        break;
                    }
                }
                // Decide once per right record, after all comparisons: deciding per pair
                // queues the record once for every non-matching left record, even when a
                // later left record turns out to match.
                manageProbability(p, comparedLeft, rightOcc);
            }
        }
        //transform the complete list into a table
        persist();
    }
    finally{
        //close DB connection
        if (dbconnection!=null)
            dbconnection.close();
    }
}
}

View File

@ -8,6 +8,7 @@ import org.hibernate.SessionFactory;
public class DatabaseUtils { public class DatabaseUtils {
static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'"; static String queryDesc = "SELECT column_name,data_type, character_maximum_length, is_nullable FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
static String queryColumns = "SELECT column_name FROM information_schema.COLUMNS WHERE table_name ='%1$s'";
static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name"; static String queryForKeys = "SELECT b.column_name as name, a.constraint_type as type FROM information_schema.table_constraints as a join information_schema.key_column_usage as b on a.table_name ='%1$s' and a.constraint_name = b.constraint_name";
static String genCreationStatement = "CREATE TABLE %1$s ( %2$s %3$s );"; static String genCreationStatement = "CREATE TABLE %1$s ( %2$s %3$s );";
static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;"; static String updateColValues = "UPDATE %1$s SET %2$s = %3$s.%4$s FROM %3$s WHERE %1$s.%5$s = %3$s.%6$s ;";
@ -86,7 +87,7 @@ public class DatabaseUtils {
columnDescrs = colbuffer.toString(); columnDescrs = colbuffer.toString();
} }
List<Object> columns; public List<Object> columns;
public List<Object> getColumnDecriptions() { public List<Object> getColumnDecriptions() {
return columns; return columns;
@ -193,6 +194,12 @@ public class DatabaseUtils {
return creationStatement; return creationStatement;
} }
/**
 * Builds the query that lists the column names of a table by filling the table name into
 * the {@code queryColumns} template.
 *
 * @param table name of the table; inserted into the statement as-is
 * @return the SQL statement text
 */
public static String getColumnsNamesStatement(String table) {
    return String.format(queryColumns, table);
}
public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception { public static void createBigTable(boolean createTable, String table, String dbdriver, String dbuser, String dbpassword, String dburl, String creationStatement, SessionFactory dbHibConnection) throws Exception {
if (createTable) { if (createTable) {
try { try {