Author: Gianpaolo Coro
Date:   2012-11-30 09:41:06 +00:00
Parent: 0b9aac8a87
Commit: e732849b50

6 changed files with 219 additions and 98 deletions

File: AnalysisLogger.java

@@ -12,7 +12,7 @@ public class AnalysisLogger {
     public static Logger getLogger(){
         if (logger == null){
-            setLogger("./ALog.properties");
+            // setLogger("./ALog.properties");
             logger = Logger.getLogger("AnalysisLogger");
         }
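Note: after this hunk, getLogger() no longer self-configures from ./ALog.properties; it falls back to a bare Logger.getLogger("AnalysisLogger") unless setLogger() was called first. A companion hunk below also comments out the per-algorithm setLogger() call in OccurrencePointsMerger.init(), so configuration is presumably expected to happen once at startup. A minimal sketch of that assumption (the wrapper class and properties path are illustrative, not part of this commit):

    public class LoggerBootstrap {
        public static void main(String[] args) {
            // Configure the shared logger once, up front; after this commit
            // getLogger() no longer loads ./ALog.properties on its own.
            AnalysisLogger.setLogger("./cfg/ALog.properties"); // illustrative path
            AnalysisLogger.getLogger().trace("analysis logger configured");
        }
    }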

File: ActorNode.java

@@ -11,7 +11,7 @@ public abstract class ActorNode implements GenericAlgorithm{
     public abstract float getInternalStatus();
     // execute a single node
-    public abstract int executeNode(int cellStarIndex, int numberOfRightElementsToProcess, int leftStartIndex, int numberOfLeftElementsToProcess, boolean duplicate, String sandboxFolder, String nodeConfigurationFileObject, String logfileNameToProduce);
+    public abstract int executeNode(int rightStartIndex, int numberOfRightElementsToProcess, int leftStartIndex, int numberOfLeftElementsToProcess, boolean duplicate, String sandboxFolder, String nodeConfigurationFileObject, String logfileNameToProduce);
     // An initialization phase in which the inputs are initialized
     public abstract void setup(AlgorithmConfiguration config) throws Exception;
@@ -22,9 +22,6 @@ public abstract class ActorNode implements GenericAlgorithm{
     // get overall number of geographical information to process
     public abstract int getNumberOfLeftElements();
-
-    // get overall number of processed species
-    public abstract int getNumberOfProcessedElements();
     // stop the sexecution of the node
    public abstract void stop();
@@ -65,7 +62,7 @@ public abstract class ActorNode implements GenericAlgorithm{
     System.out.println("Generic Node: executing class");
-    SpatialProbabilityDistributionNode node = (SpatialProbabilityDistributionNode) Class.forName(algorithmClass).newInstance();
+    ActorNode node = (ActorNode) Class.forName(algorithmClass).newInstance();
     node.executeNode(order, chunksize, speciesOrder, speciesChunksize, duplicate, path, nodeConfiguration, logfile);
 } catch (Exception e) {
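Note: the last hunk aligns the reflective instantiation with this class's own hierarchy: the generic-node entry point previously cast the loaded class to the unrelated SpatialProbabilityDistributionNode, which would throw a ClassCastException for any plain ActorNode implementation. A minimal sketch of the corrected pattern, to be read inside the surrounding try/catch (variable and argument names are illustrative):

    // Load a concrete ActorNode implementation by name and run one chunk.
    ActorNode node = (ActorNode) Class.forName(algorithmClass).newInstance();
    node.setup(config);
    node.executeNode(rightStart, rightCount, leftStart, leftCount,
            false, sandboxFolder, nodeConfigFile, logFile);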

File: SpatialProbabilityDistributionNode.java

@@ -28,7 +28,7 @@ public abstract class SpatialProbabilityDistributionNode implements GenericAlgor
     //get overall number of processed species
     public abstract int getNumberOfProcessedSpecies();
-    //stop the sexecution of the node
+    //stop the execution of the node
     public abstract void stop();
     //prostprocess after the whole calculation : reduce operation

File: FeedForwardNNFile.java

@@ -43,7 +43,7 @@ public class FeedForwardNNFile extends ModelAquamapsNN{
     public StatisticalType getOutput() {
         HashMap<String,StatisticalType> map = new HashMap<String, StatisticalType>();
         PrimitiveType p = new PrimitiveType(File.class.getName(), new File(fileName), PrimitiveTypes.FILE, "NeuralNetwork","Trained Neural Network");
-        PrimitiveType score = new PrimitiveType(String.class.getName(), nn.en, PrimitiveTypes.STRING, "LearningScore","Learning Score");
+        PrimitiveType score = new PrimitiveType(String.class.getName(), ""+nn.en, PrimitiveTypes.STRING, "LearningScore","Learning Score");
         List<TableTemplates> template = new ArrayList<TableTemplates>();
         template.add(TableTemplates.GENERIC);
         OutputTable outTable = new OutputTable(template, trainingDataSet, trainingDataSet, "Output table");
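Note: the only change here stringifies the learning score. The PrimitiveType is declared with String.class.getName() and PrimitiveTypes.STRING, so passing the raw numeric field nn.en would hand consumers an object that is not a String. The ""+nn.en idiom forces the conversion; a sketch of the equivalent, arguably clearer form (assuming nn.en is the network's numeric training error):

    double en = nn.en;                    // numeric learning score from the trained net
    String score = "" + en;               // idiom used in this commit
    String clearer = String.valueOf(en);  // equivalent alternative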

File: OccurrencePointsMerger.java

@@ -202,7 +202,6 @@ public class OccurrencePointsMerger implements Transducerer {
         return buffer.toString();
     }
-
     @Override
     public List<StatisticalType> getInputParameters() {
         List<TableTemplates> templatesOccurrence = new ArrayList<TableTemplates>();
@@ -282,7 +281,7 @@ public class OccurrencePointsMerger implements Transducerer {
     @Override
     public void init() throws Exception {
-        AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
+        // AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
         lonFld = config.getParam(longitudeColumn);
         latFld = config.getParam(latitudeColumn);
         recordedByFld = config.getParam(recordedByColumn);
@@ -306,11 +305,6 @@ public class OccurrencePointsMerger implements Transducerer {
         this.config = config;
     }
-
-    @Override
-    public void shutdown() {
-    }
-
     @Override
     public String getDescription() {
         return "An algorithm for merging two sets of occurrence points of species coming from the Species Discovery Facility of D4Science";
@@ -352,12 +346,7 @@ public class OccurrencePointsMerger implements Transducerer {
     // if it is the left then leave it as is
     // otherwise put the left in the deletion list and the right in the insertion list
-    if (
-        ((leftOcc.modifdate!=null)&&(rightOcc.modifdate!=null)&&leftOcc.modifdate.before(rightOcc.modifdate))
-        ||
-        (leftOcc.modifdate==null)&&(rightOcc.modifdate!=null)
-    )
-    {
+    if (((leftOcc.modifdate != null) && (rightOcc.modifdate != null) && leftOcc.modifdate.before(rightOcc.modifdate)) || (leftOcc.modifdate == null) && (rightOcc.modifdate != null)) {
         objectstodelete.add(leftOcc);
         objectstoinsert.add(rightOcc);
@@ -439,6 +428,7 @@ public class OccurrencePointsMerger implements Transducerer {
         objectstodelete = new ArrayList<OccurrencePointsMerger.OccurrenceRecord>();
         System.gc();
     }
+
     protected void prepareFinalTable() throws Exception {
         DatabaseFactory.executeSQLUpdate(DatabaseUtils.duplicateTableStatement(leftTableName, finalTableName), dbconnection);
     }
@@ -455,6 +445,136 @@ public class OccurrencePointsMerger implements Transducerer {
             columns.append(",");
         }
     }
+
+    public void initDB(boolean buildTable) throws Exception {
+        // init DB connection
+        AnalysisLogger.getLogger().trace("Initializing DB Connection");
+        dbconnection = DatabaseUtils.initDBSession(config);
+        if (buildTable) {
+            AnalysisLogger.getLogger().trace("Taking Table Description");
+            extractColumnNames();
+            AnalysisLogger.getLogger().trace("Taken Table Description: " + columns);
+            AnalysisLogger.getLogger().trace("Creating final table: " + finalTableName);
+            // create new merged table
+            try {
+                DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(finalTableName), dbconnection);
+            } catch (Exception e1) {
+            }
+            prepareFinalTable();
+        }
+    }
+
+    @Override
+    public void shutdown() {
+        if (dbconnection != null)
+            try {
+                dbconnection.close();
+            } catch (Exception e) {
+            }
+    }
+
+    public List<Object> leftRows;
+    public List<Object> rightRows;
+
+    public int getNumLeftObjects(){
+        if (leftRows!=null)
+            return leftRows.size();
+        else return 0;
+    }
+
+    public int getNumRightObjects(){
+        if (rightRows!=null)
+            return rightRows.size();
+        else return 0;
+    }
+
+    public void takeFullRanges() {
+        // take the elements from sx table
+        AnalysisLogger.getLogger().trace("Taking elements from left table: " + leftTableName);
+        leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(), ""), dbconnection);
+        // take the elements from dx table
+        AnalysisLogger.getLogger().trace("Taking elements from right table: " + rightTableName);
+        rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(), ""), dbconnection);
+    }
+
+    public void takeRange(int offsetLeft, int numLeft, int offsetRight, int numRight) {
+        // take the elements from sx table
+        AnalysisLogger.getLogger().trace("Taking elements from left table: " + leftTableName);
+        leftRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(leftTableName, columns.toString(), "offset " + offsetLeft + " limit " + numLeft), dbconnection);
+        // take the elements from dx table
+        AnalysisLogger.getLogger().trace("Taking elements from right table: " + rightTableName);
+        rightRows = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(rightTableName, columns.toString(), "offset " + offsetRight + " limit " + numRight), dbconnection);
+    }
+
+    public void computeRange() throws Exception {
+        try {
+            AnalysisLogger.getLogger().trace("Processing " + leftTableName + " vs " + rightTableName);
+            status = 10;
+            int rightCounter = 0;
+            int similaritiesCounter = 0;
+            int allrightrows = rightRows.size();
+            for (Object rRow : rightRows) {
+                // transform into an occurrence object
+                OccurrenceRecord rightOcc = row2OccurrenceRecord((Object[]) rRow);
+                // for each element in sx
+                int k = 0;
+                boolean found = false;
+                float p = 0;
+                OccurrenceRecord bestleftOcc = null;
+                for (Object lRow : leftRows) {
+                    OccurrenceRecord leftOcc = null;
+                    leftOcc = row2OccurrenceRecord((Object[]) lRow);
+                    p = extProb(leftOcc, rightOcc);
+                    if (p >= confidenceValue) {
+                        bestleftOcc = leftOcc;
+                        found = true;
+                        similaritiesCounter++;
+                        AnalysisLogger.getLogger().trace("Found a similarity with P=" + p + " between (" + "\"" + leftOcc.scientificName + "\"" + ",\"" + leftOcc.x + "\"" + "," + "\"" + leftOcc.y + "\"" + "," + "\"" + leftOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(leftOcc.eventdate) + "\"" + ") VS " + "(" + "\"" + rightOcc.scientificName + "\"" + "," + "\"" + rightOcc.x + "\"" + "," + "\"" + rightOcc.y + "\"" + "," + "\"" + rightOcc.recordedby + "\"" + "," + "\"" + convert2conventionalFormat(rightOcc.eventdate) + "\"" + ")");
+                        // break;
+                        if (!firstbest)
+                            manageHighProbability(p, bestleftOcc, rightOcc);
+                        else
+                            break;
+                    } else if (!firstbest)
+                        manageLowProbability(p, bestleftOcc, rightOcc);
+                    k++;
+                }
+                rightCounter++;
+                if (firstbest) {
+                    if (found)
+                        manageHighProbability(p, bestleftOcc, rightOcc);
+                    else
+                        manageLowProbability(p, bestleftOcc, rightOcc);
+                }
+                status = Math.min(90, 10f + (80 * ((float) rightCounter) / ((float) allrightrows)));
+                if (rightCounter % 500 == 0) {
+                    AnalysisLogger.getLogger().trace("Persisting ... " + rightCounter + " over " + allrightrows);
+                    persist();
+                }
+            }
+            AnalysisLogger.getLogger().trace("Found " + similaritiesCounter + " similarities on " + rightCounter + " elements");
+            status = 90;
+            // transform the complete list into a table
+            persist();
+            // close DB connection
+        } catch (Exception e) {
+            throw e;
+        } finally {
+            shutdown();
+            status = 100;
+            AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
+        }
+    }
+
     @Override
     public void compute() throws Exception {
@@ -500,14 +620,7 @@ public class OccurrencePointsMerger implements Transducerer {
     OccurrenceRecord leftOcc = null;
     // only for the first iteration on the left occurrences perform the transformation
     /*
-    if (leftrecordsSize <= k) {
-        // transform into an occurrence object
-        leftOcc = row2OccurrenceRecord((Object[]) lRow);
-        leftRecords.add(leftOcc);
-        leftrecordsSize++;
-        // System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size());
-    } else
-        leftOcc = leftRecords.get(k);
+     * if (leftrecordsSize <= k) { // transform into an occurrence object leftOcc = row2OccurrenceRecord((Object[]) lRow); leftRecords.add(leftOcc); leftrecordsSize++; // System.out.println("ADDED "+k+"-th elements size: "+leftRecords.size()); } else leftOcc = leftRecords.get(k);
     */
     leftOcc = row2OccurrenceRecord((Object[]) lRow);
     // evaluate P(dx,sx)
@@ -523,8 +636,7 @@ public class OccurrencePointsMerger implements Transducerer {
         manageHighProbability(p, bestleftOcc, rightOcc);
     else
         break;
-}
-else if (!firstbest)
+} else if (!firstbest)
     manageLowProbability(p, bestleftOcc, rightOcc);
 k++;
 }
@@ -556,7 +668,8 @@ public class OccurrencePointsMerger implements Transducerer {
     if (dbconnection != null)
         try {
             dbconnection.close();
-        }catch(Exception e){}
+        } catch (Exception e) {
+        }
     status = 100;
     AnalysisLogger.getLogger().trace("Occ Points Processing Finished and db closed");
 }
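Note: the large added block factors the monolithic compute() flow into reusable steps: initDB() opens the connection and optionally rebuilds the merged table, takeFullRanges()/takeRange() load the left and right rows (whole tables or offset/limit chunks), computeRange() merges whatever was loaded, and the reinstated shutdown() now actually closes the connection instead of being a no-op. A hedged driver sketch under those assumptions; chunk sizes are illustrative, and setConfiguration() is assumed to be the setter whose body (this.config = config) appears in an earlier hunk:

    // Sketch: merge one chunk of occurrence points with the new API.
    OccurrencePointsMerger merger = new OccurrencePointsMerger();
    merger.setConfiguration(config);     // AlgorithmConfiguration prepared elsewhere
    merger.initDB(true);                 // connect and build the final table
    merger.takeRange(0, 1000, 0, 1000);  // load one left/right chunk
    merger.computeRange();               // merge, persist, close the connection

Since computeRange() calls shutdown() in its finally block and forces status to 100, each chunk needs a fresh initDB()/computeRange() pair; a caller wanting to stream many chunks over one connection would have to restructure that cleanup.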

File: Transformations.java

@@ -1,11 +1,12 @@
 package org.gcube.dataanalysis.ecoengine.utils;

 import java.io.BufferedWriter;
+import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileWriter;

 import org.gcube.contentmanagement.graphtools.data.BigSamplesTable;
-import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
+import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;

 import com.rapidminer.example.ExampleSet;
 import com.rapidminer.example.table.DataRow;
@@ -147,6 +148,16 @@ public class Transformations {
         */
     }
+
+    public static void dumpConfig(String pathToFile, AlgorithmConfiguration config) throws Exception {
+        Transformations.dumpObjectToFile(pathToFile, config);
+    }
+
+    public static AlgorithmConfiguration restoreConfig(String configFile) throws Exception{
+        FileInputStream fis = new FileInputStream(new File(configFile));
+        AlgorithmConfiguration config = (AlgorithmConfiguration) new XStream().fromXML(fis);
+        fis.close();
+        return config;
+    }
+
     public static double indexString(String string) {
         // string = Sha1.SHA1(string);
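Note: the new pair gives AlgorithmConfiguration a file-based round trip. restoreConfig() deserializes with XStream (new XStream().fromXML(fis)), while dumpConfig() delegates to the pre-existing dumpObjectToFile() helper, so the two are only inverses if that helper also writes XStream XML; a com.thoughtworks.xstream.XStream import must also exist outside the shown hunks. A usage sketch (the file path and parameter name are illustrative):

    // Persist a configuration, then restore it from disk.
    AlgorithmConfiguration config = new AlgorithmConfiguration();
    config.setParam("longitudeColumn", "x");  // hypothetical parameter
    Transformations.dumpConfig("/tmp/config.xml", config);
    AlgorithmConfiguration restored = Transformations.restoreConfig("/tmp/config.xml");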