This commit is contained in:
Gianpaolo Coro 2013-03-12 13:48:46 +00:00
parent 3ef30fe289
commit f5217598a8
43 changed files with 699 additions and 268 deletions

View File

@ -0,0 +1,18 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<!-- <property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>-->
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

View File

@ -0,0 +1,20 @@
#Percentage threshold for discarding a category
categoryDiscardThreshold=0
#Percentage threshold for accepting similarity between a single Time series entry and a reference entry
entryAcceptanceThreshold=50
#Size of a comparison chunk
chunkSize=50
#Number of chunks to take from the Time series for comparison with the reference data; if set to -1 all chunks will be analyzed
timeSeriesChunksToTake=1
#Number of chunks to take from the Reference for comparison with the Time series elements; if set to -1 all chunks will be analyzed
referenceChunksToTake=5
#Use random choice for chunk selection: true | false
randomTake=true
#Use Simple String Match as distance calculation
useSimpleDistance=false
#Number Of Threads to use
numberOfThreadsToUse=5
#if two final scores differ by more than this percentage, prune the lower result
categoryDiscardDifferencialThreshold = 50
#maximum difference between a result and the best result
singleEntryRecognitionMaxDeviation = 40

View File

@ -21,7 +21,7 @@ import org.hibernate.SessionFactory;
public class Engine {
private String ConfigurationFileNameLocal = "hibernate.cfg.xml";
private String ConfigurationFileNameLocal = "lexicalguesser/hibernate.cfg.xml";
private SessionFactory referenceDBSession;
public ArrayList<String> bestCategories;
@ -58,24 +58,22 @@ public class Engine {
return referenceDBSession;
}
public void resetEngine(LexicalEngineConfiguration Config,String ColumnFilter,String configPath){
public void resetEngine(LexicalEngineConfiguration Config,String ColumnFilter){
config = Config;
scoresTable = new HashMap<String, CategoryScores>();
bestCategories = new ArrayList<String>();
bestColumns = new ArrayList<String>();
bestScores = new ArrayList<Double>();
columnFilter = ColumnFilter;
// ConfigurationFileNameLocal = configPath+"/"+ConfigurationFileNameLocal;
}
public Engine(LexicalEngineConfiguration Config,String ColumnFilter,String configPath) {
public Engine(LexicalEngineConfiguration Config,String ColumnFilter) {
config = Config;
scoresTable = new HashMap<String, CategoryScores>();
bestCategories = new ArrayList<String>();
bestColumns = new ArrayList<String>();
bestScores = new ArrayList<Double>();
columnFilter = ColumnFilter;
ConfigurationFileNameLocal = configPath+"/"+ConfigurationFileNameLocal;
}
public void calcLike(CategoryOrderedList col, String unknownSeriesName, String unknownSeriesColumn) {

View File

@ -1,8 +1,11 @@
package org.gcube.contentmanagement.lexicalmatcher.analysis.core;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Category;
public class LexicalEngineConfiguration {
@ -10,6 +13,19 @@ public class LexicalEngineConfiguration {
Properties props = new Properties();
FileInputStream fis = new FileInputStream(absoluteFilePath);
props.load(fis);
setProperties(props);
fis.close();
}
/**
 * Loads the engine settings from a properties resource located through the system class loader.
 *
 * @param file resource name handed to ClassLoader.getSystemResourceAsStream,
 *             e.g. "lexicalguesser/lexicalGuesser.properties"
 * @throws Exception a java.io.FileNotFoundException when the resource cannot be found;
 *                   otherwise whatever reading the stream or parsing the values raises
 */
public void configureByStream(String file) throws Exception {
    InputStream is = ClassLoader.getSystemResourceAsStream(file);
    // getSystemResourceAsStream signals a missing resource with null rather than an exception
    if (is == null) {
        throw new java.io.FileNotFoundException("Configuration resource not found on the classpath: " + file);
    }
    try {
        Properties props = new Properties();
        props.load(is);
        setProperties(props);
    } finally {
        // release the stream even when loading or parsing fails
        is.close();
    }
}
private void setProperties(Properties props) {
categoryDiscardThreshold = Float.parseFloat(props.getProperty("categoryDiscardThreshold"));
entryAcceptanceThreshold = Integer.parseInt(props.getProperty("entryAcceptanceThreshold"));
chunkSize = Integer.parseInt(props.getProperty("chunkSize"));
@ -20,11 +36,8 @@ public class LexicalEngineConfiguration {
numberOfThreadsToUse = Integer.parseInt(props.getProperty("numberOfThreadsToUse"));
categoryDiscardDifferencialThreshold = Float.parseFloat(props.getProperty("categoryDiscardDifferencialThreshold"));
singleEntryRecognitionMaxDeviation = Float.parseFloat(props.getProperty("singleEntryRecognitionMaxDeviation"));
fis.close();
}
/**
 * Sets the category discard threshold (the "categoryDiscardThreshold" property,
 * described in the properties file as the percentage threshold for discarding a category).
 *
 * @param categoryDiscardThreshold new threshold value
 */
public void setCategoryDiscardThreshold(float categoryDiscardThreshold) {
this.categoryDiscardThreshold = categoryDiscardThreshold;
}
@ -41,8 +54,6 @@ public class LexicalEngineConfiguration {
return entryAcceptanceThreshold;
}
/**
 * Sets the differential discard threshold (the "categoryDiscardDifferencialThreshold" property):
 * per the properties file, when two final scores differ by more than this percentage
 * the lower result is pruned.
 *
 * @param categoryDiscardDifferencialThreshold new threshold value
 */
public void setCategoryDiscardDifferencialThreshold(float categoryDiscardDifferencialThreshold) {
this.categoryDiscardDifferencialThreshold = categoryDiscardDifferencialThreshold;
}
@ -91,7 +102,6 @@ public class LexicalEngineConfiguration {
return useSimpleDistance;
}
/**
 * Sets the number of threads to use (the "numberOfThreadsToUse" property).
 *
 * @param numberOfThreadsToUse new thread count
 */
public void setNumberOfThreadsToUse(int numberOfThreadsToUse) {
this.numberOfThreadsToUse = numberOfThreadsToUse;
}
@ -120,7 +130,7 @@ public class LexicalEngineConfiguration {
public Boolean useSimpleDistance = null;
public int numberOfThreadsToUse = -Integer.MIN_VALUE;
//database parameters
// database parameters
public String databaseDriver = null;
public String databaseURL = null;
public String databaseUserName = null;
@ -129,142 +139,123 @@ public class LexicalEngineConfiguration {
public String databaseIdleConnectionTestPeriod = null;
public String databaseAutomaticTestTable = null;
//reference data parameters
// reference data parameters
public String referenceTable = null;
public String referenceColumn = null;
public String idColumn= null;
public String idColumn = null;
public String nameHuman = null;
public String description = null;
public ArrayList<Category> categories = null;
public void mergeConfig(LexicalEngineConfiguration config){
if (config.getCategoryDiscardDifferencialThreshold()!=-Float.MIN_VALUE)
setCategoryDiscardDifferencialThreshold(config.getCategoryDiscardDifferencialThreshold());
if (config.getSingleEntryRecognitionMaxDeviation()!=-Float.MIN_VALUE)
setSingleEntryRecognitionMaxDeviation(config.getSingleEntryRecognitionMaxDeviation());
if (config.getCategoryDiscardThreshold()!=-Float.MIN_VALUE)
setCategoryDiscardThreshold(config.getCategoryDiscardThreshold());
if (config.getChunkSize()!=-Integer.MIN_VALUE)
setChunkSize(config.getChunkSize());
if (config.getEntryAcceptanceThreshold()!=-Float.MIN_VALUE)
setEntryAcceptanceThreshold(config.getEntryAcceptanceThreshold());
if (config.getNumberOfThreadsToUse()!=-Integer.MIN_VALUE)
setNumberOfThreadsToUse(config.getNumberOfThreadsToUse());
if (config.getReferenceChunksToTake()!=-Integer.MIN_VALUE)
setReferenceChunksToTake(config.getReferenceChunksToTake());
if (config.getTimeSeriesChunksToTake()!=-Integer.MIN_VALUE)
setTimeSeriesChunksToTake(config.getTimeSeriesChunksToTake());
if (config.randomTake!= null)
setRandomTake(config.isRandomTake());
if (config.useSimpleDistance!=null)
setUseSimpleDistance(config.isUseSimpleDistance());
//database information merge
if (config.databaseDriver!=null)
setDatabaseDriver(config.databaseDriver);
if (config.databaseDialect!=null)
setDatabaseDialect(config.databaseDialect);
if (config.databaseAutomaticTestTable!=null)
setDatabaseAutomaticTestTable(config.databaseAutomaticTestTable);
if (config.databaseIdleConnectionTestPeriod!=null)
setDatabaseIdleConnectionTestPeriod(config.databaseIdleConnectionTestPeriod);
if (config.databaseUserName!=null)
setDatabaseUserName(config.databaseUserName);
if (config.databasePassword!=null)
setDatabasePassword(config.databasePassword);
if (config.databaseURL!=null)
setDatabaseURL(config.databaseURL);
if (config.referenceTable!=null)
setReferenceTable(config.referenceTable);
if (config.referenceColumn!=null)
setReferenceColumn(config.referenceColumn);
if (config.idColumn!=null)
setIdColumn(config.idColumn);
if (config.nameHuman!=null)
setNameHuman(config.nameHuman);
if (config.description!=null)
setDescription(config.description);
/**
 * Stores a caller-supplied category list on this configuration.
 * The list is kept by reference, not copied.
 *
 * @param categories categories to store; may be null
 */
public void setCategories(ArrayList<Category> categories) {
this.categories = categories;
}
/**
 * Returns the stored category list.
 *
 * @return the list previously set, or null when none was set (the field defaults to null)
 */
public ArrayList<Category> getCategories() {
return this.categories;
}
/**
 * Overlays the settings of another configuration onto this one.
 * A numeric setting is copied unless the other configuration's getter returns the sentinel
 * it is compared with below (-Float.MIN_VALUE or -Integer.MIN_VALUE); a Boolean or String
 * setting is copied when the other configuration's field is non-null.
 * The categories list is not merged by this method.
 *
 * @param config configuration whose values take precedence; must not be null
 */
public void mergeConfig(LexicalEngineConfiguration config) {
if (config.getCategoryDiscardDifferencialThreshold() != -Float.MIN_VALUE)
setCategoryDiscardDifferencialThreshold(config.getCategoryDiscardDifferencialThreshold());
if (config.getSingleEntryRecognitionMaxDeviation() != -Float.MIN_VALUE)
setSingleEntryRecognitionMaxDeviation(config.getSingleEntryRecognitionMaxDeviation());
if (config.getCategoryDiscardThreshold() != -Float.MIN_VALUE)
setCategoryDiscardThreshold(config.getCategoryDiscardThreshold());
// note: in int arithmetic -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE
if (config.getChunkSize() != -Integer.MIN_VALUE)
setChunkSize(config.getChunkSize());
// NOTE(review): this value is parsed with Integer.parseInt in setProperties but is compared
// with the float sentinel here - confirm the field's type and its "unset" default
if (config.getEntryAcceptanceThreshold() != -Float.MIN_VALUE)
setEntryAcceptanceThreshold(config.getEntryAcceptanceThreshold());
if (config.getNumberOfThreadsToUse() != -Integer.MIN_VALUE)
setNumberOfThreadsToUse(config.getNumberOfThreadsToUse());
if (config.getReferenceChunksToTake() != -Integer.MIN_VALUE)
setReferenceChunksToTake(config.getReferenceChunksToTake());
if (config.getTimeSeriesChunksToTake() != -Integer.MIN_VALUE)
setTimeSeriesChunksToTake(config.getTimeSeriesChunksToTake());
// Boolean fields are read directly so that null can mean "not set"
if (config.randomTake != null)
setRandomTake(config.isRandomTake());
if (config.useSimpleDistance != null)
setUseSimpleDistance(config.isUseSimpleDistance());
// database information merge
if (config.databaseDriver != null)
setDatabaseDriver(config.databaseDriver);
if (config.databaseDialect != null)
setDatabaseDialect(config.databaseDialect);
if (config.databaseAutomaticTestTable != null)
setDatabaseAutomaticTestTable(config.databaseAutomaticTestTable);
if (config.databaseIdleConnectionTestPeriod != null)
setDatabaseIdleConnectionTestPeriod(config.databaseIdleConnectionTestPeriod);
if (config.databaseUserName != null)
setDatabaseUserName(config.databaseUserName);
if (config.databasePassword != null)
setDatabasePassword(config.databasePassword);
if (config.databaseURL != null)
setDatabaseURL(config.databaseURL);
// reference data information merge
if (config.referenceTable != null)
setReferenceTable(config.referenceTable);
if (config.referenceColumn != null)
setReferenceColumn(config.referenceColumn);
if (config.idColumn != null)
setIdColumn(config.idColumn);
if (config.nameHuman != null)
setNameHuman(config.nameHuman);
if (config.description != null)
setDescription(config.description);
}
// Plain accessors for the database parameters. Every field defaults to null,
// which mergeConfig treats as "not set".

/** Sets the database driver setting. */
public void setDatabaseDriver(String databaseDriver) {
this.databaseDriver = databaseDriver;
}
/** Returns the database driver setting, or null when not set. */
public String getDatabaseDriver() {
return databaseDriver;
}
/** Sets the database URL. */
public void setDatabaseURL(String databaseURL) {
this.databaseURL = databaseURL;
}
/** Returns the database URL, or null when not set. */
public String getDatabaseURL() {
return databaseURL;
}
/** Sets the database user name. */
public void setDatabaseUserName(String databaseUserName) {
this.databaseUserName = databaseUserName;
}
/** Returns the database user name, or null when not set. */
public String getDatabaseUserName() {
return databaseUserName;
}
/** Sets the database password. */
public void setDatabasePassword(String databasePassword) {
this.databasePassword = databasePassword;
}
/** Returns the database password, or null when not set. */
public String getDatabasePassword() {
return databasePassword;
}
/** Sets the database dialect setting. */
public void setDatabaseDialect(String databaseDialect) {
this.databaseDialect = databaseDialect;
}
/** Returns the database dialect setting, or null when not set. */
public String getDatabaseDialect() {
return databaseDialect;
}
/** Sets the idle connection test period; the value is kept as a String. */
public void setDatabaseIdleConnectionTestPeriod(String databaseIdleConnectionTestPeriod) {
this.databaseIdleConnectionTestPeriod = databaseIdleConnectionTestPeriod;
}
/** Returns the idle connection test period, or null when not set. */
public String getDatabaseIdleConnectionTestPeriod() {
return databaseIdleConnectionTestPeriod;
}
/** Sets the automatic test table setting. */
public void setDatabaseAutomaticTestTable(String databaseAutomaticTestTable) {
this.databaseAutomaticTestTable = databaseAutomaticTestTable;
}
/** Returns the automatic test table setting, or null when not set. */
public String getDatabaseAutomaticTestTable() {
return databaseAutomaticTestTable;
}
@ -289,32 +280,22 @@ public class LexicalEngineConfiguration {
return idColumn;
}
// Plain accessors for the reference data parameters. Every field defaults to null,
// which mergeConfig treats as "not set".

/** Sets the name of the id column of the reference data. */
public void setIdColumn(String idColumn) {
this.idColumn = idColumn;
}
/** Returns the nameHuman setting, or null when not set. */
public String getNameHuman() {
return nameHuman;
}
/** Sets the nameHuman setting (presumably the column holding the human-readable name - confirm against DBObjectTranslator). */
public void setNameHuman(String nameHuman) {
this.nameHuman = nameHuman;
}
/** Returns the description setting, or null when not set. */
public String getDescription() {
return description;
}
/** Sets the description setting (presumably the column holding the description - confirm against DBObjectTranslator). */
public void setDescription(String description) {
this.description = description;
}

View File

@ -13,7 +13,7 @@ public class Example1_Species {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";

View File

@ -13,7 +13,7 @@ public class Example2_Area {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";

View File

@ -15,7 +15,7 @@ public class Example3_SingleMatchShark {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String singleton = "shark";

View File

@ -15,7 +15,7 @@ public class Example4_SingleMatchMitella {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String singleton = "Mitella pollicipes";

View File

@ -15,7 +15,7 @@ public class Example5_SingleMatchMitella {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String singleton = "Mirella policepes";

View File

@ -13,7 +13,7 @@ public class ExampleGuessingExternalCfg {
try {
String configPath = "./";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1

View File

@ -157,6 +157,7 @@ public class DBObjectTranslator {
}
}
public void buildCategories(SessionFactory dbSession, String referenceTable, String referenceColumn, String idColumn, String nameHuman, String description) {
referenceTable = referenceTable == null ? "reference_table" : referenceTable;

View File

@ -6,6 +6,7 @@ import java.util.ArrayList;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Category;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryOrderedList;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
@ -103,29 +104,23 @@ public class CategoryGuesser {
}
// NOTE: lexicalGuesser.properties and ALog.properties are addressed by the resource names below (lexicalguesser/...), not by a caller-supplied config path
private static final String cfgFile = "lexicalGuesser.properties";
private static final String LogFile = "ALog.properties";
private static final String cfgFile = "lexicalguesser/lexicalGuesser.properties";
private static final String LogFile = "lexicalguesser/ALog.properties";
// singleton
private CategoryOrderedList col;
private Engine processor;
private CategoryOrderedList originalCol;
private LexicalEngineConfiguration config;
private String configPath;
private boolean oneshotMode;
private static final int maxTriesClassification = 3;
private int triesCounter;
public CategoryGuesser(String ConfigPath) {
triesCounter = 0;
this.configPath = ConfigPath;
}
public CategoryGuesser() {
triesCounter = 0;
this.configPath = ".";
}
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig) throws Exception {
runGuesser(seriesName, columnName, externalConfig, null, null);
}
@ -145,19 +140,19 @@ public class CategoryGuesser {
public void init(String categoryFilter, String columnFilter, LexicalEngineConfiguration externalConfig) throws Exception {
String cfgFileCompletePath = configPath + "/" + cfgFile;
AnalysisLogger.setLogger(configPath + "/" + LogFile);
String cfgFileCompletePath = cfgFile;
AnalysisLogger.setLogger(LogFile);
AnalysisLogger.getLogger().trace("******************INITIALIZING******************");
config = new LexicalEngineConfiguration();
config.configure(cfgFileCompletePath);
config.configureByStream(cfgFileCompletePath);
if (externalConfig != null) {
config.mergeConfig(externalConfig);
}
processor = new Engine(config, columnFilter, configPath);
processor = new Engine(config, columnFilter);
SessionFactory dbSession = processor.getDBSession(config);
DBObjectTranslator dbo = new DBObjectTranslator();
@ -166,7 +161,12 @@ public class CategoryGuesser {
AnalysisLogger.getLogger().trace("******************Order Category******************");
if (externalConfig == null)
externalConfig = new LexicalEngineConfiguration();
dbo.buildCategoriesStructure(dbSession, externalConfig.getReferenceTable(), externalConfig.getReferenceColumn(), externalConfig.getIdColumn(), externalConfig.getNameHuman(), externalConfig.getDescription());
if (externalConfig.getCategories()!=null && externalConfig.getCategories().size()>0)
dbo.categories=externalConfig.getCategories();
else
dbo.buildCategoriesStructure(dbSession, externalConfig.getReferenceTable(), externalConfig.getReferenceColumn(), externalConfig.getIdColumn(), externalConfig.getNameHuman(), externalConfig.getDescription());
col = TSObjectTransformer.transform2List(dbo, config, categoryFilter);
AnalysisLogger.getLogger().trace("***************End Ordering********************");
originalCol = col.generateNovelList();
@ -179,17 +179,17 @@ public class CategoryGuesser {
public void initSingleMatcher(LexicalEngineConfiguration externalConfig, String ColumnFilter) throws Exception {
String cfgFileCompletePath = configPath + "/" + cfgFile;
AnalysisLogger.setLogger(configPath + "/" + LogFile);
String cfgFileCompletePath = cfgFile;
AnalysisLogger.setLogger(LogFile);
config = new LexicalEngineConfiguration();
config.configure(cfgFileCompletePath);
config.configureByStream(cfgFileCompletePath);
if (externalConfig != null) {
config.mergeConfig(externalConfig);
}
processor = new Engine(config, ColumnFilter, configPath);
processor = new Engine(config, ColumnFilter);
// in this case, the lexical matcher is invoked once, then it has to be stopped in the end
oneshotMode = true;
@ -213,17 +213,17 @@ public class CategoryGuesser {
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig, String CategoryFilter, String ColumnFilter, String SingletonString) throws Exception {
String cfgFileCompletePath = configPath + "/" + cfgFile;
AnalysisLogger.setLogger(configPath + "/" + LogFile);
String cfgFileCompletePath = cfgFile;
AnalysisLogger.setLogger(LogFile);
AnalysisLogger.getLogger().debug("Guessing Table " + seriesName + " column " + columnName);
if (externalConfig != null) {
config = new LexicalEngineConfiguration();
config.configure(cfgFileCompletePath);
config.configureByStream(cfgFileCompletePath);
config.mergeConfig(externalConfig);
// NOTE FOR FUTURE OPTIMIZATION: perform the re-init only if there is a change in the Database pointing
processor = new Engine(config, ColumnFilter, configPath);
processor = new Engine(config, ColumnFilter);
} else {
if (config == null) {
config = new LexicalEngineConfiguration();
@ -231,9 +231,9 @@ public class CategoryGuesser {
}
if (processor == null) {
processor = new Engine(config, ColumnFilter, configPath);
processor = new Engine(config, ColumnFilter);
} else
processor.resetEngine(config, ColumnFilter, configPath);
processor.resetEngine(config, ColumnFilter);
}
SessionFactory dbSession = processor.getDBSession(config);
@ -242,7 +242,16 @@ public class CategoryGuesser {
//modification of 10/10/11 calculate structure each time
// if (col == null) {
AnalysisLogger.getLogger().trace("******************Order Category******************");
dbo.buildCategoriesStructure(dbSession, config.getReferenceTable(), config.getReferenceColumn(), config.getIdColumn(), config.getNameHuman(), config.getDescription());
if (externalConfig !=null){
ArrayList<Category> externalcategories = externalConfig.getCategories();
if ((externalcategories!=null) && (externalcategories.size()>0)){
dbo.categories=externalConfig.getCategories();
dbo.calculateCategoriesWeights(dbSession);
}
else
dbo.buildCategoriesStructure(dbSession, config.getReferenceTable(), config.getReferenceColumn(), config.getIdColumn(), config.getNameHuman(), config.getDescription());
}
col = TSObjectTransformer.transform2List(dbo, config, CategoryFilter);
AnalysisLogger.getLogger().trace("***************End Ordering********************");
originalCol = col.generateNovelList();
@ -277,7 +286,7 @@ public class CategoryGuesser {
config.setCategoryDiscardDifferencialThreshold(Math.max(differencialThr - 20, 0));
config.setEntryAcceptanceThreshold(Math.max(acceptanceThr - 20, 0));
AnalysisLogger.getLogger().trace("Performing next processing pass");
runGuesser(seriesName, columnName, null, CategoryFilter, ColumnFilter, SingletonString);
runGuesser(seriesName, columnName, externalConfig, CategoryFilter, ColumnFilter, SingletonString);
AnalysisLogger.getLogger().debug("End processing pass");
// if (oneshotMode)

View File

@ -1,6 +1,10 @@
package org.gcube.contentmanagement.lexicalmatcher.analysis.test;
import java.util.ArrayList;
import java.util.Properties;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Category;
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
@ -11,9 +15,9 @@ public class TestExternalCfgProduction {
try {
int attempts = 1;
// new Properties().load(ClassLoader.getSystemResourceAsStream("lexicalguesser/lexicalGuesser.properties"));
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
@ -28,6 +32,18 @@ public class TestExternalCfgProduction {
conf.setDescription("ifield2");
ArrayList<Category> categories = new ArrayList<Category>();
//human name, index, table name, description
categories.add(new Category("COUNTRY_OLD","39c98800-dd3c-11e0-b8d1-d1e2e7ba4f9d","rdf39c98800dd3c11e0b8d1d1e2e7ba4f9d","country"));
categories.add(new Category("CONTINENT_OLD","1d5d51f0-dd42-11e0-b8d3-d1e2e7ba4f9d","rdf1d5d51f0dd4211e0b8d3d1e2e7ba4f9d","continent reference data"));
categories.add(new Category("SPECIES_OLD","0a7fb500-dd3d-11e0-b8d1-d1e2e7ba4f9d","rdf0a7fb500dd3d11e0b8d1d1e2e7ba4f9d","species"));
categories.add(new Category("CodeListCountry","4c8d93a0-edc2-11e0-93e4-f6a9821baa29","rdf4c8d93a0edc211e093e4f6a9821baa29","Country"));
categories.add(new Category("CL_DIVISION","1140bdf0-dd2c-11e0-9220-ae17b3db32b7","rdf1140bdf0dd2c11e09220ae17b3db32b7","undefined"));
categories.add(new Category("CL_ASFIS_TAX","f87360f0-d9f9-11e0-ba05-d9adb0db767c","rdff87360f0d9f911e0ba05d9adb0db767c","undefined"));
conf.setCategories(categories);
//database Parameters
conf.setDatabaseUserName("gcube");
conf.setDatabasePassword("d4science2");

View File

@ -1,8 +1,10 @@
package org.gcube.contentmanagement.lexicalmatcher.analysis.test;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Category;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
@ -13,9 +15,7 @@ public class TestSingleExternalCfgProduction {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
@ -25,11 +25,23 @@ public class TestSingleExternalCfgProduction {
String column = "field6";
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
/*
conf.setReferenceTable("codelist1733371938");
conf.setReferenceColumn("ifield14");
conf.setNameHuman("ifield1");
conf.setIdColumn("ifield0");
conf.setDescription("ifield2");
*/
ArrayList<Category> categories = new ArrayList<Category>();
//human name, index, table name, description
categories.add(new Category("COUNTRY_OLD","39c98800-dd3c-11e0-b8d1-d1e2e7ba4f9d","rdf39c98800dd3c11e0b8d1d1e2e7ba4f9d","country"));
categories.add(new Category("CONTINENT_OLD","1d5d51f0-dd42-11e0-b8d3-d1e2e7ba4f9d","rdf1d5d51f0dd4211e0b8d3d1e2e7ba4f9d","continent reference data"));
categories.add(new Category("SPECIES_OLD","0a7fb500-dd3d-11e0-b8d1-d1e2e7ba4f9d","rdf0a7fb500dd3d11e0b8d1d1e2e7ba4f9d","species"));
categories.add(new Category("CodeListCountry","4c8d93a0-edc2-11e0-93e4-f6a9821baa29","rdf4c8d93a0edc211e093e4f6a9821baa29","Country"));
categories.add(new Category("CL_DIVISION","1140bdf0-dd2c-11e0-9220-ae17b3db32b7","rdf1140bdf0dd2c11e09220ae17b3db32b7","undefined"));
conf.setCategories(categories);
//CHANGE THIS TO ENHANCE THE RECALL
conf.setEntryAcceptanceThreshold(30);

View File

@ -11,7 +11,7 @@ public class BenchMarkTest1 {
int attempts = 1;
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";

View File

@ -12,7 +12,7 @@ public class BenchMarkTest2 {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";

View File

@ -12,7 +12,7 @@ public class BenchMarkTest3 {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";

View File

@ -12,7 +12,7 @@ public class BenchMarkTest4 {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";

View File

@ -12,7 +12,7 @@ public class BenchMarkTest5 {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_532bba80_1c8f_11df_a4ee_87804054691e";

View File

@ -13,7 +13,7 @@ public class BenchMarkTestExternalCfg {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_532bba80_1c8f_11df_a4ee_87804054691e";

View File

@ -14,7 +14,7 @@ public class BenchMarkTestFilterCategory {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "ref_order";

View File

@ -15,7 +15,7 @@ public class BenchMarkTestSingleton {
try {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String singleton = "sarda sarda";

View File

@ -11,7 +11,7 @@ public class BenchMarkTestTSCountry {
int attempts = 1;
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";

View File

@ -11,7 +11,7 @@ public static void main(String[] args) {
try {
String configPath =".";
int attempts = 1;
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");

View File

@ -11,7 +11,7 @@ public static void main(String[] args) {
try {
String configPath =".";
int attempts = 1;
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");

View File

@ -13,7 +13,7 @@ public class TestExternalCfgProduction {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
// String seriesName = "rdmc366dfe0ddf511e086b1b1c5d6fb1c27";

View File

@ -15,7 +15,7 @@ public class TestSingleExternalCfgProduction {
String configPath = ".";
CategoryGuesser guesser = new CategoryGuesser(configPath);
CategoryGuesser guesser = new CategoryGuesser();
//bench 1
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
String singleton = "Faroe Island";

View File

@ -1,5 +1,9 @@
package org.gcube.contentmanagement.lexicalmatcher.utils;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
@ -25,7 +29,18 @@ public class AnalysisLogger {
// takes as input the path to the log4j configuration file
public static void setLogger(String path){
if (logger == null){
PropertyConfigurator.configure(path);
try{
PropertyConfigurator.configure(path);
}catch(Exception e) {
Properties p = new Properties();
try {
InputStream is = ClassLoader.getSystemResourceAsStream(path);
p.load(is);
is.close();
} catch (IOException e1) {
}
PropertyConfigurator.configure(p);
}
}
logger = Logger.getLogger("AnalysisLogger");
hibernateLogger = Logger.getLogger("hibernate");

View File

@ -4,6 +4,7 @@ package org.gcube.contentmanagement.lexicalmatcher.utils;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
@ -36,13 +37,16 @@ public class DatabaseFactory {
if (config==null)
return initDBConnection(configurationFile);
// take the configuration file
File fl = new File(configurationFile);
FileInputStream stream = new FileInputStream(fl);
InputStream stream;
try {
File fl = new File(configurationFile);
stream = new FileInputStream(fl);
} catch (Exception e) {
stream = ClassLoader.getSystemResourceAsStream(configurationFile);
}
SAXReader saxReader = new SAXReader();
Document document = saxReader.read(stream);
List<Node> nodes = document.selectNodes("//hibernate-configuration/session-factory/property");
Iterator<Node> nodesIterator = nodes.iterator();
@ -92,9 +96,6 @@ public class DatabaseFactory {
// close stream
stream.close();
return DBSessionFactory;
}

View File

@ -6,6 +6,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
@ -18,12 +19,18 @@ public class FileTools {
public static String readXMLDoc(String xmlFilePath) throws Exception {
String xml = null;
File fl = new File(xmlFilePath);
FileInputStream stream = new FileInputStream(fl);
InputStream stream;
try {
File fl = new File(xmlFilePath);
stream = new FileInputStream(fl);
} catch (Exception e) {
stream = ClassLoader.getSystemResourceAsStream(xmlFilePath);
}
SAXReader saxReader = new SAXReader();
Document document = saxReader.read(stream);
xml = document.asXML();
stream.close();
return xml;
}

View File

@ -97,10 +97,11 @@ public abstract class DataAnalysis implements Evaluator{
* visualizes the results of the analysis
* @param results
*/
public static void visualizeResults(HashMap<String,String> results){
public static void visualizeResults(HashMap<String,Object> results){
for (String key:results.keySet()){
System.out.println(key+":"+results.get(key));
for (Object key:results.keySet()){
PrimitiveType keyp = (PrimitiveType) results.get(key);
System.out.println(key+":"+keyp.getContent());
}
}

View File

@ -25,8 +25,12 @@ public class TablesComparison {
static double Threshold = 0.01;
//change this defaults to change comparison
public String referenceTable = "hspec_suitable_executor_1_worker";
public String analyzedTable = "hspec_suitable_executor_2";
/*
public String referenceTable = "hspec_suitable_executor_1_worker";
public String analyzedTable = "hspec_suitable_executor_2";
*/
public String referenceTable = "hspec_suitable_latimeria_chalumnae";
public String analyzedTable = "hspec_suitable_neural_latimeria_chalumnae";
public String referenceCriteria = "speciesid,csquarecode";
public String destinationCriteria = "speciesid,csquarecode";
public String referenceSelectedColumns = "speciesid,csquarecode,probability";
@ -84,9 +88,14 @@ public class TablesComparison {
String configPath = "./cfg/";
AlgorithmConfiguration config = new AlgorithmConfiguration();
config.setConfigPath(configPath);
/*
config.setDatabaseUserName("utente");
config.setDatabasePassword("d4science");
config.setDatabaseURL("jdbc:postgresql://dbtest.research-infrastructures.eu/aquamapsorgupdated");
*/
// config.setDatabaseURL("jdbc:postgresql://dbtest.research-infrastructures.eu/aquamapsorgupdated");
config.setDatabaseUserName("gcube");
config.setDatabasePassword("d4science2");
config.setDatabaseURL("jdbc:postgresql://localhost/testdb");
TablesComparison ec = new TablesComparison(config);
long t0 = System.currentTimeMillis();

View File

@ -0,0 +1,180 @@
package org.gcube.dataanalysis.ecoengine.test;
import java.math.BigInteger;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.hibernate.SessionFactory;
/**
* checks if two tables are equal
* checks numbers at the second decimal position
*/
public class TablesComparisonLatimeria {
private BigInteger numOfElements;
private int errorCounter;
//connection setup
protected String LogFile = "ALog.properties";
//fundamental: set a the chunk csquaresNumber and the maximum number of chunks to take
int chunkSize = 7000;
static double Threshold = 0.01;
//change this defaults to change comparison
public String referenceTable = "hspec_suitable_latimeria_chalumnae";
public String analyzedTable = "hspec_suitable_neural_latimeria_chalumnae";
public String referenceCriteria = "csquarecode";
public String destinationCriteria = "csquarecode";
public String referenceSelectedColumns = "csquarecode,probability";
public String destinationSelectedColumns = "csquarecode,probability";
//selection query
public static String selectElementsQuery = "select %1$s from %2$s order by %3$s";
//database connections
protected SessionFactory referencedbConnection;
protected SessionFactory destinationdbConnection;
//init connections
public TablesComparisonLatimeria(AlgorithmConfiguration config) throws Exception {
AnalysisLogger.setLogger(config.getConfigPath() + LogFile);
referencedbConnection = DatabaseFactory.initDBConnection(config.getConfigPath() + AlgorithmConfiguration.defaultConnectionFile,config);
AnalysisLogger.getLogger().debug("ReferenceDB initialized");
destinationdbConnection = DatabaseFactory.initDBConnection(config.getConfigPath() + AlgorithmConfiguration.defaultConnectionFile,config);
AnalysisLogger.getLogger().debug("OriginalDB initialized");
}
//counts the elements in a table
public BigInteger countElements(String tablename, SessionFactory session)
{
BigInteger count = BigInteger.ZERO;
String countingQuery = "select count(*) from "+tablename;
AnalysisLogger.getLogger().debug("Getting DB elements by this query: "+countingQuery);
List<Object> result = DatabaseFactory.executeSQLQuery(countingQuery, session);
count = (BigInteger) result.get(0);
return count;
}
//takes a chunk of elements from the database, belonging to the set of 170 selected species
public List<Object> takeChunkOfElements(String tablename,String selectedColumns,String criteria, int limit, int offset, SessionFactory session) {
String query = String.format(selectElementsQuery,selectedColumns,tablename,criteria)+ " limit " + limit + " offset " + offset;
AnalysisLogger.getLogger().debug("takeChunkOfElements-> executing query on DB: " + query);
List<Object> results = DatabaseFactory.executeSQLQuery(query, session);
return results;
}
//checks if a string is a number
public double isNumber(String element){
try{
double d = Double.parseDouble(element);
return d;
}catch(Exception e){
return -Double.MAX_VALUE;
}
}
public static void main(String[] args) throws Exception {
String configPath = "./cfg/";
AlgorithmConfiguration config = new AlgorithmConfiguration();
config.setConfigPath(configPath);
/*
config.setDatabaseUserName("utente");
config.setDatabasePassword("d4science");
*/
// config.setDatabaseURL("jdbc:postgresql://dbtest.research-infrastructures.eu/aquamapsorgupdated");
config.setDatabaseUserName("postgres");
config.setDatabasePassword("d4science2");
config.setDatabaseURL("jdbc:postgresql://geoserver-dev.d4science-ii.research-infrastructures.eu/aquamapsdb");
TablesComparisonLatimeria ec = new TablesComparisonLatimeria(config);
long t0 = System.currentTimeMillis();
ec.runTest();
long t1 = System.currentTimeMillis();
float difference = (t1-t0);
difference = difference /(float)(1000*60);
System.out.println("Elapsed time : "+difference+" min");
}
//runs the test between the tables
public boolean runTest() {
long t0 = System.currentTimeMillis();
// take the number of elements
numOfElements = countElements(analyzedTable, destinationdbConnection);
AnalysisLogger.getLogger().debug("Remote DB contains " + numOfElements + " elements.");
int maxNumber = numOfElements.intValue();
int numOfChunks = maxNumber / chunkSize;
if ((maxNumber % chunkSize) > 0) {
numOfChunks++;
}
int startIndex = 0;
// reset error counter
errorCounter = 0;
boolean equal = true;
for (int i = startIndex; i < numOfChunks; i++) {
AnalysisLogger.getLogger().debug("Chunk "+(i+1)+" of "+numOfChunks);
int offset = i * chunkSize;
List<Object> referencechunk = takeChunkOfElements(referenceTable,referenceSelectedColumns,referenceCriteria, chunkSize, offset, referencedbConnection);
List<Object> destinationchunk = takeChunkOfElements(analyzedTable,destinationSelectedColumns,destinationCriteria, chunkSize, offset, destinationdbConnection);
int m = referencechunk.size();
for (int j=0;j<m;j++){
Object[] refrow = (Object[]) referencechunk.get(j);
Object[] destrow = (Object[]) destinationchunk.get(j);
int columns = destrow.length;
for (int k=0;k<columns;k++){
String refelem = ""+refrow[k];
String destelem = ""+destrow[k];
double d = isNumber(refelem);
// System.out.print(refelem+" vs "+destelem+ " ");
if (d!=-Double.MAX_VALUE){
if (Math.abs(d-isNumber(destelem))>Threshold){
errorCounter++;
equal = false;
AnalysisLogger.getLogger().debug("ERROR - DISCREPANCY AT NUMBERS COMPARISON: "+refelem+" vs "+destelem);
}
}
else if (!refelem.equals(destelem)){
errorCounter++;
equal = false;
AnalysisLogger.getLogger().debug("ERROR - DISCREPANCY AT STRING COMPARISON: "+refelem+" vs "+destelem);
}
if (!equal)
break;
}
// System.out.println();
if (!equal)
break;
}
if (!equal)
break;
else
AnalysisLogger.getLogger().debug("CHUNK NUMBER "+i+" OK!");
}
long t1 = System.currentTimeMillis();
AnalysisLogger.getLogger().debug("ELAPSED TIME: " + (t1-t0) + " ms");
//close connections
referencedbConnection.close();
destinationdbConnection.close();
return equal;
}
}

View File

@ -77,7 +77,7 @@ public static void main(String[] args) throws Exception {
try {
dg.compute();
PrimitiveType output = (PrimitiveType) dg.getOutput();
HashMap<String, String> out = (HashMap<String, String>)output.getContent();
HashMap<String, Object> out = (HashMap<String, Object>)output.getContent();
DiscrepancyAnalysis.visualizeResults(out);
} catch (Exception e) {

View File

@ -191,7 +191,7 @@ public class ExperimentsForLatimeria {
evaluators.get(0).init();
Regressor.process(evaluators.get(0));
PrimitiveType output = (PrimitiveType) evaluators.get(0).getOutput();
HashMap<String, String> out = (HashMap<String, String>)output.getContent();
HashMap<String, Object> out = (HashMap<String, Object>)output.getContent();
DiscrepancyAnalysis.visualizeResults(out);
evaluators = null;
}
@ -203,7 +203,7 @@ public class ExperimentsForLatimeria {
evaluators.get(0).init();
Regressor.process(evaluators.get(0));
PrimitiveType output = (PrimitiveType) evaluators.get(0).getOutput();
HashMap<String, String> out = (HashMap<String, String>)output.getContent();
HashMap<String, Object> out = (HashMap<String, Object>)output.getContent();
DiscrepancyAnalysis.visualizeResults(out);
evaluators = null;
}
@ -225,7 +225,7 @@ public class ExperimentsForLatimeria {
evaluators.get(0).init();
Regressor.process(evaluators.get(0));
PrimitiveType output = (PrimitiveType) evaluators.get(0).getOutput();
HashMap<String, String> out = (HashMap<String, String>)output.getContent();
HashMap<String, Object> out = (HashMap<String, Object>)output.getContent();
DiscrepancyAnalysis.visualizeResults(out);
evaluators = null;
}

View File

@ -0,0 +1,111 @@
package org.gcube.dataanalysis.ecoengine.test.experiments.latimeria;
import java.util.HashMap;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.evaluation.DiscrepancyAnalysis;
import org.gcube.dataanalysis.ecoengine.interfaces.ComputationalAgent;
import org.gcube.dataanalysis.ecoengine.interfaces.Evaluator;
import org.gcube.dataanalysis.ecoengine.interfaces.Generator;
import org.gcube.dataanalysis.ecoengine.interfaces.Modeler;
import org.gcube.dataanalysis.ecoengine.interfaces.Transducerer;
import org.gcube.dataanalysis.ecoengine.processing.factories.EvaluatorsFactory;
import org.gcube.dataanalysis.ecoengine.processing.factories.GeneratorsFactory;
import org.gcube.dataanalysis.ecoengine.processing.factories.ModelersFactory;
import org.gcube.dataanalysis.ecoengine.processing.factories.TransducerersFactory;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
import org.gcube.dataanalysis.ecoengine.utils.PresetConfigGenerator;
/**
 * Compares pairs of distribution tables of the Latimeria experiments through the
 * DISCREPANCY_ANALYSIS evaluator (and, optionally, evaluates a table with the quality analysis preset)
 * and prints the results.
 */
public class TablesComparisonForLatimeria {

    // names and parameters of the Latimeria experiments; only the hspec table names passed in main are referenced by this class
    static String absenceRandomTable = "absence_data_latimeria_random";
    static String absenceStaticTable = "absence_data_latimeria";
    static String presenceTable = "presence_data_latimeria_2";
    static String presenceTableNoEarth = "presence_data_latimeria_sea";
    static String envelopeTable = "hspen_latimeria";
    static String aquamapsSuitableTable = "hspec_suitable_latimeria_chalumnae";
    static String aquamapsNativeTable = "hspec_native_latimeria_chalumnae";
    static String nnsuitableTable = "hspec_suitable_neural_latimeria_chalumnae";
    static String nnsuitableRandomTable = "hspec_suitable_neural_latimeria_chalumnae_random";
    static String nnnativeTable = "hspec_native_neural_latimeria_chalumnae";
    static String nnnativeRandomTable = "hspec_native_neural_latimeria_chalumnae_random";
    static String hcaf = "hcaf_d";
    static String filteredhcaf = "bboxed_hcaf_d";
    static String speciesID = "Fis-30189";
    static String staticsuitable = "staticsuitable";
    static String randomsuitable = "randomsuitable";
    static String staticnative = "staticnative";
    static String randomnative = "randomnative";
    static int numberOfPoints = 34;
    static String nnname = "neuralname";
    static float x1 = 95.346678f;
    static float y1 = -9.18887f;
    static float x2 = 125.668944f;
    static float y2 = 12.983148f;

    /**
     * Builds the configuration of a discrepancy analysis between two tables, matched on the
     * "csquarecode" column and compared on the "probability" column.
     *
     * @param table1 name of the first table
     * @param table2 name of the second table
     * @return the configuration for the DISCREPANCY_ANALYSIS agent
     */
    public static AlgorithmConfiguration configDiscrepancyAnalysis(String table1, String table2) {
        AlgorithmConfiguration config = getConfig();
        config.setNumberOfResources(1);
        config.setAgent("DISCREPANCY_ANALYSIS");
        config.setParam("FirstTable", table1);
        config.setParam("SecondTable", table2);
        config.setParam("FirstTableCsquareColumn", "csquarecode");
        config.setParam("SecondTableCsquareColumn", "csquarecode");
        config.setParam("FirstTableProbabilityColumn", "probability");
        config.setParam("SecondTableProbabilityColumn", "probability");
        config.setParam("ComparisonThreshold", "0.1");
        return config;
    }

    /**
     * Builds the base configuration (paths and database parameters) and sets up the logger.
     *
     * @return a new configuration pointing to the "./cfg/" folder
     */
    public static AlgorithmConfiguration getConfig() {
        AlgorithmConfiguration config = new AlgorithmConfiguration();
        config.setConfigPath("./cfg/");
        config.setPersistencePath("./");
        // NOTE(review): credentials are hard-coded here; consider reading them from the configuration files
        config.setParam("DatabaseUserName", "postgres");
        config.setParam("DatabasePassword", "d4science2");
        config.setParam("DatabaseURL", "jdbc:postgresql://geoserver-dev.d4science-ii.research-infrastructures.eu/aquamapsdb");
        config.setParam("DatabaseDriver", "org.postgresql.Driver");
        AnalysisLogger.setLogger(config.getConfigPath() + AlgorithmConfiguration.defaultLoggerFile);
        return config;
    }

    /**
     * Runs the discrepancy analysis between two tables and prints its results.
     *
     * @param table1 name of the first table
     * @param table2 name of the second table
     * @throws Exception if the evaluation fails
     */
    public static void calcdiscrepancy(String table1, String table2) throws Exception {
        System.out.println("*****************************DISCREPANCY: " + table1 + " vs " + table2 + "************************************");
        evaluateAndVisualize(configDiscrepancyAnalysis(table1, table2));
    }

    /**
     * Runs the quality analysis of a table against presence and absence tables and prints its results.
     *
     * @param table name of the table to evaluate
     * @param presenceTable name of the table holding the positive cases
     * @param absenceTable name of the table holding the negative cases
     * @throws Exception if the evaluation fails
     */
    public static void calcquality(String table, String presenceTable, String absenceTable) throws Exception {
        System.out.println("*****************************QUALITY: " + table + " vs " + presenceTable + " and " + absenceTable + "************************************");
        evaluateAndVisualize(PresetConfigGenerator.configQualityAnalysis(presenceTable, absenceTable, table));
    }

    //runs the first evaluator built for the configuration and prints the content of its output
    private static void evaluateAndVisualize(AlgorithmConfiguration config) throws Exception {
        List<ComputationalAgent> evaluators = EvaluatorsFactory.getEvaluators(config);
        if (evaluators == null || evaluators.isEmpty())
            throw new IllegalStateException("No evaluator available for the requested configuration");
        ComputationalAgent evaluator = evaluators.get(0);
        evaluator.init();
        Regressor.process(evaluator);
        PrimitiveType output = (PrimitiveType) evaluator.getOutput();
        // the evaluators used here expose their results as a map: the cast cannot be checked at compile time
        @SuppressWarnings("unchecked")
        HashMap<String, Object> out = (HashMap<String, Object>) output.getContent();
        DiscrepancyAnalysis.visualizeResults(out);
    }

    /**
     * Compares the suitable and the native tables with the corresponding neural ones.
     */
    public static void main(String[] args) throws Exception {
        calcdiscrepancy(aquamapsSuitableTable, nnsuitableTable);
        calcdiscrepancy(aquamapsNativeTable, nnnativeTable);
    }
}

View File

@ -18,11 +18,11 @@ public class Regressor {
String resLoad = agent.getResourceLoad();
String ress = agent.getResources();
/*
System.out.println("LOAD: " + resLoad);
System.out.println("RESOURCES: " + ress);
System.out.println("STATUS: " + agent.getStatus());
*/
Thread.sleep(10000);
}
} else

View File

@ -20,7 +20,7 @@ public class EvaluatorT implements Runnable{
try {
dg.compute();
PrimitiveType output = (PrimitiveType) dg.getOutput();
HashMap<String, String> out = (HashMap<String, String>)output.getContent();
HashMap<String, Object> out = (HashMap<String, Object>)output.getContent();
DiscrepancyAnalysis.visualizeResults(out);
} catch (Exception e) {

View File

@ -1,17 +1,17 @@
package org.gcube.dataanalysis.ecoengine.utils;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.test.regression.Regressor;
public class PresetConfigGenerator {
public static AlgorithmConfiguration configAquamapsSuitable(String aquamapsSuitableTable, String envelopeTable) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
config.setModel("AQUAMAPS_SUITABLE");
config.setParam("DistributionTable",aquamapsSuitableTable );
config.setParam("DistributionTable", aquamapsSuitableTable);
config.setParam("CsquarecodesTable", "hcaf_d");
config.setParam("EnvelopeTable", envelopeTable);
config.setParam("OccurrencePointsTable", "occurrencecells");
@ -22,7 +22,7 @@ public class PresetConfigGenerator {
return config;
}
public static AlgorithmConfiguration configAquamapsNative(String aquamapsNativeTable, String envelopeTable) {
public static AlgorithmConfiguration configAquamapsNative(String aquamapsNativeTable, String envelopeTable) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
@ -36,7 +36,7 @@ public class PresetConfigGenerator {
return config;
}
public static AlgorithmConfiguration configAquamapsNNSuitable(String tableName,String username, String envelopeTable, String speciesID,String nnname) {
public static AlgorithmConfiguration configAquamapsNNSuitable(String tableName, String username, String envelopeTable, String speciesID, String nnname) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
@ -53,7 +53,7 @@ public class PresetConfigGenerator {
return config;
}
public static AlgorithmConfiguration configAquamapsNNNative(String tableName,String username, String envelopeTable, String speciesID,String nnname) {
public static AlgorithmConfiguration configAquamapsNNNative(String tableName, String username, String envelopeTable, String speciesID, String nnname) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
@ -69,7 +69,7 @@ public class PresetConfigGenerator {
return config;
}
public static AlgorithmConfiguration configSuitableNeuralNetworkTraining(String presenceTable, String absenceTable,String username, String speciesID, String neuronsAndLayers,String nnname) {
public static AlgorithmConfiguration configSuitableNeuralNetworkTraining(String presenceTable, String absenceTable, String username, String speciesID, String neuronsAndLayers, String nnname) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
@ -78,13 +78,13 @@ public class PresetConfigGenerator {
config.setParam("AbsenceDataTable", absenceTable);
config.setParam("PresenceDataTable", presenceTable);
config.setParam("SpeciesName", speciesID);
config.setParam("UserName",username);
config.setParam("UserName", username);
config.setParam("LayersNeurons", neuronsAndLayers);
config.setParam("NeuralNetworkName", nnname);
return config;
}
public static AlgorithmConfiguration configNativeNeuralNetworkTraining(String presenceTable, String absenceTable,String username, String speciesID, String neuronsAndLayers,String nnname) {
public static AlgorithmConfiguration configNativeNeuralNetworkTraining(String presenceTable, String absenceTable, String username, String speciesID, String neuronsAndLayers, String nnname) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(5);
@ -100,71 +100,70 @@ public class PresetConfigGenerator {
return config;
}
public static AlgorithmConfiguration configQualityAnalysis(String presenceTable, String absenceTable, String table){
public static AlgorithmConfiguration configQualityAnalysis(String presenceTable, String absenceTable, String table) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setAgent("QUALITY_ANALYSIS");
config.setParam("PositiveCasesTable",presenceTable);
config.setParam("NegativeCasesTable",absenceTable);
config.setParam("PositiveCasesTableKeyColumn","csquarecode");
config.setParam("NegativeCasesTableKeyColumn","csquarecode");
config.setParam("DistributionTable",table);
config.setParam("DistributionTableKeyColumn","csquarecode");
config.setParam("DistributionTableProbabilityColumn","probability");
config.setParam("PositiveThreshold","0.8");
config.setParam("NegativeThreshold","0.3");
config.setParam("PositiveCasesTable", presenceTable);
config.setParam("NegativeCasesTable", absenceTable);
config.setParam("PositiveCasesTableKeyColumn", "csquarecode");
config.setParam("NegativeCasesTableKeyColumn", "csquarecode");
config.setParam("DistributionTable", table);
config.setParam("DistributionTableKeyColumn", "csquarecode");
config.setParam("DistributionTableProbabilityColumn", "probability");
config.setParam("PositiveThreshold", "0.8");
config.setParam("NegativeThreshold", "0.3");
return config;
}
public static AlgorithmConfiguration configDiscrepancyAnalysis(String table1,String table2){
public static AlgorithmConfiguration configDiscrepancyAnalysis(String table1, String table2) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setAgent("DISCREPANCY_ANALYSIS");
config.setParam("FirstTable",table1);
config.setParam("SecondTable",table2);
config.setParam("FirstTableCsquareColumn","csquarecode");
config.setParam("SecondTableCsquareColumn","csquarecode");
config.setParam("FirstTableProbabilityColumn","probability");
config.setParam("SecondTableProbabilityColumn","probability");
config.setParam("ComparisonThreshold","0.1");
config.setParam("FirstTable", table1);
config.setParam("SecondTable", table2);
config.setParam("FirstTableCsquareColumn", "csquarecode");
config.setParam("SecondTableCsquareColumn", "csquarecode");
config.setParam("FirstTableProbabilityColumn", "probability");
config.setParam("SecondTableProbabilityColumn", "probability");
config.setParam("ComparisonThreshold", "0.1");
return config;
}
public static AlgorithmConfiguration configHRSAnalysis(String projectiontable,String absenceTable, String presenceTable){
public static AlgorithmConfiguration configHRSAnalysis(String projectiontable, String absenceTable, String presenceTable) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setAgent("HRS");
config.setParam("ProjectingAreaTable", projectiontable);
config.setParam("ProjectingAreaFeaturesOptionalCondition", "where oceanarea>0");
config.setParam("FeaturesColumns", "depthmean"+AlgorithmConfiguration.getListSeparator()+"depthmax"+AlgorithmConfiguration.getListSeparator()+"depthmin"+AlgorithmConfiguration.getListSeparator()+" sstanmean"+AlgorithmConfiguration.getListSeparator()+"sbtanmean"+AlgorithmConfiguration.getListSeparator()+"salinitymean"+AlgorithmConfiguration.getListSeparator()+"salinitybmean"+AlgorithmConfiguration.getListSeparator()+" primprodmean"+AlgorithmConfiguration.getListSeparator()+"iceconann"+AlgorithmConfiguration.getListSeparator()+"landdist"+AlgorithmConfiguration.getListSeparator()+"oceanarea");
config.setParam("FeaturesColumns", "depthmean" + AlgorithmConfiguration.getListSeparator() + "depthmax" + AlgorithmConfiguration.getListSeparator() + "depthmin" + AlgorithmConfiguration.getListSeparator() + " sstanmean" + AlgorithmConfiguration.getListSeparator() + "sbtanmean" + AlgorithmConfiguration.getListSeparator() + "salinitymean" + AlgorithmConfiguration.getListSeparator() + "salinitybmean" + AlgorithmConfiguration.getListSeparator() + " primprodmean" + AlgorithmConfiguration.getListSeparator() + "iceconann" + AlgorithmConfiguration.getListSeparator() + "landdist" + AlgorithmConfiguration.getListSeparator() + "oceanarea");
config.setParam("PositiveCasesTable", presenceTable);
config.setParam("NegativeCasesTable", absenceTable);
return config;
}
public static AlgorithmConfiguration configAbsenceTable(boolean random, String absenceTable, String hspecTable, int numberOfPoints, String speciesCode){
public static AlgorithmConfiguration configAbsenceTable(boolean random, String absenceTable, String hspecTable, int numberOfPoints, String speciesCode) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
config.setAgent("ABSENCE_CELLS_FROM_AQUAMAPS");
config.setParam("RANDOM_TAKE", ""+random);
config.setParam("RANDOM_TAKE", "" + random);
config.setParam("FINAL_TABLE_NAME", absenceTable);
config.setParam("AQUAMAPS_HSPEC", hspecTable);
config.setParam("SPECIES_CODE", speciesCode);
config.setParam("NUMBER_OF_POINTS", ""+numberOfPoints);
config.setParam("NUMBER_OF_POINTS", "" + numberOfPoints);
return config;
}
public static AlgorithmConfiguration configPresenceTable(String presenceTable, int numberOfPoints, String speciesCode){
public static AlgorithmConfiguration configPresenceTable(String presenceTable, int numberOfPoints, String speciesCode) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
@ -172,12 +171,12 @@ public class PresetConfigGenerator {
config.setParam("Table_Label", presenceTable);
config.setParam("Table_Name", presenceTable);
config.setParam("Species_Code", speciesCode);
config.setParam("Number_of_Points", ""+numberOfPoints);
config.setParam("Number_of_Points", "" + numberOfPoints);
return config;
}
public static AlgorithmConfiguration configHCAFfilter(String table, float x1,float y1, float x2, float y2){
public static AlgorithmConfiguration configHCAFfilter(String table, float x1, float y1, float x2, float y2) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);
@ -185,15 +184,15 @@ public class PresetConfigGenerator {
config.setParam("FINAL_TABLE_NAME", table);
config.setParam("BOUNDING_BOX_LEFT_LOWER_LONG", ""+x1);
config.setParam("BOUNDING_BOX_RIGHT_UPPER_LONG", ""+x2);
config.setParam("BOUNDING_BOX_LEFT_LOWER_LAT", ""+y1);
config.setParam("BOUNDING_BOX_RIGHT_UPPER_LAT", ""+y2);
config.setParam("BOUNDING_BOX_LEFT_LOWER_LONG", "" + x1);
config.setParam("BOUNDING_BOX_RIGHT_UPPER_LONG", "" + x2);
config.setParam("BOUNDING_BOX_LEFT_LOWER_LAT", "" + y1);
config.setParam("BOUNDING_BOX_RIGHT_UPPER_LAT", "" + y2);
return config;
}
public static AlgorithmConfiguration configHSPENfilter(String table, String speciesList){
public static AlgorithmConfiguration configHSPENfilter(String table, String speciesList) {
AlgorithmConfiguration config = Regressor.getConfig();
config.setNumberOfResources(1);

View File

@ -0,0 +1,15 @@
# Use a single appender (AR) that logs to a rolling file
#log4j.rootLogger=AR
#### The AR appender writes to a rolling file
log4j.logger.AnalysisLogger=TRACE,AR
log4j.appender.AR=org.apache.log4j.RollingFileAppender
log4j.appender.AR.Threshold=TRACE
log4j.appender.AR.File=${GLOBUS_LOCATION}/logs/Analysis.log
log4j.appender.AR.MaxFileSize=1024KB
log4j.appender.AR.MaxBackupIndex=2
log4j.appender.AR.layout=org.apache.log4j.PatternLayout
log4j.appender.AR.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c - %m%n
#### Hibernate logging is limited to fatal errors
log4j.logger.org.hibernate = fatal

View File

@ -0,0 +1,18 @@
<?xml version='1.0' encoding='UTF-8'?>
<hibernate-configuration>
<session-factory>
<property name="connection.driver_class">org.postgresql.Driver</property>
<property name="connection.provider_class">org.hibernate.connection.C3P0ConnectionProvider</property>
<property name="connection.url">jdbc:postgresql://localhost/testdb</property>
<property name="connection.username">gcube</property>
<property name="connection.password">d4science2</property>
<!-- <property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>-->
<property name="dialect">org.hibernate.dialect.PostgreSQLDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">1</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>
<property name="current_session_context_class">thread</property>
</session-factory>
</hibernate-configuration>

View File

@ -0,0 +1,20 @@
#Percentage threshold for discarding a category
categoryDiscardThreshold=0
#Percentage threshold for accepting similarity between a single Time series entry and a reference entry
entryAcceptanceThreshold=80
#Size of a comparison chunk
chunkSize=25
#Number of chunks to take from the Time series for performing the comparison with respect to the reference data; if set to -1 all chunks will be analyzed
timeSeriesChunksToTake=2
#Number of chunks to take from the Reference for performing the comparison with the Time series elements; if set to -1 all chunks will be analyzed
referenceChunksToTake =50
#Use random choice for chunk selection (true|false)
randomTake=true
#Use Simple String Match as distance calculation
useSimpleDistance=false
#Number Of Threads to use
numberOfThreadsToUse=5
#if two final scores differ by more than this percentage, prune the lower result
categoryDiscardDifferencialThreshold = 2
#maximum difference between a result and the best result
singleEntryRecognitionMaxDeviation = 40