This commit is contained in:
Gianpaolo Coro 2013-03-12 16:44:56 +00:00
parent 74c040904a
commit 9437d03c0a
5 changed files with 76 additions and 10 deletions

View File

@ -212,6 +212,11 @@ public class CategoryGuesser {
col = null;
}
public ArrayList<SingleResult> getLastResults(){
return lastResults;
}
ArrayList<SingleResult> lastResults;
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig, String CategoryFilter, String ColumnFilter, String SingletonString) throws Exception {
SessionFactory dbSession = null;
String cfgFileCompletePath = cfgFile;
@ -304,6 +309,7 @@ public class CategoryGuesser {
AnalysisLogger.getLogger().warn("...End Calculation in " + t1 + "ms");
triesCounter = 0;
lastResults=checkingResults;
// close session if not more necessary
if (oneshotMode)
dbSession.close();
@ -478,7 +484,8 @@ public class CategoryGuesser {
}
}
return deleteDuplicates(results);
// return deleteDuplicates(results);
return results;
}

View File

@ -37,11 +37,12 @@ public class ExampleGuesser {
conf.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
guesser.runGuesser(seriesName, column, conf);
ArrayList<SingleResult> classification = guesser.getClassification();
ArrayList<SingleResult> classification = guesser.getLastResults();
//only for debug
guesser.showResults(classification);
guesser.shutdown();
} catch (Exception e) {
e.printStackTrace();
}

View File

@ -0,0 +1,58 @@
package org.gcube.contentmanagement.lexicalmatcher.analysis.test;
import java.util.ArrayList;
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Category;
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;

/**
 * Stand-alone demo: runs the lexical matcher on a single entry ("Faroe Island")
 * against a fixed set of reference categories and prints the detailed matches.
 */
public class ExampleLexicalMatcher {

	public static void main(String[] args) {
		try {
			String targetEntry = "Faroe Island";
			String categoryName = "COUNTRY_OLD";
			String targetColumn = "field6";

			CategoryGuesser matcher = new CategoryGuesser();
			LexicalEngineConfiguration config = buildConfiguration();

			matcher.runGuesser(targetEntry, config, categoryName, targetColumn);

			ArrayList<SingleResult> matches = matcher.getDetailedMatches();
			AnalysisLogger.getLogger().warn("Detailed Match on Name :" + targetEntry);
			CategoryGuesser.showResults(matches);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	// Assembles the engine configuration used by the demo: reference categories,
	// matching thresholds and the database connection parameters.
	private static LexicalEngineConfiguration buildConfiguration() {
		LexicalEngineConfiguration config = new LexicalEngineConfiguration();

		// human name, index, table name, description
		ArrayList<Category> referenceCategories = new ArrayList<Category>();
		referenceCategories.add(new Category("COUNTRY_OLD", "39c98800-dd3c-11e0-b8d1-d1e2e7ba4f9d", "rdf39c98800dd3c11e0b8d1d1e2e7ba4f9d", "country"));
		referenceCategories.add(new Category("CONTINENT_OLD", "1d5d51f0-dd42-11e0-b8d3-d1e2e7ba4f9d", "rdf1d5d51f0dd4211e0b8d3d1e2e7ba4f9d", "continent reference data"));
		referenceCategories.add(new Category("SPECIES_OLD", "0a7fb500-dd3d-11e0-b8d1-d1e2e7ba4f9d", "rdf0a7fb500dd3d11e0b8d1d1e2e7ba4f9d", "species"));
		referenceCategories.add(new Category("CodeListCountry", "4c8d93a0-edc2-11e0-93e4-f6a9821baa29", "rdf4c8d93a0edc211e093e4f6a9821baa29", "Country"));
		referenceCategories.add(new Category("CL_DIVISION", "1140bdf0-dd2c-11e0-9220-ae17b3db32b7", "rdf1140bdf0dd2c11e09220ae17b3db32b7", "undefined"));
		referenceCategories.add(new Category("CL_ASFIS_TAX", "f87360f0-d9f9-11e0-ba05-d9adb0db767c", "rdff87360f0d9f911e0ba05d9adb0db767c", "undefined"));
		config.setCategories(referenceCategories);

		// CHANGE THIS TO ENHANCE THE RECALL
		config.setEntryAcceptanceThreshold(30);
		config.setReferenceChunksToTake(-1);
		config.setTimeSeriesChunksToTake(-1);
		config.setUseSimpleDistance(false);

		// database Parameters
		config.setDatabaseUserName("gcube");
		config.setDatabasePassword("d4science2");
		config.setDatabaseURL("jdbc:postgresql://localhost/testdb");
		config.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
		return config;
	}
}

View File

@ -8,9 +8,9 @@ log4j.appender.stdout.layout.ConversionPattern=%d{dd/MM/yyyy HH:mm:ss} %p %t %c
#### Second appender writes to a file
log4j.logger.AnalysisLogger=info,AR
log4j.logger.AnalysisLogger=trace,AR,stdout
log4j.appender.AR=org.apache.log4j.RollingFileAppender
log4j.appender.AR.Threshold=info
log4j.appender.AR.Threshold=trace
log4j.appender.AR.File=${GLOBUS_LOCATION}/logs/Analysis.log
log4j.appender.AR.MaxFileSize=1024KB
log4j.appender.AR.MaxBackupIndex=2

View File

@ -1,13 +1,13 @@
#Percentage threshold for discarding a category
categoryDiscardThreshold=0
#Percentage threshold for accepting similarity between a single Time series entry and a reference entry
entryAcceptanceThreshold=80
entryAcceptanceThreshold=50
#Size of a comparison chunk
chunkSize=25
chunkSize=50
#Number of chunks to take from Time series for performing comparison respect to reference data; if set to -1 all chunks will be analyzed
timeSeriesChunksToTake=2
timeSeriesChunksToTake=1
#Number of chunks to take from Reference for performing comparison Time Series Elements; if set to -1 all chunks will be analyzed
referenceChunksToTake =50
referenceChunksToTake=5
#Use random choice for chunks selection = true |false
randomTake=true
#Use Simple String Match as distance calculation
@ -15,6 +15,6 @@ useSimpleDistance=false
#Number Of Threads to use
numberOfThreadsToUse=5
#if two final scores differ for more than this percentage, prune the lower result
categoryDiscardDifferencialThreshold = 2
categoryDiscardDifferencialThreshold = 50
#maximum difference between a result and the best result
singleEntryRecognitionMaxDeviation = 40