git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@51513 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
297676de1e
commit
d95cf07f69
|
@ -0,0 +1,117 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.core;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
 * Heuristics that map database column type names and raw string values onto
 * the Java class used to represent them ({@link String}, {@link Boolean} or
 * {@link BigDecimal}).
 */
public class DataTypeRecognizer {

    // If the DB type name contains one of these fragments, the column is classified as decimal.
    private static final String[] decimalType = { "decimal", "integer", "int", "ordinal", "length", "position", "real" };

    // If the DB type name contains one of these fragments, the column is classified as boolean.
    private static final String[] booleanType = { "bool" };

    // If the DB type name contains one of these fragments, the column is classified as a string.
    private static final String[] stringType = { "varchar", "char", "string", "text" };

    /**
     * Translates a database type name into the fully qualified name of the Java
     * class used for it.
     *
     * The string fragments are checked first, then the decimal ones, then the
     * boolean ones; the first group with a matching fragment wins.
     *
     * @param DBType the database type name (matched case-insensitively); may be {@code null}
     * @return the class name of {@link String}, {@link BigDecimal} or {@link Boolean};
     *         {@link String} is the fallback for {@code null} or unrecognised types
     */
    public static String transformTypeFromDB(String DBType) {
        // An unknown or missing type is treated as text.
        if (DBType == null) {
            return String.class.getName();
        }
        if (contains(DBType, stringType)) {
            return String.class.getName();
        }
        if (contains(DBType, decimalType)) {
            return BigDecimal.class.getName();
        }
        if (contains(DBType, booleanType)) {
            return Boolean.class.getName();
        }
        return String.class.getName();
    }

    /**
     * Guesses the type of a single value and returns it converted to that type.
     *
     * @param entry the raw value; may be {@code null}
     * @return a {@link BigDecimal} when the entry parses as a finite double, a
     *         {@link Boolean} when it equals "true" or "false" ignoring case, the
     *         entry itself otherwise; {@code null} when the entry is {@code null}
     */
    public static Object guessType(String entry) {
        // A missing value has no type; previously this path ended in a NullPointerException.
        if (entry == null) {
            return null;
        }
        try {
            // BigDecimal.valueOf rejects NaN and infinities with a NumberFormatException,
            // so those values fall through to the non-numeric branch as well.
            return BigDecimal.valueOf(Double.parseDouble(entry));
        } catch (NumberFormatException notANumber) {
            if (entry.equalsIgnoreCase("true") || entry.equalsIgnoreCase("false")) {
                return Boolean.valueOf(Boolean.parseBoolean(entry));
            }
            return entry;
        }
    }

    /**
     * Tells whether the lower-cased element contains at least one of the given fragments.
     */
    private static boolean contains(String element, String[] array) {
        // Locale.ROOT keeps the match independent of the default locale
        // (e.g. a Turkish default locale maps 'I' to a dotless 'i').
        String lowered = element.toLowerCase(java.util.Locale.ROOT);
        for (String fragment : array) {
            if (lowered.contains(fragment)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Guesses the prevailing type of a list of values by majority vote.
     *
     * Each element is classified with {@link #guessType(String)}; {@code null}
     * elements do not vote. On a tie the earlier candidate in the order
     * String, Boolean, BigDecimal wins, so an empty list yields String.
     *
     * @param elementlist the values to classify
     * @return the fully qualified class name of the winning type
     */
    public static String guessType(ArrayList<String> elementlist) {
        // 0 = String 1 = Boolean 2 = Decimal
        int[] scores = new int[3];
        String[] types = { String.class.getName(), Boolean.class.getName(), BigDecimal.class.getName() };

        for (String element : elementlist) {
            Object guessedObj = guessType(element);
            if (guessedObj instanceof String) {
                scores[0]++;
            } else if (guessedObj instanceof Boolean) {
                scores[1]++;
            } else if (guessedObj instanceof BigDecimal) {
                scores[2]++;
            }
        }

        // Strict comparison: the first index holding the maximum is kept.
        int max = -1;
        int maxindex = -1;
        for (int i = 0; i < scores.length; i++) {
            if (scores[i] > max) {
                max = scores[i];
                maxindex = i;
            }
        }

        return types[maxindex];
    }

    /**
     * Small manual check: classifies a list of five numeric strings and prints the result.
     */
    public static void main(String[] args) throws ClassNotFoundException {
        ArrayList<String> prova = new ArrayList<String>();
        for (int i = 0; i < 5; i++) {
            prova.add("1234");
        }

        String classtype = guessType(prova);
        System.out.println(classtype);
    }

}
|
|
@ -0,0 +1,350 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.core;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryOrderedList;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryScores;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.ChunkSet;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.ReferenceChunk;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.ReferenceChunkSet;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.SetOfReferenceChunkSet;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.SingletonChunkSet;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.TimeSeriesChunk;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks.TimeSeriesChunkSet;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class Engine {
|
||||
|
||||
private String ConfigurationFileNameLocal = "hibernate.cfg.xml";
|
||||
private SessionFactory referenceDBSession;
|
||||
|
||||
public ArrayList<String> bestCategories;
|
||||
public ArrayList<Double> bestScores;
|
||||
public ArrayList<String> bestColumns;
|
||||
public HashMap<String, CategoryScores> scoresTable;
|
||||
public String columnFilter;
|
||||
private LexicalEngineConfiguration config;
|
||||
private TimeSeriesChunk singletonChunk;
|
||||
|
||||
public ArrayList<SingleResult> getSingletonMatches(){
|
||||
return singletonChunk.getDetailedResults();
|
||||
}
|
||||
|
||||
public String getSingletonElement(){
|
||||
return singletonChunk.getSingletonEntry();
|
||||
}
|
||||
|
||||
public SessionFactory getDBSession() throws Exception {
|
||||
|
||||
if (referenceDBSession == null) {
|
||||
referenceDBSession = DatabaseFactory.initDBConnection(ConfigurationFileNameLocal);
|
||||
}
|
||||
|
||||
return referenceDBSession;
|
||||
}
|
||||
|
||||
public SessionFactory getDBSession(LexicalEngineConfiguration externalConf) throws Exception {
|
||||
|
||||
if (referenceDBSession == null) {
|
||||
referenceDBSession = DatabaseFactory.initDBConnection(ConfigurationFileNameLocal, externalConf);
|
||||
}
|
||||
|
||||
return referenceDBSession;
|
||||
}
|
||||
|
||||
public void resetEngine(LexicalEngineConfiguration Config,String ColumnFilter,String configPath){
|
||||
config = Config;
|
||||
scoresTable = new HashMap<String, CategoryScores>();
|
||||
bestCategories = new ArrayList<String>();
|
||||
bestColumns = new ArrayList<String>();
|
||||
bestScores = new ArrayList<Double>();
|
||||
columnFilter = ColumnFilter;
|
||||
// ConfigurationFileNameLocal = configPath+"/"+ConfigurationFileNameLocal;
|
||||
}
|
||||
|
||||
public Engine(LexicalEngineConfiguration Config,String ColumnFilter,String configPath) {
|
||||
config = Config;
|
||||
scoresTable = new HashMap<String, CategoryScores>();
|
||||
bestCategories = new ArrayList<String>();
|
||||
bestColumns = new ArrayList<String>();
|
||||
bestScores = new ArrayList<Double>();
|
||||
columnFilter = ColumnFilter;
|
||||
ConfigurationFileNameLocal = configPath+"/"+ConfigurationFileNameLocal;
|
||||
}
|
||||
|
||||
public void calcLike(CategoryOrderedList col, String unknownSeriesName, String unknownSeriesColumn) {
|
||||
scoresTable = col.getScoresTable();
|
||||
|
||||
// take a time series set of chunks
|
||||
TimeSeriesChunkSet tsChunkSet = null;
|
||||
try {
|
||||
tsChunkSet = new TimeSeriesChunkSet(config.TimeSeriesChunksToTake, config.chunkSize, unknownSeriesName, unknownSeriesColumn,config, this);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().error("Engine->calcLike-> ERROR could not retrieve time series chunks " + e.getLocalizedMessage());
|
||||
}
|
||||
// if we took the ts chunk set correctly perform calculation
|
||||
if (tsChunkSet != null) {
|
||||
|
||||
// generate the set of reference chunks
|
||||
SetOfReferenceChunkSet setRefChunksSet = new SetOfReferenceChunkSet(col.getOrderedList(),config, this);
|
||||
|
||||
TimeSeriesChunk tsChunk = tsChunkSet.nextChunk();
|
||||
// for all ts chunks
|
||||
while (tsChunk != null) {
|
||||
|
||||
// take a set of chunks from a reference category
|
||||
ReferenceChunkSet refChunkSet = setRefChunksSet.getNextChunkSet();
|
||||
while (refChunkSet != null) {
|
||||
// take a chunk in the reference chunk set
|
||||
ReferenceChunk refChunk = refChunkSet.nextChunk();
|
||||
while (refChunk != null) {
|
||||
|
||||
try {
|
||||
tsChunk.compareToReferenceChunk(scoresTable, refChunk);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().error("Engine->calcLike-> ERROR could not compare time series chunk with reference chunk " + e.getLocalizedMessage());
|
||||
}
|
||||
// take another chunk in the reference chunk set
|
||||
refChunk = refChunkSet.nextChunk();
|
||||
}
|
||||
|
||||
// check score
|
||||
UpdateScores(refChunkSet.getSeriesName(),false);
|
||||
|
||||
// take another set of chunks from another reference category
|
||||
refChunkSet = setRefChunksSet.getNextChunkSet();
|
||||
}
|
||||
|
||||
tsChunk = tsChunkSet.nextChunk();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
boolean threadActivity[];
|
||||
|
||||
private void wait4Thread(int index){
|
||||
|
||||
|
||||
// wait until thread is free
|
||||
while (threadActivity[index]) {
|
||||
try {
|
||||
Thread.sleep(10);
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void startNewTCalc(TimeSeriesChunk tsChunk, ReferenceChunkSet refChunkSet,int index){
|
||||
|
||||
threadActivity[index] = true;
|
||||
ThreadCalculator tc = new ThreadCalculator(tsChunk, refChunkSet,index);
|
||||
Thread t = new Thread(tc);
|
||||
t.start();
|
||||
// AnalysisLogger.getLogger().info("ThreadCalculator<-go "+index);
|
||||
}
|
||||
|
||||
|
||||
public void calcLikeThread(CategoryOrderedList col, String unknownSeriesName, String unknownSeriesColumn,String singletonString) {
|
||||
scoresTable = col.getScoresTable();
|
||||
|
||||
// take a time series set of chunks
|
||||
ChunkSet tsChunkSet = null;
|
||||
int[] currentThreads = MathFunctions.generateSequence(config.numberOfThreadsToUse);
|
||||
int currentThread = 0;
|
||||
threadActivity = new boolean [currentThreads.length];
|
||||
//initialize to false;
|
||||
for (int j=0;j<threadActivity.length;j++){
|
||||
threadActivity[j] = false;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
|
||||
if (singletonString==null)
|
||||
tsChunkSet = new TimeSeriesChunkSet(config.TimeSeriesChunksToTake, config.chunkSize, unknownSeriesName, unknownSeriesColumn,config, this);
|
||||
else{
|
||||
|
||||
tsChunkSet = new SingletonChunkSet(singletonString,config, this);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().error("Engine->calcLike-> ERROR could not retrieve time series chunks " + e.getLocalizedMessage());
|
||||
}
|
||||
// if we took the ts chunk set correctly perform calculation
|
||||
if (tsChunkSet != null) {
|
||||
|
||||
// generate the set of reference chunks
|
||||
SetOfReferenceChunkSet setRefChunksSet = new SetOfReferenceChunkSet(col.getOrderedList(),config, this);
|
||||
|
||||
TimeSeriesChunk tsChunk = (TimeSeriesChunk)tsChunkSet.nextChunk();
|
||||
|
||||
AnalysisLogger.getLogger().debug("tsChunk is null "+(tsChunk != null));
|
||||
// for all ts chunks
|
||||
while (tsChunk != null) {
|
||||
|
||||
// take a set of chunks from a reference category
|
||||
ReferenceChunkSet refChunkSet = setRefChunksSet.getNextChunkSet();
|
||||
while (refChunkSet != null) {
|
||||
wait4Thread(currentThreads[currentThread]);
|
||||
startNewTCalc(tsChunk, refChunkSet,currentThreads[currentThread]);
|
||||
|
||||
// makeComparisonsTSChunk2RefChunks(tsChunk, refChunkSet);
|
||||
|
||||
// take another set of chunks from another reference category
|
||||
refChunkSet = setRefChunksSet.getNextChunkSet();
|
||||
|
||||
currentThread++;
|
||||
if (currentThread >= currentThreads.length)
|
||||
currentThread = 0;
|
||||
}
|
||||
|
||||
|
||||
//if the chunk is a singleton, don't process other and record the result
|
||||
if (tsChunk.isSingleton()){
|
||||
singletonChunk = tsChunk;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
tsChunk = (TimeSeriesChunk)tsChunkSet.nextChunk();
|
||||
}
|
||||
|
||||
//wait for last threads to finish
|
||||
for (int i : currentThreads) {
|
||||
// free previous calculation
|
||||
wait4Thread(i);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void makeComparisonsTSChunk2RefChunks(TimeSeriesChunk tsChunk, ReferenceChunkSet refChunkSet) {
|
||||
|
||||
// take a chunk in the reference chunk set
|
||||
ReferenceChunk refChunk = refChunkSet.nextChunk();
|
||||
while (refChunk != null) {
|
||||
|
||||
try {
|
||||
tsChunk.compareToReferenceChunk(scoresTable, refChunk,columnFilter);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().error("Engine->calcLike-> ERROR could not compare time series chunk with reference chunk " + e.getLocalizedMessage());
|
||||
}
|
||||
|
||||
//if the TimeSeries chunk states the processing must be interrupted, don't perform other comparisons
|
||||
if (tsChunk.mustInterruptProcess())
|
||||
break;
|
||||
|
||||
// take another chunk in the reference chunk set
|
||||
refChunk = refChunkSet.nextChunk();
|
||||
|
||||
}
|
||||
// check score
|
||||
UpdateScores(refChunkSet.getSeriesName(),tsChunk.isSingleton());
|
||||
}
|
||||
|
||||
private void UpdateScores(String categoryName, boolean singletonMatch) {
|
||||
|
||||
CategoryScores categoryScore = scoresTable.get(categoryName);
|
||||
ArrayList<String> bestCols = categoryScore.findBestList();
|
||||
String bestColumn = null;
|
||||
double score = 0;
|
||||
if (bestCols.size() > 0) {
|
||||
bestColumn = bestCols.get(0);
|
||||
score = categoryScore.getScore(bestColumn,singletonMatch);
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().trace("Engine->UpdateScores-> \tBEST SUITABLE COLUMN IS: " + bestColumn);
|
||||
AnalysisLogger.getLogger().trace("Engine->UpdateScores-> \tBEST SCORE IS: " + score);
|
||||
|
||||
// order this column
|
||||
if (score > config.categoryDiscardThreshold) {
|
||||
|
||||
int index = 0;
|
||||
// insert at the right point in the classification
|
||||
for (Double dscore : bestScores) {
|
||||
if (dscore.doubleValue() < score) {
|
||||
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
bestCategories.add(index, categoryName);
|
||||
bestScores.add(index, score);
|
||||
bestColumns.add(index, bestColumn);
|
||||
checkAndAddColumns(categoryScore, bestCols, categoryName,singletonMatch);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void checkAndAddColumns(CategoryScores scores, ArrayList<String> bestCols, String categoryName,boolean singletonMatch) {
|
||||
|
||||
int size = bestCols.size();
|
||||
double bestScore = scores.getScore(bestCols.get(0),singletonMatch);
|
||||
|
||||
for (int i = 1; i < size; i++) {
|
||||
// take the i-th column
|
||||
String column = bestCols.get(i);
|
||||
if (column != null) {
|
||||
// check the score
|
||||
double score = scores.getScore(column,singletonMatch);
|
||||
|
||||
// if the score is near the best, add the column
|
||||
if ((score > 0) && (score >= (bestScore - 0.5 * bestScore))) {
|
||||
|
||||
int index = 0;
|
||||
// insert at the right point in the classification
|
||||
for (Double dscore : bestScores) {
|
||||
if (dscore.doubleValue() < score) {
|
||||
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
// AnalysisLogger.getLogger().info("chechAndAddColumns -> column to add "+column+" category "+categoryName+" with value "+score+" previous "+(bestScore - 0.5 * bestScore));
|
||||
bestColumns.add(index,column);
|
||||
bestScores.add(index,score);
|
||||
bestCategories.add(index,categoryName);
|
||||
// AnalysisLogger.getLogger().info("chechAndAddColumns -> "+bestCategories);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class ThreadCalculator implements Runnable {
|
||||
TimeSeriesChunk tsChunk;
|
||||
ReferenceChunkSet refChunksSet;
|
||||
int index;
|
||||
|
||||
public ThreadCalculator(TimeSeriesChunk tsChunk, ReferenceChunkSet refChunksSet,int index) {
|
||||
this.tsChunk = tsChunk;
|
||||
this.refChunksSet = refChunksSet;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
// AnalysisLogger.getLogger().info("ThreadCalculator->started "+index);
|
||||
makeComparisonsTSChunk2RefChunks(tsChunk, refChunksSet);
|
||||
threadActivity[index]=false;
|
||||
// AnalysisLogger.getLogger().info("ThreadCalculator>-finished "+index);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,322 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.core;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.util.Properties;
|
||||
|
||||
|
||||
/**
 * Configuration holder for the lexical matching engine: matching thresholds,
 * chunking parameters, database connection parameters and reference data
 * parameters.
 *
 * Numeric parameters start at a sentinel "unset" value and the two flags start
 * as {@code null}; {@link #mergeConfig(LexicalEngineConfiguration)} copies only
 * the parameters that were actually set on the other configuration.
 */
public class LexicalEngineConfiguration {

    // Sentinel for an unset float parameter (same value the field initialisers use).
    private static final float UNSET_FLOAT = -Float.MIN_VALUE;
    // Sentinel for an unset int parameter. Negating Integer.MIN_VALUE overflows
    // back to Integer.MIN_VALUE, so this equals the original "-Integer.MIN_VALUE".
    private static final int UNSET_INT = Integer.MIN_VALUE;

    public float categoryDiscardThreshold = UNSET_FLOAT;
    public float entryAcceptanceThreshold = UNSET_FLOAT;
    public float categoryDiscardDifferencialThreshold = UNSET_FLOAT;
    public float singleEntryRecognitionMaxDeviation = UNSET_FLOAT;
    public int chunkSize = UNSET_INT;
    public Boolean randomTake = null;
    // if set to -1 all chunks will be analyzed
    public int TimeSeriesChunksToTake = UNSET_INT;
    public int ReferenceChunksToTake = UNSET_INT;
    public Boolean useSimpleDistance = null;
    public int numberOfThreadsToUse = UNSET_INT;

    // database parameters
    public String databaseDriver = null;
    public String databaseURL = null;
    public String databaseUserName = null;
    public String databasePassword = null;
    public String databaseDialect = null;
    public String databaseIdleConnectionTestPeriod = null;
    public String databaseAutomaticTestTable = null;

    // reference data parameters
    public String referenceTable = null;
    public String referenceColumn = null;
    public String idColumn = null;
    public String nameHuman = null;
    public String description = null;

    /**
     * Loads the engine parameters from a properties file.
     *
     * All ten engine properties must be present. Fields are assigned in order,
     * so a failure part-way leaves the earlier fields already updated.
     *
     * @param absoluteFilePath path of the properties file
     * @throws Exception if the file cannot be read or a property is missing or malformed
     */
    public void configure(String absoluteFilePath) throws Exception {
        Properties props = new Properties();
        FileInputStream fis = new FileInputStream(absoluteFilePath);
        try {
            props.load(fis);
        } finally {
            // release the file handle even when loading fails
            fis.close();
        }
        categoryDiscardThreshold = Float.parseFloat(props.getProperty("categoryDiscardThreshold"));
        // the field is a float: parsing as float accepts the integer values that
        // were accepted before and fractional values as well
        entryAcceptanceThreshold = Float.parseFloat(props.getProperty("entryAcceptanceThreshold"));
        chunkSize = Integer.parseInt(props.getProperty("chunkSize"));
        TimeSeriesChunksToTake = Integer.parseInt(props.getProperty("timeSeriesChunksToTake"));
        ReferenceChunksToTake = Integer.parseInt(props.getProperty("referenceChunksToTake"));
        randomTake = Boolean.parseBoolean(props.getProperty("randomTake"));
        useSimpleDistance = Boolean.parseBoolean(props.getProperty("useSimpleDistance"));
        numberOfThreadsToUse = Integer.parseInt(props.getProperty("numberOfThreadsToUse"));
        categoryDiscardDifferencialThreshold = Float.parseFloat(props.getProperty("categoryDiscardDifferencialThreshold"));
        singleEntryRecognitionMaxDeviation = Float.parseFloat(props.getProperty("singleEntryRecognitionMaxDeviation"));
    }

    public void setCategoryDiscardThreshold(float categoryDiscardThreshold) {
        this.categoryDiscardThreshold = categoryDiscardThreshold;
    }

    public float getCategoryDiscardThreshold() {
        return categoryDiscardThreshold;
    }

    public void setEntryAcceptanceThreshold(float entryAcceptanceThreshold) {
        this.entryAcceptanceThreshold = entryAcceptanceThreshold;
    }

    public float getEntryAcceptanceThreshold() {
        return entryAcceptanceThreshold;
    }

    public void setCategoryDiscardDifferencialThreshold(float categoryDiscardDifferencialThreshold) {
        this.categoryDiscardDifferencialThreshold = categoryDiscardDifferencialThreshold;
    }

    public float getCategoryDiscardDifferencialThreshold() {
        return categoryDiscardDifferencialThreshold;
    }

    public void setChunkSize(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    public int getChunkSize() {
        return chunkSize;
    }

    public void setRandomTake(boolean randomTake) {
        this.randomTake = randomTake;
    }

    /**
     * @return the randomTake flag; {@code false} while the flag is unset
     *         (the raw tri-state value stays available in the public field)
     */
    public boolean isRandomTake() {
        // null-safe: unboxing the unset (null) flag used to throw a NullPointerException
        return randomTake != null && randomTake.booleanValue();
    }

    public void setTimeSeriesChunksToTake(int timeSeriesChunksToTake) {
        TimeSeriesChunksToTake = timeSeriesChunksToTake;
    }

    public int getTimeSeriesChunksToTake() {
        return TimeSeriesChunksToTake;
    }

    public void setReferenceChunksToTake(int referenceChunksToTake) {
        ReferenceChunksToTake = referenceChunksToTake;
    }

    public int getReferenceChunksToTake() {
        return ReferenceChunksToTake;
    }

    public void setUseSimpleDistance(boolean useSimpleDistance) {
        this.useSimpleDistance = useSimpleDistance;
    }

    /**
     * @return the useSimpleDistance flag; {@code false} while the flag is unset
     *         (the raw tri-state value stays available in the public field)
     */
    public boolean isUseSimpleDistance() {
        // null-safe: unboxing the unset (null) flag used to throw a NullPointerException
        return useSimpleDistance != null && useSimpleDistance.booleanValue();
    }

    public void setNumberOfThreadsToUse(int numberOfThreadsToUse) {
        this.numberOfThreadsToUse = numberOfThreadsToUse;
    }

    public int getNumberOfThreadsToUse() {
        return numberOfThreadsToUse;
    }

    public void setSingleEntryRecognitionMaxDeviation(float singleEntryRecognitionMaxDeviation) {
        this.singleEntryRecognitionMaxDeviation = singleEntryRecognitionMaxDeviation;
    }

    public float getSingleEntryRecognitionMaxDeviation() {
        return singleEntryRecognitionMaxDeviation;
    }

    /**
     * Overrides the parameters of this configuration with every parameter that
     * is set on {@code config}; unset parameters (sentinel value or
     * {@code null}) leave the current value untouched.
     *
     * @param config the overriding configuration; {@code null} is a no-op
     */
    public void mergeConfig(LexicalEngineConfiguration config) {
        if (config == null) {
            return;
        }

        if (config.getCategoryDiscardDifferencialThreshold() != UNSET_FLOAT) {
            setCategoryDiscardDifferencialThreshold(config.getCategoryDiscardDifferencialThreshold());
        }
        if (config.getSingleEntryRecognitionMaxDeviation() != UNSET_FLOAT) {
            setSingleEntryRecognitionMaxDeviation(config.getSingleEntryRecognitionMaxDeviation());
        }
        if (config.getCategoryDiscardThreshold() != UNSET_FLOAT) {
            setCategoryDiscardThreshold(config.getCategoryDiscardThreshold());
        }
        if (config.getChunkSize() != UNSET_INT) {
            setChunkSize(config.getChunkSize());
        }
        if (config.getEntryAcceptanceThreshold() != UNSET_FLOAT) {
            setEntryAcceptanceThreshold(config.getEntryAcceptanceThreshold());
        }
        if (config.getNumberOfThreadsToUse() != UNSET_INT) {
            setNumberOfThreadsToUse(config.getNumberOfThreadsToUse());
        }
        if (config.getReferenceChunksToTake() != UNSET_INT) {
            setReferenceChunksToTake(config.getReferenceChunksToTake());
        }
        if (config.getTimeSeriesChunksToTake() != UNSET_INT) {
            setTimeSeriesChunksToTake(config.getTimeSeriesChunksToTake());
        }
        if (config.randomTake != null) {
            setRandomTake(config.isRandomTake());
        }
        if (config.useSimpleDistance != null) {
            setUseSimpleDistance(config.isUseSimpleDistance());
        }

        // database information merge
        if (config.databaseDriver != null) {
            setDatabaseDriver(config.databaseDriver);
        }
        if (config.databaseDialect != null) {
            setDatabaseDialect(config.databaseDialect);
        }
        if (config.databaseAutomaticTestTable != null) {
            setDatabaseAutomaticTestTable(config.databaseAutomaticTestTable);
        }
        if (config.databaseIdleConnectionTestPeriod != null) {
            setDatabaseIdleConnectionTestPeriod(config.databaseIdleConnectionTestPeriod);
        }
        if (config.databaseUserName != null) {
            setDatabaseUserName(config.databaseUserName);
        }
        if (config.databasePassword != null) {
            setDatabasePassword(config.databasePassword);
        }
        if (config.databaseURL != null) {
            setDatabaseURL(config.databaseURL);
        }

        // reference data merge
        if (config.referenceTable != null) {
            setReferenceTable(config.referenceTable);
        }
        if (config.referenceColumn != null) {
            setReferenceColumn(config.referenceColumn);
        }
        if (config.idColumn != null) {
            setIdColumn(config.idColumn);
        }
        if (config.nameHuman != null) {
            setNameHuman(config.nameHuman);
        }
        if (config.description != null) {
            setDescription(config.description);
        }
    }

    public void setDatabaseDriver(String databaseDriver) {
        this.databaseDriver = databaseDriver;
    }

    public String getDatabaseDriver() {
        return databaseDriver;
    }

    public void setDatabaseURL(String databaseURL) {
        this.databaseURL = databaseURL;
    }

    public String getDatabaseURL() {
        return databaseURL;
    }

    public void setDatabaseUserName(String databaseUserName) {
        this.databaseUserName = databaseUserName;
    }

    public String getDatabaseUserName() {
        return databaseUserName;
    }

    public void setDatabasePassword(String databasePassword) {
        this.databasePassword = databasePassword;
    }

    public String getDatabasePassword() {
        return databasePassword;
    }

    public void setDatabaseDialect(String databaseDialect) {
        this.databaseDialect = databaseDialect;
    }

    public String getDatabaseDialect() {
        return databaseDialect;
    }

    public void setDatabaseIdleConnectionTestPeriod(String databaseIdleConnectionTestPeriod) {
        this.databaseIdleConnectionTestPeriod = databaseIdleConnectionTestPeriod;
    }

    public String getDatabaseIdleConnectionTestPeriod() {
        return databaseIdleConnectionTestPeriod;
    }

    public void setDatabaseAutomaticTestTable(String databaseAutomaticTestTable) {
        this.databaseAutomaticTestTable = databaseAutomaticTestTable;
    }

    public String getDatabaseAutomaticTestTable() {
        return databaseAutomaticTestTable;
    }

    public String getReferenceTable() {
        return referenceTable;
    }

    public void setReferenceTable(String referenceTable) {
        this.referenceTable = referenceTable;
    }

    public String getReferenceColumn() {
        return referenceColumn;
    }

    public void setReferenceColumn(String referenceColumn) {
        this.referenceColumn = referenceColumn;
    }

    public String getIdColumn() {
        return idColumn;
    }

    public void setIdColumn(String idColumn) {
        this.idColumn = idColumn;
    }

    public String getNameHuman() {
        return nameHuman;
    }

    public void setNameHuman(String nameHuman) {
        this.nameHuman = nameHuman;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

}
|
|
@ -0,0 +1,32 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class Example1_Species {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
String column = "field1";
|
||||
String correctFamily = "SPECIES";
|
||||
String correctColumn = "SCIENTIFIC_NAME";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class Example2_Area {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
String column = "field3";
|
||||
String correctFamily = "AREA";
|
||||
String correctColumn = "NAME_EN";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
|
||||
public class Example3_SingleMatchShark {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "shark";
|
||||
String family = "species";
|
||||
String column = "name_en";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
guesser.runGuesser(configPath, singleton, conf, family,column );
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
|
||||
public class Example4_SingleMatchMitella {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "Mitella pollicipes";
|
||||
// String singleton = "policipes";
|
||||
String family = "species";
|
||||
String column = "scientific_name";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
guesser.runGuesser(configPath, singleton, conf, family,column );
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
|
||||
public class Example5_SingleMatchMitella {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "Mirella policepes";
|
||||
String family = "species";
|
||||
String column = "scientific_name";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
guesser.runGuesser(configPath, singleton, conf, family,column );
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.examples;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
|
||||
public class ExampleGuessingExternalCfg {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = "./";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
|
||||
|
||||
//bench 1
|
||||
System.out.println("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_532bba80_1c8f_11df_a4ee_87804054691e";
|
||||
String column = "field2";
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
conf.setCategoryDiscardDifferencialThreshold(10);
|
||||
conf.setCategoryDiscardThreshold(0);
|
||||
conf.setChunkSize(25);
|
||||
conf.setEntryAcceptanceThreshold(50);
|
||||
conf.setNumberOfThreadsToUse(2);
|
||||
conf.setRandomTake(true);
|
||||
conf.setReferenceChunksToTake(20);
|
||||
conf.setTimeSeriesChunksToTake(1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("root");
|
||||
conf.setDatabasePassword("ash_ash80");
|
||||
conf.setDatabaseDriver("com.mysql.jdbc.Driver");
|
||||
conf.setDatabaseURL("jdbc:mysql://localhost/timeseries");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.MySQLDialect");
|
||||
conf.setDatabaseAutomaticTestTable("connectiontesttable");
|
||||
conf.setDatabaseIdleConnectionTestPeriod("3600");
|
||||
|
||||
//reference parameters
|
||||
conf.setReferenceTable("reference_table");
|
||||
conf.setReferenceColumn("table_name");
|
||||
conf.setIdColumn("id");
|
||||
conf.setNameHuman("name_human");
|
||||
conf.setDescription("description");
|
||||
|
||||
guesser.init(conf);
|
||||
|
||||
guesser.runGuesser(seriesName, column, conf);
|
||||
ArrayList<SingleResult> results = guesser.getClassification();
|
||||
CategoryGuesser.showResults(results);
|
||||
|
||||
System.out.println("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.interfaces.Reference;
|
||||
|
||||
public class Category implements Reference {
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
private String categoryName;
|
||||
private String categoryIndex;
|
||||
private String tableName;
|
||||
private String description;
|
||||
private BigInteger numberOfElements;
|
||||
|
||||
public Category(String name,String index,String tablename,String descr){
|
||||
categoryName=name;
|
||||
categoryIndex=index;
|
||||
tableName=tablename;
|
||||
description=descr;
|
||||
}
|
||||
|
||||
public void setName(String categoryName) {
|
||||
this.categoryName = categoryName;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return categoryName;
|
||||
}
|
||||
|
||||
public void setIndex(String categoryIndex) {
|
||||
this.categoryIndex = categoryIndex;
|
||||
}
|
||||
|
||||
public String getIndex() {
|
||||
return categoryIndex;
|
||||
}
|
||||
|
||||
public void setTableName(String tableName) {
|
||||
this.tableName = tableName;
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return tableName;
|
||||
}
|
||||
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
return "["+categoryName+": index "+categoryIndex+" table "+tableName+" description "+description+"]";
|
||||
}
|
||||
|
||||
public void setNumberOfElements(BigInteger numberOfElements) {
|
||||
this.numberOfElements = numberOfElements;
|
||||
}
|
||||
|
||||
public BigInteger getNumberOfElements() {
|
||||
return numberOfElements;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.interfaces.Reference;
|
||||
|
||||
public class CategoryOrderedList {
|
||||
|
||||
// lista ordinata in ordine decrescente
|
||||
ArrayList<Reference> orderedList;
|
||||
HashMap<String,Reference> orderedListTable;
|
||||
private HashMap<String, CategoryScores> scoresTable;
|
||||
|
||||
|
||||
public void setOrderedList(ArrayList<Reference> OrderedList){
|
||||
orderedList = OrderedList;
|
||||
}
|
||||
public HashMap<String, CategoryScores> getScoresTable() {
|
||||
return scoresTable;
|
||||
}
|
||||
|
||||
public void setCategoryTable( HashMap<String,Reference> OrderedListTable ) {
|
||||
orderedListTable = OrderedListTable ;
|
||||
}
|
||||
|
||||
public Reference getCategory ( String categoryName ) {
|
||||
return orderedListTable.get(categoryName);
|
||||
}
|
||||
|
||||
public ArrayList<Reference> getOrderedList() {
|
||||
return orderedList;
|
||||
}
|
||||
|
||||
LexicalEngineConfiguration config;
|
||||
|
||||
public CategoryOrderedList(LexicalEngineConfiguration Config) {
|
||||
orderedList = new ArrayList<Reference>();
|
||||
scoresTable = new HashMap<String, CategoryScores>();
|
||||
config = Config;
|
||||
orderedListTable = new HashMap<String, Reference>();
|
||||
}
|
||||
|
||||
public void addCategory(Category c) {
|
||||
|
||||
BigInteger nElements = c.getNumberOfElements();
|
||||
int index = 0;
|
||||
|
||||
for (Reference cc : orderedList) {
|
||||
BigInteger localnum = cc.getNumberOfElements();
|
||||
if (localnum.compareTo(nElements) < 0) {
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
orderedList.add(index, c);
|
||||
scoresTable.put(c.getName(), new CategoryScores(c.getNumberOfElements(),config));
|
||||
orderedListTable.put(c.getName(), c);
|
||||
// scoresTable.put(c.getName(), new CategoryScores());
|
||||
}
|
||||
|
||||
public CategoryOrderedList generateNovelList(){
|
||||
CategoryOrderedList newCatList = new CategoryOrderedList(config);
|
||||
newCatList.setOrderedList(orderedList);
|
||||
newCatList.setCategoryTable(orderedListTable);
|
||||
|
||||
for (String key:scoresTable.keySet()){
|
||||
CategoryScores ct = scoresTable.get(key);
|
||||
CategoryScores ctnew = new CategoryScores(ct.getCategoryElements(), config);
|
||||
newCatList.getScoresTable().put(key,ctnew);
|
||||
}
|
||||
|
||||
return newCatList;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,205 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
|
||||
//score relative to a certain category and column
|
||||
|
||||
public class CategoryScores {
|
||||
|
||||
// column names vs percentage
|
||||
private HashMap<String, Float> columnsScore;
|
||||
|
||||
private int matchedElements;
|
||||
private BigInteger maxElements;
|
||||
private BigInteger categoryElements;
|
||||
private LexicalEngineConfiguration config;
|
||||
|
||||
public CategoryScores(BigInteger catElements, LexicalEngineConfiguration Config) {
|
||||
columnsScore = new HashMap<String, Float>();
|
||||
matchedElements = 0;
|
||||
setCategoryElements(catElements);
|
||||
config = Config;
|
||||
maxElements = calculateMaxElements(catElements);
|
||||
}
|
||||
|
||||
public double calculateCoverage(){
|
||||
|
||||
double bd = new BigDecimal(matchedElements).divide(new BigDecimal(maxElements), 2, BigDecimal.ROUND_FLOOR).doubleValue();
|
||||
|
||||
//lower poor categories
|
||||
if (maxElements.compareTo(BigInteger.valueOf(config.chunkSize))<=0)
|
||||
bd = bd *0.8;
|
||||
|
||||
//To-DO take into observation!!!
|
||||
//higher very big set coverage
|
||||
if (categoryElements.compareTo(BigInteger.valueOf(10000))>0)
|
||||
bd = Math.max(0.01, bd);
|
||||
|
||||
return bd;
|
||||
}
|
||||
|
||||
private BigInteger calculateMaxElements(BigInteger catElements){
|
||||
BigInteger maxElements = BigInteger.ZERO;
|
||||
|
||||
int maxNumberOfChunks = config.ReferenceChunksToTake;
|
||||
int chunkSize = config.chunkSize;
|
||||
int numberofcycles=0;
|
||||
|
||||
if (maxNumberOfChunks<0)
|
||||
return catElements;
|
||||
try{
|
||||
BigDecimal intcycles;
|
||||
BigDecimal oddcycles;
|
||||
BigDecimal catElementsDecimal = new BigDecimal(catElements);
|
||||
BigDecimal[] arraydecimal = catElementsDecimal.divideAndRemainder(new BigDecimal(BigInteger.valueOf(chunkSize)));
|
||||
intcycles = arraydecimal[0];
|
||||
oddcycles = arraydecimal[1];
|
||||
numberofcycles = intcycles.intValue();
|
||||
if ((numberofcycles==0)&&(oddcycles.intValue() > 0)) {
|
||||
numberofcycles = numberofcycles + 1;
|
||||
maxElements = oddcycles.toBigInteger();
|
||||
}
|
||||
else{
|
||||
if (numberofcycles>maxNumberOfChunks)
|
||||
numberofcycles = maxNumberOfChunks;
|
||||
|
||||
maxElements = BigInteger.valueOf(chunkSize).multiply(BigInteger.valueOf(numberofcycles));
|
||||
}
|
||||
|
||||
}catch(Exception e){}
|
||||
|
||||
return maxElements;
|
||||
}
|
||||
|
||||
|
||||
public String showScores(){
|
||||
return columnsScore.toString()+":"+calculateCoverage(); //+" - "+matchedElements+" vs "+maxElements;
|
||||
}
|
||||
|
||||
public void incrementScore(String columnName,float increment,boolean doIncrementMathes) {
|
||||
|
||||
Float score = columnsScore.get(columnName);
|
||||
|
||||
if (score==null)
|
||||
score =new Float(0);
|
||||
|
||||
score = MathFunctions.incrementPerc(score, increment, matchedElements);
|
||||
|
||||
if (doIncrementMathes)
|
||||
matchedElements ++;
|
||||
|
||||
columnsScore.put(columnName, score);
|
||||
}
|
||||
|
||||
|
||||
public float getScore(String columnName,boolean simpleMatch) {
|
||||
|
||||
if (simpleMatch){
|
||||
return getSimpleScore(columnName);
|
||||
}
|
||||
else
|
||||
return getScore(columnName);
|
||||
}
|
||||
|
||||
|
||||
public float getScore(String columnName) {
|
||||
|
||||
Float score = null;
|
||||
try {
|
||||
// score = columnsScore.get(columnName)*(float)calculateCoverage();
|
||||
score = columnsScore.get(columnName);
|
||||
if (score!=null){
|
||||
return score*(float)calculateCoverage();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
return score;
|
||||
|
||||
}
|
||||
|
||||
public float getSimpleScore(String columnName) {
|
||||
|
||||
Float score = null;
|
||||
try {
|
||||
// score = columnsScore.get(columnName)*(float)calculateCoverage();
|
||||
score = columnsScore.get(columnName);
|
||||
if (score!=null){
|
||||
return score;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
return score;
|
||||
|
||||
}
|
||||
|
||||
// take the best performing column
|
||||
public String findBest() {
|
||||
|
||||
String bestCol = null;
|
||||
Float bestscore = Float.valueOf(-1);
|
||||
|
||||
for (String column : columnsScore.keySet()) {
|
||||
|
||||
Float score = new Float(0);
|
||||
try {
|
||||
score = columnsScore.get(column);
|
||||
} catch (Exception e) {
|
||||
AnalysisLogger.getLogger().error("ERROR in getting SCORE " + e.getLocalizedMessage());
|
||||
}
|
||||
if (bestscore.compareTo(score) < 0) {
|
||||
bestscore = score;
|
||||
bestCol = column;
|
||||
}
|
||||
}
|
||||
|
||||
return bestCol;
|
||||
}
|
||||
|
||||
// take the best performing columns
|
||||
public ArrayList<String> findBestList() {
|
||||
|
||||
ArrayList<String> bestCols = new ArrayList<String>();
|
||||
|
||||
for (String column : columnsScore.keySet()) {
|
||||
|
||||
Float score = new Float(0);
|
||||
|
||||
try {
|
||||
score = columnsScore.get(column);
|
||||
} catch (Exception e) {
|
||||
AnalysisLogger.getLogger().error("ERROR in getting SCORE " + e.getLocalizedMessage());
|
||||
}
|
||||
|
||||
// find best place where to put column
|
||||
int size = bestCols.size();
|
||||
int index = size;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (columnsScore.get(bestCols.get(i)).compareTo(score) <= 0) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bestCols.add(index, column);
|
||||
|
||||
}
|
||||
|
||||
return bestCols;
|
||||
}
|
||||
|
||||
public void setCategoryElements(BigInteger categoryElements) {
|
||||
this.categoryElements = categoryElements;
|
||||
}
|
||||
|
||||
public BigInteger getCategoryElements() {
|
||||
return categoryElements;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
//score relative to a certain category and column
|
||||
|
||||
public class CategoryScoresOld {
|
||||
|
||||
// column names vs percentage
|
||||
private HashMap<String, BigDecimal> columnsScore;
|
||||
|
||||
private BigDecimal maximumElements;
|
||||
|
||||
public CategoryScoresOld(BigInteger maxelements) {
|
||||
this.maximumElements = new BigDecimal(maxelements);
|
||||
columnsScore = new HashMap<String, BigDecimal>();
|
||||
}
|
||||
|
||||
public void setMaximumElements(BigDecimal MaximumElements) {
|
||||
maximumElements = MaximumElements;
|
||||
}
|
||||
|
||||
public void incrementScore(String columnName,float increment) {
|
||||
|
||||
BigDecimal score = columnsScore.get(columnName);
|
||||
|
||||
BigDecimal reciproc = BigDecimal.valueOf(increment);
|
||||
|
||||
if (score == null) {
|
||||
// build up a new score : 1/TOTAL
|
||||
score = reciproc;
|
||||
} else {
|
||||
score = score.add(reciproc);
|
||||
}
|
||||
columnsScore.put(columnName, score);
|
||||
// AnalysisLogger.getLogger().debug("CategoryOrderedList->checkUnkEntriesOnEntireCategory-> SCORE "+score);
|
||||
}
|
||||
|
||||
public double getScore(String columnName) {
|
||||
|
||||
double score = 0;
|
||||
try {
|
||||
|
||||
BigDecimal percentage = columnsScore.get(columnName);
|
||||
try {
|
||||
if (percentage == null)
|
||||
percentage = BigDecimal.ZERO;
|
||||
|
||||
AnalysisLogger.getLogger().trace("getScore -> Score for "+columnName+": " + percentage + " vs " + maximumElements);
|
||||
percentage = percentage.divide(maximumElements, 2, BigDecimal.ROUND_DOWN);
|
||||
} catch (ArithmeticException e) {
|
||||
percentage = BigDecimal.ZERO;
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
score = percentage.doubleValue();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
return score;
|
||||
|
||||
}
|
||||
|
||||
// take the best performing column
|
||||
public String findBest() {
|
||||
|
||||
String bestCol = null;
|
||||
BigDecimal bestscore = BigDecimal.valueOf(-1);
|
||||
|
||||
for (String column : columnsScore.keySet()) {
|
||||
|
||||
BigDecimal score = BigDecimal.ZERO;
|
||||
try {
|
||||
score = columnsScore.get(column);
|
||||
} catch (Exception e) {
|
||||
AnalysisLogger.getLogger().error("ERROR in getting SCORE " + e.getLocalizedMessage());
|
||||
}
|
||||
if (bestscore.compareTo(score) < 0) {
|
||||
bestscore = score;
|
||||
bestCol = column;
|
||||
}
|
||||
}
|
||||
|
||||
return bestCol;
|
||||
}
|
||||
|
||||
// take the best performing columns
|
||||
public ArrayList<String> findBestList() {
|
||||
|
||||
ArrayList<String> bestCols = new ArrayList<String>();
|
||||
|
||||
for (String column : columnsScore.keySet()) {
|
||||
|
||||
BigDecimal score = BigDecimal.ZERO;
|
||||
|
||||
try {
|
||||
score = columnsScore.get(column);
|
||||
} catch (Exception e) {
|
||||
AnalysisLogger.getLogger().error("ERROR in getting SCORE " + e.getLocalizedMessage());
|
||||
}
|
||||
|
||||
// find best place where to put column
|
||||
int size = bestCols.size();
|
||||
int index = size;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (columnsScore.get(bestCols.get(i)).compareTo(score) <= 0) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
bestCols.add(index, column);
|
||||
|
||||
}
|
||||
|
||||
return bestCols;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,272 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.DataTypeRecognizer;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class DBObjectTranslator {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
}
|
||||
|
||||
public ArrayList<RelationEdge> relations;
|
||||
public ArrayList<Category> categories;
|
||||
|
||||
public BigInteger totalEntries;
|
||||
public BigInteger totalCatElements;
|
||||
public BigInteger totalRelationElements;
|
||||
|
||||
public DBObjectTranslator() {
|
||||
relations = new ArrayList<RelationEdge>();
|
||||
categories = new ArrayList<Category>();
|
||||
totalCatElements = BigInteger.ZERO;
|
||||
totalRelationElements = BigInteger.ZERO;
|
||||
totalEntries = BigInteger.ZERO;
|
||||
}
|
||||
|
||||
public BigInteger calculateTotalEntries(SessionFactory dbSession, String timeSeriesName, String timeSeriesColumn) {
|
||||
|
||||
BigInteger count = BigInteger.ZERO;
|
||||
String query = "select count(*) from (SELECT distinct " + timeSeriesColumn + " FROM " + timeSeriesName + ") r;";
|
||||
// String query = "SELECT count(*) FROM " + timeSeriesName.toLowerCase();
|
||||
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
|
||||
for (Object result : resultSet) {
|
||||
|
||||
try {
|
||||
BigInteger resultcount = (BigInteger) result;
|
||||
totalEntries = totalEntries.add(resultcount);
|
||||
count = resultcount;
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->calculateTotalEntries: Time Series " + timeSeriesName + " total " + totalEntries);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
public ArrayList<String> retrieveTimeSeriesEntries(SessionFactory dbSession, String timeSeriesName, String timeSeriesColumn, BigInteger min, int numberOfElements) {
|
||||
|
||||
// String query = "SELECT distinct "+timeSeriesColumn+" FROM "+timeSeriesName+" r limit "+min+","+numberOfElements;
|
||||
String query = "SELECT distinct " + timeSeriesColumn + " FROM " + timeSeriesName + " r limit " + numberOfElements + " offset " + min;
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->query: " + query);
|
||||
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
ArrayList<String> column = new ArrayList<String>();
|
||||
|
||||
for (Object result : resultSet) {
|
||||
try {
|
||||
String value = "";
|
||||
if (result != null)
|
||||
value = result.toString();
|
||||
|
||||
column.add(value);
|
||||
|
||||
// AnalysisLogger.getLogger().debug("DBObjectTranslator->retrieveColumnRange: Column Element Added " + value);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->retrieveTimeSeriesEntries: Error in adding entry :" + e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->retrieveColumnRange: Column " + column.toString());
|
||||
|
||||
return column;
|
||||
}
|
||||
|
||||
public ArrayList<Entry> retrieveEntries(SessionFactory dbSession, String timeSeriesName, BigInteger min, int numberOfElements) {
|
||||
|
||||
// clean previous entries
|
||||
ArrayList<Entry> currentEntries = new ArrayList<Entry>();
|
||||
|
||||
ArrayList<String> descriptions = new ArrayList<String>();
|
||||
ArrayList<String> types = new ArrayList<String>();
|
||||
/*
|
||||
* SELECT table_name,ordinal_position,column_name,data_type, is_nullable,character_maximum_length FROM information_schema.COLUMNS WHERE table_name ='ref_area';
|
||||
*/
|
||||
|
||||
String queryDesc = "SELECT table_name,ordinal_position,column_name,data_type, is_nullable,character_maximum_length FROM information_schema.COLUMNS WHERE table_name ='" + timeSeriesName.toLowerCase() + "'";
|
||||
|
||||
List<Object> resultSetDesc = DatabaseFactory.executeSQLQuery(queryDesc, dbSession);
|
||||
for (Object result : resultSetDesc) {
|
||||
Object[] resultArray = (Object[]) result;
|
||||
descriptions.add((String) resultArray[2]);
|
||||
types.add(DataTypeRecognizer.transformTypeFromDB((String) resultArray[3]));
|
||||
}
|
||||
|
||||
if (descriptions.size() > 0) {
|
||||
// String query = "SELECT DISTINCT * FROM " + timeSeriesName + " r where id>=" + min.toString() + " and id<=" + max.toString();
|
||||
// String query = "SELECT DISTINCT * FROM " + timeSeriesName + " r limit "+min+","+numberOfElements;
|
||||
String query = "SELECT DISTINCT * FROM " + timeSeriesName + " r limit " + numberOfElements + " offset " + min;
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->retrieveEntries: query " + query);
|
||||
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
|
||||
for (Object result : resultSet) {
|
||||
Entry entry = new Entry();
|
||||
try {
|
||||
Object[] resultArray = (Object[]) result;
|
||||
int i = 0;
|
||||
for (Object res : resultArray) {
|
||||
// build entry
|
||||
String value = "";
|
||||
if (res != null)
|
||||
value = res.toString();
|
||||
|
||||
entry.addAttribute(descriptions.get(i), value);
|
||||
entry.addType(descriptions.get(i), types.get(i));
|
||||
i++;
|
||||
}
|
||||
// add entry
|
||||
currentEntries.add(entry);
|
||||
// AnalysisLogger.getLogger().debug("DBObjectTranslator->retrieveEntries: Entry Added " + entry.toString());
|
||||
} catch (Exception e) {
|
||||
// e.printStackTrace();
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->retrieveEntries: Error in adding entry :" + e.getLocalizedMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AnalysisLogger.getLogger().trace("DBObjectTranslator->retrieveEntries: Entries " + currentEntries);
|
||||
return currentEntries;
|
||||
}
|
||||
|
||||
public void buildRelationsEdges(SessionFactory dbSession) {
|
||||
|
||||
String query = "select * from relation_table;";
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
for (Object result : resultSet) {
|
||||
Object[] resultArray = (Object[]) result;
|
||||
RelationEdge re = null;
|
||||
try {
|
||||
re = new RelationEdge(((String) resultArray[2]), "" + resultArray[0], "" + resultArray[1]);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
if (re != null) {
|
||||
relations.add(re);
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->buildRelationsEdges: add relation " + re.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void buildCategories(SessionFactory dbSession, String referenceTable, String referenceColumn, String idColumn, String nameHuman, String description) {
|
||||
|
||||
referenceTable = referenceTable == null ? "reference_table" : referenceTable;
|
||||
referenceColumn = referenceColumn == null ? "table_name" : referenceColumn;
|
||||
nameHuman = nameHuman == null ? "name_human" : nameHuman;
|
||||
idColumn = idColumn == null ? "id" : idColumn;
|
||||
description = description == null ? "description" : description;
|
||||
|
||||
String query = "SELECT " + nameHuman + "," + idColumn + "," + referenceColumn + "," + description + " FROM " + referenceTable + " r;";
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
if (resultSet != null) {
|
||||
for (Object result : resultSet) {
|
||||
Object[] resultArray = (Object[]) result;
|
||||
Category cat = null;
|
||||
try {
|
||||
// name_human, id, table_name,description
|
||||
cat = new Category("" + resultArray[0], "" + resultArray[1], "" + resultArray[2], "" + resultArray[3]);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
if (cat != null) {
|
||||
categories.add(cat);
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->buildCategories: add category " + cat.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Category getCategoryfromIndex(String index) {
|
||||
|
||||
Category cat = null;
|
||||
for (Category c : categories) {
|
||||
|
||||
if (c.getIndex().equals(index)) {
|
||||
cat = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return cat;
|
||||
}
|
||||
|
||||
public void populateRelationWithCategories() {
|
||||
|
||||
for (RelationEdge re : relations) {
|
||||
|
||||
Category from = getCategoryfromIndex(re.getFrom());
|
||||
Category to = getCategoryfromIndex(re.getTo());
|
||||
re.setCategoryFrom(from.getName());
|
||||
re.setCategoryTo(to.getName());
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->populateRelationWithCategories: modified Relation " + re.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public void calculateRelationWeights(SessionFactory dbSession) {
|
||||
|
||||
for (RelationEdge re : relations) {
|
||||
|
||||
String query = "SELECT count(*) FROM " + re.getName().toLowerCase();
|
||||
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
for (Object result : resultSet) {
|
||||
|
||||
try {
|
||||
BigInteger resultcount = (BigInteger) result;
|
||||
re.setWeigth(resultcount);
|
||||
totalRelationElements = totalRelationElements.add(resultcount);
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->calculateRelationWeights: Relation " + re.getName() + " weight " + re.getWeigth());
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void calculateCategoriesWeights(SessionFactory dbSession) {
|
||||
|
||||
for (Category cat : categories) {
|
||||
|
||||
String query = "SELECT count(*) FROM " + cat.getTableName().toLowerCase();
|
||||
|
||||
List<Object> resultSet = DatabaseFactory.executeSQLQuery(query, dbSession);
|
||||
|
||||
for (Object result : resultSet) {
|
||||
|
||||
try {
|
||||
BigInteger resultcount = (BigInteger) result;
|
||||
cat.setNumberOfElements(resultcount);
|
||||
totalCatElements = totalCatElements.add(resultcount);
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->calculateCategoriesWeights: Category " + cat.getName() + " weight " + cat.getNumberOfElements() + " total " + totalCatElements);
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void buildCategoriesStructure(SessionFactory dbSession, String referenceTable, String referenceColumn, String idColumn, String nameHuman, String description) {
|
||||
buildCategories(dbSession, referenceTable, referenceColumn, idColumn, nameHuman, description);
|
||||
calculateCategoriesWeights(dbSession);
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->buildWholeStructure: Total Categories Elements " + totalCatElements + " Total Relation Elements " + totalRelationElements);
|
||||
}
|
||||
|
||||
public void buildWholeStructure(SessionFactory dbSession, String referenceTable, String referenceColumn, String idColumn, String nameHuman, String description) {
|
||||
|
||||
buildRelationsEdges(dbSession);
|
||||
buildCategories(dbSession, referenceTable, referenceColumn, idColumn, nameHuman, description);
|
||||
populateRelationWithCategories();
|
||||
calculateRelationWeights(dbSession);
|
||||
calculateCategoriesWeights(dbSession);
|
||||
|
||||
AnalysisLogger.getLogger().trace("DBObjectTranslator->buildWholeStructure: Total Categories Elements " + totalCatElements + " Total Relation Elements " + totalRelationElements);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * A single entry (row) of a category: a map of column name to value plus a
 * parallel map of column name to type name.
 */
public class Entry {

    HashMap<String,String> attributes;
    HashMap<String,String> types;

    /** Creates an entry with no attributes and no types. */
    public Entry(){
        attributes = new HashMap<String, String>();
        types = new HashMap<String, String>();
    }

    /** @return the live column-to-value map of this entry */
    public HashMap<String,String> getAttributes(){
        return attributes;
    }

    /** @return the live column-to-type map of this entry */
    public HashMap<String,String> getTypes(){
        return types;
    }

    /**
     * Stores the value of a column; a {@code null} value is stored as the empty string.
     *
     * @param column column name
     * @param value  column value, may be {@code null}
     */
    public void addAttribute(String column,String value){
        attributes.put(column, value == null ? "" : value);
    }

    /**
     * Stores the type of a column; a {@code null} type is stored as the empty string.
     *
     * @param column column name
     * @param value  type name, may be {@code null}
     */
    public void addType(String column,String value){
        types.put(column, value == null ? "" : value);
    }

    /**
     * Renders the entry as <code>{column=value|TYPE; ...}</code>. A column that has a
     * value but no registered type is rendered with an empty type instead of
     * failing with a NullPointerException.
     */
    @Override
    public String toString(){
        StringBuilder text = new StringBuilder();
        text.append("{");
        for (String att : attributes.keySet()){
            String value = attributes.get(att);
            String type = types.get(att);
            // Same convention as addType: "no type" is the empty string.
            String shownType = (type == null) ? "" : type.toUpperCase();
            text.append(att).append("=").append(value).append("|").append(shownType).append("; ");
        }
        text.append("}");
        return text.toString();
    }
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
/**
 * A named edge between two indexed endpoints, carrying a weight and the
 * category names of its two ends.
 */
public class RelationEdge {

    private String relationName;
    private String indexFrom;
    private String indexTo;
    private BigInteger weight;

    private String categoryFrom;
    private String categoryTo;

    /**
     * @param name relation name
     * @param from index of the source endpoint
     * @param to   index of the target endpoint
     */
    public RelationEdge(String name, String from, String to) {
        this.relationName = name;
        this.indexFrom = from;
        this.indexTo = to;
    }

    /**
     * Empty entry point; does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // nothing to run
    }

    /** @return the relation name */
    public String getName() {
        return this.relationName;
    }

    /** @param name the new relation name */
    public void setName(String name) {
        this.relationName = name;
    }

    /** @return index of the source endpoint */
    public String getFrom() {
        return this.indexFrom;
    }

    /** @return index of the target endpoint */
    public String getTo() {
        return this.indexTo;
    }

    /** @return the weight, or {@code null} if none was set */
    public BigInteger getWeigth() {
        return this.weight;
    }

    /** @param Weight the new weight */
    public void setWeigth(BigInteger Weight) {
        this.weight = Weight;
    }

    /** @return category name of the source endpoint */
    public String getCategoryFrom() {
        return this.categoryFrom;
    }

    /** @param categoryFrom category name of the source endpoint */
    public void setCategoryFrom(String categoryFrom) {
        this.categoryFrom = categoryFrom;
    }

    /** @return category name of the target endpoint */
    public String getCategoryTo() {
        return this.categoryTo;
    }

    /** @param categoryTo category name of the target endpoint */
    public void setCategoryTo(String categoryTo) {
        this.categoryTo = categoryTo;
    }

    /** Renders name, both indexes and both category names; the weight is not included. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("[");
        text.append(relationName).append(": from ").append(indexFrom);
        text.append(" to ").append(indexTo);
        text.append(" nameFrom ").append(categoryFrom);
        text.append(" nameTo ").append(categoryTo);
        return text.append("]").toString();
    }
}
|
|
@ -0,0 +1,65 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
|
||||
public class SingleResult {
|
||||
private String category;
|
||||
private String column;
|
||||
|
||||
private String tablename;
|
||||
private String familyID;
|
||||
|
||||
private double score;
|
||||
|
||||
public void setCategory(String category) {
|
||||
this.category = category;
|
||||
}
|
||||
public String getCategory() {
|
||||
return category;
|
||||
}
|
||||
public void setColumn(String column) {
|
||||
this.column = column;
|
||||
}
|
||||
public String getColumn() {
|
||||
return column;
|
||||
}
|
||||
public void setScore(double score) {
|
||||
this.score = score;
|
||||
}
|
||||
public double getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
public String getStringScore() {
|
||||
double scored = Math.round((int)(score*100))/(double)100;
|
||||
|
||||
return ""+scored;
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
double scored = Math.round((int)(score*100))/(double)100;
|
||||
if (column!=null)
|
||||
return category+"="+column+":"+scored+" tab:"+tablename+":"+familyID;
|
||||
else
|
||||
return category+"="+":"+scored;
|
||||
}
|
||||
|
||||
public SingleResult (String Category,String Column,double Score, String TableName,String FamilyID){
|
||||
category = Category;
|
||||
column = Column;
|
||||
score = Score;
|
||||
tablename = TableName;
|
||||
familyID = FamilyID;
|
||||
}
|
||||
public void setTablename(String tablename) {
|
||||
this.tablename = tablename;
|
||||
}
|
||||
public String getTablename() {
|
||||
return tablename;
|
||||
}
|
||||
public void setFamilyID(String familyID) {
|
||||
this.familyID = familyID;
|
||||
}
|
||||
public String getFamilyID() {
|
||||
return familyID;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph.GraphFramer;
|
||||
|
||||
|
||||
|
||||
public class TSObjectTransformer {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
public static CategoryOrderedList transform2List(DBObjectTranslator dbo, LexicalEngineConfiguration config){
|
||||
return transform2List(dbo,config,null);
|
||||
}
|
||||
|
||||
public static CategoryOrderedList transform2List(DBObjectTranslator dbo, LexicalEngineConfiguration config, String filter){
|
||||
CategoryOrderedList col = new CategoryOrderedList(config);
|
||||
for (Category cat:dbo.categories){
|
||||
if ((filter==null) || filter.equalsIgnoreCase(cat.getName()))
|
||||
col.addCategory(cat);
|
||||
}
|
||||
return col;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static void transform2Graph(DBObjectTranslator dbo){
|
||||
|
||||
GraphFramer starter = new GraphFramer("Time Series Graph");
|
||||
BigDecimal total = new BigDecimal(dbo.totalCatElements);
|
||||
// total = new BigDecimal(100).divide(total,2,BigDecimal.ROUND_HALF_UP);
|
||||
for (Category cat:dbo.categories){
|
||||
|
||||
BigDecimal bd = new BigDecimal(cat.getNumberOfElements());
|
||||
|
||||
bd = bd.divide(total,4,BigDecimal.ROUND_HALF_UP);
|
||||
bd = bd.multiply(new BigDecimal(100));
|
||||
bd = bd.setScale(2,BigDecimal.ROUND_HALF_UP);
|
||||
// double perc = bd.doubleValue()*100;
|
||||
|
||||
String builtname = cat.getName()+":"+bd+"% ";
|
||||
|
||||
starter.graphDisplayer.addVertex(builtname);
|
||||
}
|
||||
for (RelationEdge rel:dbo.relations){
|
||||
Category cat = dbo.getCategoryfromIndex(rel.getFrom());
|
||||
BigDecimal bd = new BigDecimal(cat.getNumberOfElements());
|
||||
bd = bd.divide(total,4,BigDecimal.ROUND_HALF_UP);
|
||||
bd = bd.multiply(new BigDecimal(100));
|
||||
bd = bd.setScale(2,BigDecimal.ROUND_HALF_UP);
|
||||
// double perc = bd.doubleValue()*100;
|
||||
|
||||
String name1 = cat.getName()+":"+bd+"% ";
|
||||
|
||||
cat = dbo.getCategoryfromIndex(rel.getTo());
|
||||
bd = new BigDecimal(cat.getNumberOfElements());
|
||||
bd = bd.divide(total,4,BigDecimal.ROUND_HALF_UP);
|
||||
bd = bd.multiply(new BigDecimal(100));
|
||||
bd = bd.setScale(2,BigDecimal.ROUND_HALF_UP);
|
||||
// perc = bd.doubleValue()+100;
|
||||
|
||||
String name2 = cat.getName()+":"+bd+"% ";
|
||||
starter.graphDisplayer.addEdge(name1,name2,new BigDecimal(rel.getWeigth()).divide(new BigDecimal(dbo.totalCatElements),2,BigDecimal.ROUND_HALF_UP).multiply(new BigDecimal(100)).doubleValue());
|
||||
// starter.graphDisplayer.addEdge(name1,name2,0);
|
||||
}
|
||||
|
||||
// starter.graphDisplayer.generateRandomGraph();
|
||||
starter.graphDisplayer.generateUpTo5StarGraph();
|
||||
|
||||
starter.go();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.interfaces;
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
/**
 * Contract of a reference element: it exposes a name, an index, a table
 * name, a description and a number of elements.
 */
public interface Reference {

    /** @return the name */
    String getName();

    /** @param categoryName the new name */
    void setName(String categoryName);

    /** @return the index */
    String getIndex();

    /** @param categoryIndex the new index */
    void setIndex(String categoryIndex);

    /** @return the table name */
    String getTableName();

    /** @param tableName the new table name */
    void setTableName(String tableName);

    /** @return the description */
    String getDescription();

    /** @param description the new description */
    void setDescription(String description);

    /** @return the number of elements */
    BigInteger getNumberOfElements();

    /** @param numberOfElements the new number of elements */
    void setNumberOfElements(BigInteger numberOfElements);

    /** @return a textual form of this reference */
    String toString();
}
|
|
@ -0,0 +1,14 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
|
||||
public abstract class Chunk {
|
||||
|
||||
|
||||
protected Engine engine;
|
||||
|
||||
public Chunk(Engine engine){
|
||||
this.engine = engine;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
|
||||
public abstract class ChunkSet {
|
||||
|
||||
|
||||
protected String seriesName;
|
||||
protected String seriesColumn;
|
||||
protected int chunkSize;
|
||||
private int maxNumberOfChunks;
|
||||
ArrayList<Integer> chunkSet;
|
||||
protected int chunkSetSize;
|
||||
protected BigInteger numberOfEntries;
|
||||
protected int chunkIndex;
|
||||
protected LexicalEngineConfiguration config;
|
||||
protected Engine engine;
|
||||
|
||||
public ChunkSet(int MaxNumberOfChunks, int ChunkSize, String SeriesName,String SeriesColumn, LexicalEngineConfiguration Config, Engine engine) throws Exception{
|
||||
this.engine = engine;
|
||||
config = Config;
|
||||
setSeriesName(SeriesName);
|
||||
setSeriesColumn(SeriesColumn);
|
||||
setChunkSize(ChunkSize);
|
||||
maxNumberOfChunks = MaxNumberOfChunks;
|
||||
generateChunkSet();
|
||||
|
||||
}
|
||||
|
||||
public ChunkSet(int MaxNumberOfChunks, int ChunkSize, String SeriesName,String SeriesColumn,BigInteger numberOfEntries,LexicalEngineConfiguration Config , Engine engine) throws Exception{
|
||||
this.engine = engine;
|
||||
config = Config;
|
||||
setSeriesName(SeriesName);
|
||||
setSeriesColumn(SeriesColumn);
|
||||
setChunkSize(ChunkSize);
|
||||
setNumberOfEntries(numberOfEntries);
|
||||
maxNumberOfChunks = MaxNumberOfChunks;
|
||||
generateChunkSet();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void generateChunkSet() throws Exception{
|
||||
|
||||
AnalysisLogger.getLogger().trace("ChunkSet->generateChunkSet-> \tGenerating Chunk Set for " + seriesName+ " "+seriesColumn);
|
||||
int numberOfChunks = calculateNumberOfCycles();
|
||||
//generate chunks to be processed
|
||||
chunkSet = MathFunctions.generateRandoms(maxNumberOfChunks, 0, numberOfChunks);
|
||||
chunkIndex = 0;
|
||||
chunkSetSize = numberOfChunks;
|
||||
}
|
||||
|
||||
|
||||
abstract protected BigDecimal calculateNumberOfElements() throws Exception;
|
||||
|
||||
|
||||
protected int calculateNumberOfCycles() throws Exception {
|
||||
|
||||
int numberofcycles = 0;
|
||||
|
||||
// calculate total entries in the time series
|
||||
BigDecimal numberOfElements = calculateNumberOfElements();
|
||||
// calculate total cycles of comparison
|
||||
BigDecimal intcycles;
|
||||
BigDecimal oddcycles;
|
||||
BigDecimal[] arraydecimal = numberOfElements.divideAndRemainder(new BigDecimal(BigInteger.valueOf(chunkSize)));
|
||||
intcycles = arraydecimal[0];
|
||||
oddcycles = arraydecimal[1];
|
||||
numberofcycles = intcycles.intValue();
|
||||
if ((numberofcycles==0)&&(oddcycles.intValue() > 0)) numberofcycles = numberofcycles + 1;
|
||||
|
||||
return numberofcycles;
|
||||
|
||||
}
|
||||
|
||||
public void setSeriesName(String seriesName) {
|
||||
this.seriesName = seriesName;
|
||||
}
|
||||
|
||||
|
||||
public String getSeriesName() {
|
||||
return seriesName;
|
||||
}
|
||||
|
||||
|
||||
public void setSeriesColumn(String seriesColumn) {
|
||||
this.seriesColumn = seriesColumn;
|
||||
}
|
||||
|
||||
|
||||
public String getSeriesColumn() {
|
||||
return seriesColumn;
|
||||
}
|
||||
|
||||
|
||||
public void setChunkSize(int chunkSize) {
|
||||
this.chunkSize = chunkSize;
|
||||
}
|
||||
|
||||
|
||||
public int getChunkSize() {
|
||||
return chunkSize;
|
||||
}
|
||||
|
||||
public void setNumberOfEntries(BigInteger numberOfEntries) {
|
||||
this.numberOfEntries = numberOfEntries;
|
||||
}
|
||||
|
||||
public BigInteger getNumberOfEntries() {
|
||||
return numberOfEntries;
|
||||
}
|
||||
|
||||
|
||||
|
||||
abstract public Object nextChunk();
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Entry;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class ReferenceChunk extends Chunk{
|
||||
|
||||
|
||||
|
||||
private String categoryName;
|
||||
private String categoryTableName;
|
||||
private ArrayList<Entry> referenceEntries;
|
||||
|
||||
private BigInteger startPoint;
|
||||
private int chunkSize;
|
||||
|
||||
public ReferenceChunk(String CategoryName, String CategoryTableName, BigInteger StartPoint, int ChunkSize, Engine engine){
|
||||
super(engine);
|
||||
chunkSize = ChunkSize;
|
||||
categoryName = CategoryName;
|
||||
categoryTableName = CategoryTableName;
|
||||
startPoint = StartPoint;
|
||||
AnalysisLogger.getLogger().trace("ReferenceChunk-> \t\tTOOK CATEGORY CHUNK FOR CATEGORY: " + categoryName+" - index : "+startPoint);
|
||||
}
|
||||
|
||||
|
||||
//takes references on demand from DB
|
||||
public ArrayList<Entry> getReferenceEntries() throws Exception{
|
||||
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
SessionFactory sess = engine.getDBSession();
|
||||
// AnalysisLogger.getLogger().debug("ReferenceChunk->getReferenceEntries-> \tCATEGORY CHUNK START : " + startPoint);
|
||||
referenceEntries = dbo.retrieveEntries(sess, categoryTableName, startPoint, chunkSize);
|
||||
return referenceEntries;
|
||||
}
|
||||
|
||||
public void setCategoryName(String categoryName) {
|
||||
this.categoryName = categoryName;
|
||||
}
|
||||
public String getCategoryName() {
|
||||
return categoryName;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
|
||||
public class ReferenceChunkSet extends ChunkSet{
|
||||
|
||||
|
||||
public ReferenceChunkSet(int MaxNumberOfChunks, int ChunkSize, String CategoryName,String CategoryColumn, LexicalEngineConfiguration config, Engine engine) throws Exception{
|
||||
super(MaxNumberOfChunks, ChunkSize, CategoryName,CategoryColumn, config, engine);
|
||||
}
|
||||
|
||||
public ReferenceChunkSet(int MaxNumberOfChunks, int ChunkSize, String CategoryName, String CategoryTable, BigInteger numberOfCategoryElements, LexicalEngineConfiguration config, Engine engine) throws Exception{
|
||||
super(MaxNumberOfChunks, ChunkSize, CategoryName, CategoryTable, numberOfCategoryElements, config, engine);
|
||||
}
|
||||
|
||||
protected BigDecimal calculateNumberOfElements() throws Exception{
|
||||
// calculate total entries in the time series
|
||||
BigDecimal numberOfElements = new BigDecimal(numberOfEntries);
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
|
||||
public ReferenceChunk nextChunk() {
|
||||
|
||||
ReferenceChunk rc = null;
|
||||
|
||||
while (!chunkSet.contains(chunkIndex) && (chunkIndex < chunkSetSize)) {
|
||||
chunkIndex++;
|
||||
}
|
||||
if (chunkIndex < chunkSetSize) {
|
||||
BigInteger startIndex = MathFunctions.chunk2Index(chunkIndex, chunkSize);
|
||||
try {
|
||||
rc = new ReferenceChunk(seriesName, seriesColumn , startIndex, chunkSize, engine);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
chunkIndex++;
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.interfaces.Reference;
|
||||
|
||||
public class SetOfReferenceChunkSet {
|
||||
|
||||
ArrayList<Reference> orderedList;
|
||||
int referenceIndex;
|
||||
LexicalEngineConfiguration config;
|
||||
Engine engine;
|
||||
|
||||
public SetOfReferenceChunkSet(ArrayList<Reference> OrderedList, LexicalEngineConfiguration Config, Engine engine){
|
||||
|
||||
this.engine = engine;
|
||||
orderedList = OrderedList;
|
||||
referenceIndex = 0;
|
||||
config = Config;
|
||||
}
|
||||
|
||||
//filter selects only one of the categories
|
||||
public ReferenceChunkSet getNextChunkSet(){
|
||||
ReferenceChunkSet cs = null;
|
||||
if (orderedList.size()>referenceIndex){
|
||||
Reference ref = orderedList.get(referenceIndex);
|
||||
try{
|
||||
cs = new ReferenceChunkSet(config.ReferenceChunksToTake,config.chunkSize,ref.getName(),ref.getTableName(),ref.getNumberOfElements(),config, engine);
|
||||
}catch (Exception e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
referenceIndex++;
|
||||
}
|
||||
|
||||
return cs;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
|
||||
|
||||
public class SingletonChunkSet extends ChunkSet {
|
||||
|
||||
private String singletonString;
|
||||
private String ColumnType;
|
||||
public SingletonChunkSet(String SingletonString, LexicalEngineConfiguration config, Engine engine) throws Exception {
|
||||
super(1, 1, null, null, config, engine);
|
||||
singletonString = SingletonString;
|
||||
ColumnType = null;
|
||||
}
|
||||
|
||||
protected BigDecimal calculateNumberOfElements() throws Exception {
|
||||
// calculate total entries in the time series
|
||||
BigDecimal numberOfElements = BigDecimal.ONE;
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
public TimeSeriesChunk nextChunk() {
|
||||
|
||||
TimeSeriesChunk tsc = null;
|
||||
|
||||
while (!chunkSet.contains(chunkIndex) && (chunkIndex < chunkSetSize)) {
|
||||
chunkIndex++;
|
||||
}
|
||||
if (chunkIndex < chunkSetSize) {
|
||||
BigInteger startIndex = MathFunctions.chunk2Index(chunkIndex, chunkSize);
|
||||
|
||||
try {
|
||||
tsc = new TimeSeriesChunk(singletonString, ColumnType, startIndex, chunkSize, config, engine);
|
||||
if (ColumnType == null) {
|
||||
ColumnType = tsc.getColumnType();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
chunkIndex++;
|
||||
return tsc;
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,167 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.DataTypeRecognizer;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryScores;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Entry;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class TimeSeriesChunk extends Chunk{
|
||||
|
||||
|
||||
private ArrayList<String> columnEntries;
|
||||
private String columnType;
|
||||
private LexicalEngineConfiguration config;
|
||||
private boolean mustInterrupt;
|
||||
private ArrayList<SingleResult> detailedResults;
|
||||
private String singletonElement;
|
||||
private boolean isSingleton;
|
||||
|
||||
public String getColumnType(){
|
||||
return columnType;
|
||||
}
|
||||
|
||||
public String getSingletonEntry(){
|
||||
return singletonElement;
|
||||
}
|
||||
|
||||
public ArrayList<SingleResult> getDetailedResults(){
|
||||
return detailedResults;
|
||||
}
|
||||
public boolean isSingleton(){
|
||||
return isSingleton;
|
||||
}
|
||||
|
||||
public TimeSeriesChunk(String timeSeriesName, String timeSeriesColumn, String ColumnType, BigInteger start, int ChunkSize, LexicalEngineConfiguration Config, Engine engine) throws Exception{
|
||||
super(engine);
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
SessionFactory sess = engine.getDBSession();
|
||||
columnEntries = dbo.retrieveTimeSeriesEntries(sess, timeSeriesName, timeSeriesColumn, start, ChunkSize);
|
||||
if (ColumnType==null){
|
||||
columnType = DataTypeRecognizer.guessType(columnEntries);
|
||||
AnalysisLogger.getLogger().trace("TimeSeriesChunk-> GUESSED TYPE " + columnType + " FOR COLUMN "+timeSeriesColumn);
|
||||
}
|
||||
mustInterrupt = false;
|
||||
config = Config;
|
||||
isSingleton = false;
|
||||
}
|
||||
|
||||
public TimeSeriesChunk(String singletonString, String ColumnType, BigInteger start, int ChunkSize, LexicalEngineConfiguration Config, Engine engine) throws Exception{
|
||||
super(engine);
|
||||
columnEntries = new ArrayList<String>();
|
||||
columnEntries.add(singletonString);
|
||||
if (ColumnType==null){
|
||||
columnType = DataTypeRecognizer.guessType(columnEntries);
|
||||
AnalysisLogger.getLogger().trace("TimeSeriesChunk-> GUESSED TYPE " + columnType + " FOR SINGLETON "+singletonString);
|
||||
}
|
||||
mustInterrupt = false;
|
||||
config = Config;
|
||||
isSingleton = true;
|
||||
singletonElement = singletonString;
|
||||
detailedResults = new ArrayList<SingleResult>();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean mustInterruptProcess (){
|
||||
return this.mustInterrupt;
|
||||
}
|
||||
public void compareToReferenceChunk(HashMap<String, CategoryScores> scoresTable, ReferenceChunk catChunk) throws Exception {
|
||||
compareToReferenceChunk(scoresTable, catChunk,null);
|
||||
}
|
||||
|
||||
// checks an entry set against a reference set
|
||||
// columnEntries: column elements from unknown column
|
||||
// cat: category analyzed for candidating to recognized
|
||||
// referenceEntries: some elements belonging to cat, to be compared to columnEntries
|
||||
public void compareToReferenceChunk(HashMap<String, CategoryScores> scoresTable, ReferenceChunk catChunk,String ColumnFilter) throws Exception {
|
||||
|
||||
|
||||
//in the case of a singleton Chunk interrupt computation in case of exact match
|
||||
|
||||
// get category Score for further processing
|
||||
CategoryScores categoryScores = scoresTable.get(catChunk.getCategoryName());
|
||||
//extract Entries from DB
|
||||
ArrayList<Entry> categoryEntries = catChunk.getReferenceEntries();
|
||||
|
||||
for (String timeSeriesElement : columnEntries) {
|
||||
// for each reference entry
|
||||
for (Entry referenceEntry : categoryEntries) {
|
||||
|
||||
// take all attributes of a reference entry for confrontation to columns
|
||||
HashMap<String, String> attributes = referenceEntry.getAttributes();
|
||||
HashMap<String, String> types = referenceEntry.getTypes();
|
||||
boolean anotherReference= true;
|
||||
|
||||
// for each attribute of an entry
|
||||
for (String referenceColumn : attributes.keySet()) {
|
||||
|
||||
// perform calculation only if the column type is the same
|
||||
if (types.get(referenceColumn).equals(columnType)&&((ColumnFilter==null)||(ColumnFilter.equalsIgnoreCase(referenceColumn)))) {
|
||||
// AnalysisLogger.getLogger().debug("CategoryOrderedList->checkAllEntriesOnEntireCategory-> REFERENCE COLUMN "+referenceColumn+" HAS TYPE "+types.get(referenceColumn));
|
||||
// take the attribute value of the entry
|
||||
String attribute = attributes.get(referenceColumn);
|
||||
// calculate the distance between the unknown entry and the attribute
|
||||
DistanceCalculator d = new DistanceCalculator();
|
||||
double percentage = d.CD(config.useSimpleDistance, timeSeriesElement, attribute, isSingleton, isSingleton) * 100f;
|
||||
// AnalysisLogger.getLogger().debug("CategoryOrderedList->checkUnkEntriesOnEntireCategory-> Percentage between " +timeSeriesElement + " and " + attribute + " is: "+percentage );
|
||||
// if they are similar
|
||||
if (percentage > config.entryAcceptanceThreshold) {
|
||||
// if (catChunk.getCategoryName().equals("COUNTRY_OLD"))
|
||||
AnalysisLogger.getLogger().trace("TimeSeriesChunk->compareToCategoryChunk-> \t\tPercentage between " + timeSeriesElement + " vs. " + attribute + " is: " + percentage+" in "+catChunk.getCategoryName()+":"+referenceColumn);
|
||||
|
||||
categoryScores.incrementScore(referenceColumn, (float)percentage,anotherReference);
|
||||
|
||||
//if we are in a singleton we have to get the details
|
||||
if (isSingleton){
|
||||
//for singleton match, fulfil details
|
||||
int index =0;
|
||||
for (SingleResult sr :detailedResults){
|
||||
|
||||
Double scoredetail = sr.getScore();
|
||||
|
||||
if (scoredetail<percentage){
|
||||
break;
|
||||
}
|
||||
index ++;
|
||||
}
|
||||
detailedResults.add(index, new SingleResult(attribute, null, percentage,null,"0"));
|
||||
}
|
||||
else{
|
||||
AnalysisLogger.getLogger().trace("TimeSeriesChunk->compareToCategoryChunk-> "+categoryScores.showScores());
|
||||
}
|
||||
//if exact match is reached, exit
|
||||
if ((percentage==100)&&(isSingleton))
|
||||
{
|
||||
detailedResults = new ArrayList<SingleResult>();
|
||||
detailedResults.add(new SingleResult(attribute, null, percentage,null,"0"));
|
||||
mustInterrupt = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}// end for on columns
|
||||
|
||||
if (mustInterrupt)
|
||||
break;
|
||||
|
||||
}// end for on entries
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks;
|
||||
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class TimeSeriesChunkSet extends ChunkSet {
|
||||
|
||||
private String ColumnType;
|
||||
|
||||
public TimeSeriesChunkSet(int MaxNumberOfChunks, int ChunkSize, String TimeSeriesName, String TimeSeriesColumn, LexicalEngineConfiguration config, Engine engine) throws Exception {
|
||||
super(MaxNumberOfChunks, ChunkSize, TimeSeriesName, TimeSeriesColumn, config,engine);
|
||||
ColumnType = null;
|
||||
}
|
||||
|
||||
protected BigDecimal calculateNumberOfElements() throws Exception {
|
||||
// calculate total entries in the time series
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
SessionFactory sess = engine.getDBSession();
|
||||
BigDecimal numberOfElements = new BigDecimal(dbo.calculateTotalEntries(sess, seriesName, seriesColumn));
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
public TimeSeriesChunk nextChunk() {
|
||||
|
||||
TimeSeriesChunk tsc = null;
|
||||
|
||||
while (!chunkSet.contains(chunkIndex) && (chunkIndex < chunkSetSize)) {
|
||||
chunkIndex++;
|
||||
}
|
||||
if (chunkIndex < chunkSetSize) {
|
||||
BigInteger startIndex = MathFunctions.chunk2Index(chunkIndex, chunkSize);
|
||||
try {
|
||||
tsc = new TimeSeriesChunk(seriesName, seriesColumn, ColumnType, startIndex, chunkSize, config, engine);
|
||||
if (ColumnType == null) {
|
||||
ColumnType = tsc.getColumnType();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
chunkIndex++;
|
||||
return tsc;
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import org.jgrapht.graph.ListenableDirectedWeightedGraph;
|
||||
|
||||
public class CustomListenableDirectedWeightedGraph<V,E> extends ListenableDirectedWeightedGraph<V,E>{
|
||||
|
||||
|
||||
public CustomListenableDirectedWeightedGraph(Class arg0) {
|
||||
super(arg0);
|
||||
}
|
||||
|
||||
public void setEdgeWeight(E e, double weight) {
|
||||
super.setEdgeWeight(e, weight);
|
||||
|
||||
((CustomWeightedEdge)e).setWeight(weight);
|
||||
}
|
||||
|
||||
public E addEdge(V o1,V o2) {
|
||||
E out = super.addEdge(o1,o2);
|
||||
((CustomWeightedEdge)out).setEdges(o1,o2);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
|
||||
import com.touchgraph.graphlayout.Edge;
|
||||
|
||||
public class CustomWeightedEdge extends DefaultWeightedEdge{
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
return "["+o1+":"+o2+":"+weight+"%]";
|
||||
}
|
||||
|
||||
private double weight;
|
||||
private Object o1;
|
||||
private Object o2;
|
||||
|
||||
public void setWeight(double weight){
|
||||
this.weight = weight;
|
||||
}
|
||||
|
||||
public void setEdges(Object o1,Object o2){
|
||||
this.o1=o1;
|
||||
this.o2=o2;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
|
||||
import com.touchgraph.graphlayout.Edge;
|
||||
|
||||
public class CustomWeightedVertex {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return "[" + name + ":" + weight + "%]";
|
||||
}
|
||||
|
||||
private double weight;
|
||||
private String name;
|
||||
|
||||
public CustomWeightedVertex(String name, double weight) {
|
||||
this.weight = weight;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public CustomWeightedVertex(String name) {
|
||||
this.weight = 0;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public boolean equals(CustomWeightedVertex v) {
|
||||
|
||||
if (v.name.equals(name))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,299 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.Dimension;
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import javax.swing.JApplet;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.jgraph.JGraph;
|
||||
import org.jgraph.graph.DefaultGraphCell;
|
||||
import org.jgraph.graph.GraphConstants;
|
||||
import org.jgrapht.ext.JGraphModelAdapter;
|
||||
|
||||
public class GraphDisplayer extends JApplet {
|
||||
private static final Color DEFAULT_BG_COLOR = Color.decode("#FAFBFF");
|
||||
private static final Dimension DEFAULT_SIZE = new Dimension(530, 320);
|
||||
|
||||
private JGraphModelAdapter m_jgAdapter;
|
||||
|
||||
public static int WIDTH = 1000;
|
||||
public static int HEIGHT = 800;
|
||||
|
||||
public static int WIDTHBOX = 1280;
|
||||
public static int HEIGHTBOX = 1024;
|
||||
|
||||
private int newxposition;
|
||||
private int newyposition;
|
||||
|
||||
private CustomListenableDirectedWeightedGraph g;
|
||||
private int nodesCounter;
|
||||
private static final int minx = 10;
|
||||
private static final int miny = 10;
|
||||
ArrayList<String> VertexNames;
|
||||
HashMap<String, String> Edges;
|
||||
|
||||
public void generatePosition(int lastxPosition, int lastyposition) {
|
||||
|
||||
int rangex = (int) WIDTH - (int) lastxPosition;
|
||||
// compute a fraction of the range, 0 <= frac < range
|
||||
Random a = new Random();
|
||||
int newx = lastxPosition + 70 + (int) (rangex * a.nextDouble());
|
||||
int epsilon = 1;
|
||||
int newy = (int) lastyposition + (int) (epsilon * 20f * Math.random());
|
||||
if (newx > WIDTH)
|
||||
newx = WIDTH - 100;
|
||||
if (newx < lastxPosition - 90)
|
||||
newx = lastxPosition + 90;
|
||||
if (newy > HEIGHT)
|
||||
newy = HEIGHT - 10;
|
||||
if (newy < 0)
|
||||
newy = 0;
|
||||
newxposition = newx;
|
||||
newyposition = newy;
|
||||
// System.out.println("LAST X "+lastxPosition+" NEW X "+newxposition);
|
||||
// System.out.println("LAST Y "+lastyposition+" NEW Y "+newyposition);
|
||||
}
|
||||
|
||||
public void init() {
|
||||
AnalysisLogger.getLogger().debug("INIZIALIZZATO!");
|
||||
|
||||
JGraph jgraph = new JGraph(m_jgAdapter);
|
||||
|
||||
adjustDisplaySettings(jgraph);
|
||||
getContentPane().add(jgraph);
|
||||
resize(DEFAULT_SIZE);
|
||||
|
||||
AnalysisLogger.getLogger().debug("RESIZED!");
|
||||
}
|
||||
|
||||
public void generateGraph() {
|
||||
|
||||
for (String v : VertexNames) {
|
||||
genPositionVertex(v);
|
||||
}
|
||||
}
|
||||
|
||||
public void generateRandomGraph() {
|
||||
|
||||
for (String v : VertexNames) {
|
||||
int randx = minx + (int) ((WIDTH - 100) * Math.random());
|
||||
int randy = miny + (int) ((HEIGHT - 100) * Math.random());
|
||||
positionVertexAt(v, randx, randy);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Lays the graph out as up to five "stars": the vertices that occur most often as edge
 * targets are placed on five fixed anchor points (four corners and the centre) and the
 * vertices pointing at each of them are arranged on a circle around it.
 *
 * Candidates beyond the fifth are left at their current position, because only five
 * anchor points exist; previously they caused an ArrayIndexOutOfBoundsException.
 */
public void generateUpTo5StarGraph() {

    // count how often each vertex occurs as an edge target
    // (the first occurrence is stored as 0, so the value is "occurrences - 1")
    HashMap<String, Integer> vertexFrequencies = new HashMap<String, Integer>();
    for (String edge : Edges.values()) {
        System.out.println(edge + "-" + vertexFrequencies.get(edge));
        Integer seen = vertexFrequencies.get(edge);
        int updated = (seen == null) ? 0 : seen.intValue() + 1;
        vertexFrequencies.put(edge, Integer.valueOf(updated));
    }

    // vertices that are neither the source nor the target of any edge are isolated
    for (String vertex : VertexNames) {
        if (Edges.get(vertex) == null && !Edges.containsValue(vertex)) {
            System.out.println("aggiunto vertice isolato " + vertex);
            vertexFrequencies.put(vertex, Integer.valueOf(0));
        }
    }

    System.out.println("FEQS " + vertexFrequencies.toString());

    // order the candidates by decreasing frequency (insertion sort)
    ArrayList<String> starList = new ArrayList<String>();
    for (String vertex : vertexFrequencies.keySet()) {
        int freq = vertexFrequencies.get(vertex);
        int i = 0;
        boolean trovato = false;
        for (String element : starList) {
            int referfreq = vertexFrequencies.get(element);
            if (referfreq < freq) {
                starList.add(i, vertex);
                trovato = true;
                break;
            }
            i++;
        }
        if (!trovato)
            starList.add(vertex);
    }

    // anchor points for the star centres
    System.out.println(starList.toString());
    int bound = 200;
    int[] boundedXIndexex = { bound, WIDTH - bound, bound, WIDTH - bound, WIDTH / 2 };
    int[] boundedYIndexex = { bound, bound, HEIGHT - bound, HEIGHT - bound, HEIGHT / 2 };

    // never use more stars than there are anchor points
    int sizeStar = Math.min(starList.size(), boundedXIndexex.length);

    for (int i = 0; i < sizeStar; i++) {

        String star = starList.get(i);
        int currentx = boundedXIndexex[i];
        int currenty = boundedYIndexex[i];
        positionVertexAt(star, currentx, currenty);

        // collect the vertices whose edge points at this star
        ArrayList<String> satellites = new ArrayList<String>();
        for (Map.Entry<String, String> link : Edges.entrySet()) {
            if (link.getValue().equals(star)) {
                satellites.add(link.getKey());
            }
        }

        int countelems = satellites.size();
        if (countelems > 0) {
            // floating point division: the integer form truncated the step and left a gap
            double subdivision = 360.0 / countelems;
            double angle = 105f;
            double radius = 200f;
            System.out.println("Numero di elementi nella stella: " + countelems + " suddivisioni: " + subdivision);

            // spread the satellites on a circle around the star centre
            for (String satellite : satellites) {
                int epsilonx = (int) (radius * Math.cos(Math.toRadians(angle)));
                int epsilony = (int) (radius * Math.sin(Math.toRadians(angle)));
                System.out.println("angolo attuale: " + angle + " x0: " + currentx + " y0 " + currenty + " ex " + epsilonx + " ey " + epsilony);
                positionVertexAt(satellite, currentx + epsilonx, currenty + epsilony);

                angle += subdivision;
            }
        }
    }

}
|
||||
|
||||
private void genPositionVertex(String vertexName) {
|
||||
|
||||
if (nodesCounter > 0) {
|
||||
if ((nodesCounter % 2) == 0) {
|
||||
newxposition = 10 + (int) (20f * Math.random());
|
||||
newyposition += 100;
|
||||
} else
|
||||
generatePosition(newxposition, newyposition);
|
||||
}
|
||||
|
||||
positionVertexAt(vertexName, newxposition, newyposition);
|
||||
nodesCounter++;
|
||||
}
|
||||
|
||||
/**
 * Creates an empty displayer: an empty graph with its JGraph model adapter, empty
 * vertex/edge bookkeeping and the layout cursor at ({@code minx}, {@code miny}).
 */
public GraphDisplayer() {
    // bookkeeping
    VertexNames = new ArrayList<String>();
    Edges = new HashMap<String, String>();
    nodesCounter = 0;

    // layout cursor
    newxposition = minx;
    newyposition = miny;

    // graph and its visual model
    g = new CustomListenableDirectedWeightedGraph(CustomWeightedEdge.class);
    m_jgAdapter = new JGraphModelAdapter(g);
}
|
||||
|
||||
public void addVertex(String name) {
|
||||
g.addVertex(name);
|
||||
VertexNames.add(name);
|
||||
}
|
||||
|
||||
public void addEdge(String v1, String v2, double bi) {
|
||||
CustomWeightedEdge ed = (CustomWeightedEdge)g.addEdge(v1,v2);
|
||||
g.setEdgeWeight(ed,bi);
|
||||
Edges.put(v1, v2);
|
||||
}
|
||||
|
||||
private void adjustDisplaySettings(JGraph jg) {
|
||||
jg.setPreferredSize(DEFAULT_SIZE);
|
||||
|
||||
Color c = DEFAULT_BG_COLOR;
|
||||
String colorStr = null;
|
||||
|
||||
try {
|
||||
colorStr = getParameter("bgcolor");
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
if (colorStr != null) {
|
||||
c = Color.decode(colorStr);
|
||||
}
|
||||
|
||||
jg.setBackground(c);
|
||||
}
|
||||
|
||||
private void positionVertexAt(Object vertex, int x, int y) {
|
||||
|
||||
// seleziono la cella chiamata vertex
|
||||
DefaultGraphCell cell = m_jgAdapter.getVertexCell(vertex);
|
||||
|
||||
|
||||
// recupero gli attributi della cella
|
||||
Map attr = cell.getAttributes();
|
||||
// recupero i boundaries della cella
|
||||
Rectangle2D b = GraphConstants.getBounds(attr);
|
||||
// setto i parametri del nuovo rettangolo
|
||||
GraphConstants.setBounds(attr, new Rectangle(x, y, (int) (((String)vertex).length()+50+b.getWidth()), (int) b.getHeight()));
|
||||
// costruisco una nuova cella
|
||||
Map cellAttr = new HashMap();
|
||||
cellAttr.put(cell, attr);
|
||||
|
||||
// posiziono la cella nel grafo
|
||||
m_jgAdapter.edit(cellAttr, null, null, null);
|
||||
|
||||
}
|
||||
|
||||
public void start() {
|
||||
repaint();
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
GraphFramer starter = new GraphFramer("Grafo");
|
||||
|
||||
// create a visualization using JGraph, via an adapter
|
||||
String nodi[] = { "ciao", "come", "stai", "oggi", "domani", "dopodomani" };
|
||||
for (String nodo : nodi) {
|
||||
starter.graphDisplayer.addVertex(nodo);
|
||||
}
|
||||
|
||||
for (int j = 0; j < nodi.length; j++) {
|
||||
int i0 = (int) (nodi.length * Math.random());
|
||||
int i1 = (int) (nodi.length * Math.random());
|
||||
System.out.println("i0: " + i0 + " i1: " + i1);
|
||||
if (i0 != i1) {
|
||||
starter.graphDisplayer.addEdge(nodi[i0], nodi[i1],0);
|
||||
}
|
||||
}
|
||||
|
||||
starter.graphDisplayer.generateGraph();
|
||||
|
||||
starter.go();
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import java.awt.Event;
|
||||
import java.awt.Frame;
|
||||
|
||||
public class GraphFramer extends Frame{
|
||||
|
||||
public GraphDisplayer graphDisplayer;
|
||||
|
||||
public GraphFramer(String frameName){
|
||||
super(frameName);
|
||||
graphDisplayer = new GraphDisplayer();
|
||||
add("Center",graphDisplayer);
|
||||
|
||||
}
|
||||
|
||||
public void go(){
|
||||
|
||||
graphDisplayer.init();
|
||||
|
||||
this.resize(GraphDisplayer.WIDTHBOX, GraphDisplayer.HEIGHTBOX);
|
||||
this.show();
|
||||
graphDisplayer.start();
|
||||
|
||||
}
|
||||
|
||||
public boolean HandleEvent(Event event){
|
||||
|
||||
if (event.id == Event.WINDOW_DESTROY)
|
||||
|
||||
{
|
||||
try
|
||||
{graphDisplayer.stop();
|
||||
graphDisplayer.destroy();
|
||||
}catch(Exception e){e.printStackTrace();}
|
||||
System.exit(0);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.Dimension;
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.swing.JApplet;
|
||||
import javax.swing.JFrame;
|
||||
|
||||
import org.jgraph.JGraph;
|
||||
import org.jgraph.graph.AttributeMap;
|
||||
import org.jgraph.graph.DefaultGraphCell;
|
||||
import org.jgraph.graph.GraphConstants;
|
||||
|
||||
import org.jgrapht.ListenableGraph;
|
||||
import org.jgrapht.ext.JGraphModelAdapter;
|
||||
import org.jgrapht.graph.ListenableDirectedGraph;
|
||||
import org.jgrapht.graph.DefaultEdge;
|
||||
|
||||
|
||||
public class GraphGeneratorApplet extends JApplet {
|
||||
private static final Color DEFAULT_BG_COLOR = Color.decode("#FAFBFF");
|
||||
private static final Dimension DEFAULT_SIZE = new Dimension(530, 320);
|
||||
|
||||
//
|
||||
private JGraphModelAdapter m_jgAdapter;
|
||||
|
||||
/**
|
||||
* @see java.applet.Applet#init().
|
||||
*/
|
||||
public void init() {
|
||||
// create a JGraphT graph
|
||||
ListenableGraph g = new ListenableDirectedGraph(DefaultEdge.class);
|
||||
|
||||
// create a visualization using JGraph, via an adapter
|
||||
m_jgAdapter = new JGraphModelAdapter(g);
|
||||
|
||||
JGraph jgraph = new JGraph(m_jgAdapter);
|
||||
|
||||
adjustDisplaySettings(jgraph);
|
||||
getContentPane().add(jgraph);
|
||||
resize(DEFAULT_SIZE);
|
||||
|
||||
// add some sample data (graph manipulated via JGraphT)
|
||||
g.addVertex("v1");
|
||||
g.addVertex("v2");
|
||||
g.addVertex("v3");
|
||||
g.addVertex("v4");
|
||||
|
||||
g.addEdge("v1", "v2");
|
||||
g.addEdge("v2", "v3");
|
||||
g.addEdge("v3", "v1");
|
||||
g.addEdge("v4", "v3");
|
||||
|
||||
|
||||
|
||||
// position vertices nicely within JGraph component
|
||||
positionVertexAt("v1", 130, 40);
|
||||
positionVertexAt("v2", 60, 200);
|
||||
positionVertexAt("v3", 310, 230);
|
||||
positionVertexAt("v4", 380, 70);
|
||||
|
||||
// that's all there is to org.gcube.contentmanagement.lexicalmatcher!...
|
||||
}
|
||||
|
||||
private void adjustDisplaySettings(JGraph jg) {
|
||||
jg.setPreferredSize(DEFAULT_SIZE);
|
||||
|
||||
Color c = DEFAULT_BG_COLOR;
|
||||
String colorStr = null;
|
||||
|
||||
try {
|
||||
colorStr = getParameter("bgcolor");
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
if (colorStr != null) {
|
||||
c = Color.decode(colorStr);
|
||||
}
|
||||
|
||||
jg.setBackground(c);
|
||||
}
|
||||
|
||||
private void positionVertexAt(Object vertex, int x, int y) {
|
||||
|
||||
|
||||
//seleziono la cella chiamata vertex
|
||||
DefaultGraphCell cell = m_jgAdapter.getVertexCell(vertex);
|
||||
//recupero gli attributi della cella
|
||||
Map attr = cell.getAttributes();
|
||||
//recupero i boundaries della cella
|
||||
Rectangle2D b = GraphConstants.getBounds(attr);
|
||||
//setto i parametri del nuovo rettangolo
|
||||
GraphConstants.setBounds(attr, new Rectangle(x, y, (int)b.getWidth(), (int)b.getHeight()));
|
||||
//costruisco una nuova cella
|
||||
Map cellAttr = new HashMap();
|
||||
cellAttr.put(cell, attr);
|
||||
//posiziono la cella nel grafo
|
||||
m_jgAdapter.edit(cellAttr, null, null, null);
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
|
||||
public class RelationEdge extends DefaultWeightedEdge{
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
private String relationName;
|
||||
private long indexFrom;
|
||||
private long indexTo;
|
||||
private BigInteger weight;
|
||||
|
||||
private String categoryFrom;
|
||||
private String categoryTo;
|
||||
|
||||
|
||||
public BigInteger getWeigth(){
|
||||
return weight;
|
||||
}
|
||||
|
||||
public void setWeigth(BigInteger Weight){
|
||||
weight = Weight;
|
||||
}
|
||||
|
||||
public long getTo(){
|
||||
return indexTo;
|
||||
}
|
||||
public long getFrom(){
|
||||
return indexFrom;
|
||||
}
|
||||
public String getName(){
|
||||
return relationName;
|
||||
}
|
||||
public void setName(String name){
|
||||
relationName = name;
|
||||
}
|
||||
|
||||
public RelationEdge(String name,long from,long to){
|
||||
relationName = name;
|
||||
indexFrom = from;
|
||||
indexTo = to;
|
||||
}
|
||||
@Override
|
||||
public String toString(){
|
||||
return "["+relationName+": from "+indexFrom+" to " +indexTo+" nameFrom "+categoryFrom+" nameTo "+categoryTo+"]";
|
||||
}
|
||||
|
||||
public void setCategoryFrom(String categoryFrom) {
|
||||
this.categoryFrom = categoryFrom;
|
||||
}
|
||||
|
||||
public String getCategoryFrom() {
|
||||
return categoryFrom;
|
||||
}
|
||||
|
||||
public void setCategoryTo(String categoryTo) {
|
||||
this.categoryTo = categoryTo;
|
||||
}
|
||||
|
||||
public String getCategoryTo() {
|
||||
return categoryTo;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.graph;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class TreeExtractor {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
TreeNode categoriesTree;
|
||||
|
||||
//recupera l'albero delle categorie
|
||||
public TreeNode getCategoriesTree(SessionFactory DB){
|
||||
return categoriesTree;
|
||||
}
|
||||
|
||||
//creo un nuovo Albero
|
||||
public TreeExtractor(){
|
||||
categoriesTree = new TreeNode(TreeNode.ROOT);
|
||||
}
|
||||
|
||||
class TreeNode implements Iterable<TreeNode> {
|
||||
|
||||
public static final String ROOT = "ROOT";
|
||||
|
||||
private Set<TreeNode> children;
|
||||
public String name;
|
||||
|
||||
public TreeNode(String Name) {
|
||||
children = new HashSet<TreeNode>();
|
||||
name = Name;
|
||||
}
|
||||
|
||||
public String getName(){
|
||||
return name;
|
||||
}
|
||||
|
||||
public boolean addChild(TreeNode n) {
|
||||
return children.add(n);
|
||||
}
|
||||
|
||||
public boolean removeChild(TreeNode n) {
|
||||
return children.remove(n);
|
||||
}
|
||||
|
||||
public Iterator<TreeNode> iterator() {
|
||||
return children.iterator();
|
||||
}
|
||||
|
||||
public boolean isLeaf(){
|
||||
return ((children==null) || (children.size()==0));
|
||||
}
|
||||
|
||||
public boolean isRoot(){
|
||||
return (name.equals(ROOT));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,489 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.run;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryOrderedList;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.TSObjectTransformer;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.interfaces.Reference;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class CategoryGuesser {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws Exception
|
||||
*/
|
||||
|
||||
private final static int MAXRESULTS = 10;
|
||||
|
||||
public static void showResults(ArrayList<SingleResult> results) {
|
||||
|
||||
AnalysisLogger.getLogger().warn("CLASSIFICATION RESULT:\n");
|
||||
int i = 1;
|
||||
for (SingleResult result : results) {
|
||||
if (result.getColumn() != null)
|
||||
AnalysisLogger.getLogger().warn(i + ": " + result.getCategory() + " - " + result.getColumn() + " ; SCORE: " + result.getStringScore() + "%");
|
||||
else
|
||||
AnalysisLogger.getLogger().warn(i + ": " + result.getCategory() + " ; SCORE: " + result.getStringScore() + "%");
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void AccuracyCalc(CategoryGuesser guesser, String configPath, String seriesName, String column, int attempts, String correctFamily, String correctColumn) throws Exception {
|
||||
AccuracyCalc(null, guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
}
|
||||
|
||||
public static void AccuracyCalc(LexicalEngineConfiguration externalcfg, CategoryGuesser guesser, String configPath, String seriesName, String column, int attempts, String correctFamily, String correctColumn) throws Exception {
|
||||
|
||||
int familyscore = 0;
|
||||
int columnscore = 0;
|
||||
// CategoryGuesser guesser = new CategoryGuesser();
|
||||
|
||||
for (int i = 0; i < attempts; i++) {
|
||||
|
||||
guesser.runGuesser(seriesName, column, externalcfg);
|
||||
ArrayList<SingleResult> results = guesser.getClassification();
|
||||
String result = results.toString();
|
||||
showResults(results);
|
||||
|
||||
AnalysisLogger.getLogger().info("CLASSIFICATION RESULT " + result + " " + CategoryGuesser.resultString(result, correctFamily, correctColumn));
|
||||
|
||||
if (CategoryGuesser.CheckCompleteResult(result, correctFamily, correctColumn))
|
||||
columnscore++;
|
||||
|
||||
if (CategoryGuesser.CheckFamilyResult(result, correctFamily))
|
||||
familyscore++;
|
||||
|
||||
}
|
||||
|
||||
double percColumn = ((double) columnscore / (double) attempts) * 100;
|
||||
double percFamily = ((double) familyscore / (double) attempts) * 100;
|
||||
|
||||
AnalysisLogger.getLogger().info("->ACCURACY ON FAMILY " + correctFamily + ":" + percFamily + " ACCURACY ON COLUMN " + correctColumn + ":" + percColumn);
|
||||
}
|
||||
|
||||
public static String resultString(String result, String family, String column) {
|
||||
|
||||
result = result.toUpperCase();
|
||||
family = family.toUpperCase();
|
||||
column = column.toUpperCase();
|
||||
|
||||
return "FAMILY REC: " + result.contains(family) + " COLUMN REC: " + result.contains(family + "=" + column);
|
||||
}
|
||||
|
||||
public static boolean CheckCompleteResult(String result, String family, String column) {
|
||||
|
||||
result = result.toUpperCase();
|
||||
family = family.toUpperCase();
|
||||
column = column.toUpperCase();
|
||||
if (result.contains(family + "=" + column))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean CheckFamilyResult(String result, String family) {
|
||||
|
||||
result = result.toUpperCase();
|
||||
family = family.toUpperCase();
|
||||
|
||||
if (result.contains(family + "="))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
// NOTE: The config path has to contain the two files: lexicalGuesser.properties and ALog.properties
|
||||
private static final String cfgFile = "lexicalGuesser.properties";
|
||||
private static final String LogFile = "ALog.properties";
|
||||
// singleton
|
||||
private CategoryOrderedList col;
|
||||
private Engine processor;
|
||||
private CategoryOrderedList originalCol;
|
||||
private LexicalEngineConfiguration config;
|
||||
private String configPath;
|
||||
private boolean oneshotMode;
|
||||
private static final int maxTriesClassification = 3;
|
||||
private int triesCounter;
|
||||
|
||||
/**
 * Creates a guesser that reads its property files from the given directory.
 *
 * @param ConfigPath directory containing the configuration files
 */
public CategoryGuesser(String ConfigPath) {
    this.configPath = ConfigPath;
    this.triesCounter = 0;
}
|
||||
|
||||
/**
 * Creates a guesser that reads its property files from the current directory
 * ({@code "."}).
 */
public CategoryGuesser() {
    this.configPath = ".";
    this.triesCounter = 0;
}
|
||||
|
||||
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig) throws Exception {
|
||||
runGuesser(seriesName, columnName, externalConfig, null, null);
|
||||
}
|
||||
|
||||
public void runGuesser(String seriesName, String columnName) throws Exception {
|
||||
runGuesser(seriesName, columnName, null, null, null);
|
||||
}
|
||||
|
||||
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig, String CategoryFilter, String ColumnFilter) throws Exception {
|
||||
runGuesser(seriesName, columnName, externalConfig, CategoryFilter, ColumnFilter, null);
|
||||
}
|
||||
|
||||
public void runGuesser(String SingletonString, LexicalEngineConfiguration externalConfig, String CategoryFilter, String ColumnFilter) throws Exception {
|
||||
oneshotMode = true;
|
||||
runGuesser(null, null, externalConfig, CategoryFilter, ColumnFilter, SingletonString);
|
||||
}
|
||||
|
||||
public void init(String categoryFilter, String columnFilter, LexicalEngineConfiguration externalConfig) throws Exception {
|
||||
|
||||
String cfgFileCompletePath = configPath + "/" + cfgFile;
|
||||
AnalysisLogger.setLogger(configPath + "/" + LogFile);
|
||||
|
||||
AnalysisLogger.getLogger().trace("******************INITIALIZING******************");
|
||||
|
||||
config = new LexicalEngineConfiguration();
|
||||
config.configure(cfgFileCompletePath);
|
||||
|
||||
if (externalConfig != null) {
|
||||
config.mergeConfig(externalConfig);
|
||||
}
|
||||
|
||||
processor = new Engine(config, columnFilter, configPath);
|
||||
|
||||
SessionFactory dbSession = processor.getDBSession(config);
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
|
||||
if (col == null) {
|
||||
AnalysisLogger.getLogger().trace("******************Order Category******************");
|
||||
if (externalConfig == null)
|
||||
externalConfig = new LexicalEngineConfiguration();
|
||||
dbo.buildCategoriesStructure(dbSession, externalConfig.getReferenceTable(), externalConfig.getReferenceColumn(), externalConfig.getIdColumn(), externalConfig.getNameHuman(), externalConfig.getDescription());
|
||||
col = TSObjectTransformer.transform2List(dbo, config, categoryFilter);
|
||||
AnalysisLogger.getLogger().trace("***************End Ordering********************");
|
||||
originalCol = col.generateNovelList();
|
||||
} else {
|
||||
col = originalCol.generateNovelList();
|
||||
}
|
||||
|
||||
oneshotMode = false;
|
||||
}
|
||||
|
||||
public void initSingleMatcher(LexicalEngineConfiguration externalConfig, String ColumnFilter) throws Exception {
|
||||
|
||||
String cfgFileCompletePath = configPath + "/" + cfgFile;
|
||||
AnalysisLogger.setLogger(configPath + "/" + LogFile);
|
||||
|
||||
config = new LexicalEngineConfiguration();
|
||||
config.configure(cfgFileCompletePath);
|
||||
|
||||
if (externalConfig != null) {
|
||||
config.mergeConfig(externalConfig);
|
||||
}
|
||||
|
||||
processor = new Engine(config, ColumnFilter, configPath);
|
||||
|
||||
// in this case, the lexical matcher is invoked once, then it has to be stopped in the end
|
||||
oneshotMode = true;
|
||||
}
|
||||
|
||||
public void init(String categoryFilter, String columnFilter) throws Exception {
|
||||
init(categoryFilter, columnFilter, null);
|
||||
}
|
||||
|
||||
public void init(LexicalEngineConfiguration externalConfig) throws Exception {
|
||||
init(null, null, externalConfig);
|
||||
}
|
||||
|
||||
public void init() throws Exception {
|
||||
init(null, null, null);
|
||||
}
|
||||
|
||||
public void refreshReferences() {
|
||||
col = null;
|
||||
}
|
||||
|
||||
/**
 * Core classification run. Prepares configuration and engine, rebuilds the category
 * structure from the database, runs the matching and, while the classification is
 * empty and fewer than maxTriesClassification retries were made, lowers both
 * thresholds by 20 and calls itself again.
 *
 * @param seriesName      table to classify (null in single-entry mode)
 * @param columnName      column to classify (null in single-entry mode)
 * @param externalConfig  configuration merged over the file configuration; when null the
 *                        existing config/engine are reused (or created if missing)
 * @param CategoryFilter  category filter, may be null
 * @param ColumnFilter    column filter, may be null
 * @param SingletonString single entry to match, may be null
 * @throws Exception propagated from configuration, engine or database calls
 */
public void runGuesser(String seriesName, String columnName, LexicalEngineConfiguration externalConfig, String CategoryFilter, String ColumnFilter, String SingletonString) throws Exception {
|
||||
|
||||
String cfgFileCompletePath = configPath + "/" + cfgFile;
|
||||
AnalysisLogger.setLogger(configPath + "/" + LogFile);
|
||||
|
||||
AnalysisLogger.getLogger().debug("Guessing Table " + seriesName + " column " + columnName);
|
||||
// an external configuration always forces a fresh config and a fresh engine
if (externalConfig != null) {
|
||||
config = new LexicalEngineConfiguration();
|
||||
config.configure(cfgFileCompletePath);
|
||||
config.mergeConfig(externalConfig);
|
||||
|
||||
// NOTE FOR FUTURE OPTIMIZATION: perform the re-init only if there is a change in the Database pointing
|
||||
processor = new Engine(config, ColumnFilter, configPath);
|
||||
} else {
|
||||
// no external configuration: keep the current config (this is how the lowered
// thresholds survive the recursive retry below) and reset or create the engine
if (config == null) {
|
||||
config = new LexicalEngineConfiguration();
|
||||
config.configure(cfgFileCompletePath);
|
||||
|
||||
}
|
||||
if (processor == null) {
|
||||
processor = new Engine(config, ColumnFilter, configPath);
|
||||
} else
|
||||
processor.resetEngine(config, ColumnFilter, configPath);
|
||||
}
|
||||
|
||||
SessionFactory dbSession = processor.getDBSession(config);
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
|
||||
//modification of 10/10/11 calculate structure each time
|
||||
// if (col == null) {
|
||||
AnalysisLogger.getLogger().trace("******************Order Category******************");
|
||||
dbo.buildCategoriesStructure(dbSession, config.getReferenceTable(), config.getReferenceColumn(), config.getIdColumn(), config.getNameHuman(), config.getDescription());
|
||||
col = TSObjectTransformer.transform2List(dbo, config, CategoryFilter);
|
||||
AnalysisLogger.getLogger().trace("***************End Ordering********************");
|
||||
originalCol = col.generateNovelList();
|
||||
/*
|
||||
} else {
|
||||
col = originalCol.generateNovelList();
|
||||
}
|
||||
*/
|
||||
|
||||
AnalysisLogger.getLogger().warn("Starting Calculation...wait...");
|
||||
|
||||
long t0 = System.currentTimeMillis();
|
||||
|
||||
// processor.calcLike(col,seriesName, columnName);
|
||||
|
||||
processor.calcLikeThread(col, seriesName, columnName, SingletonString);
|
||||
|
||||
// perform processing until the table contains at least one element
|
||||
ArrayList<SingleResult> checkingResults = null;
|
||||
|
||||
// if (oneshotMode)
|
||||
// checkingResults = getClassification();
|
||||
// else
|
||||
checkingResults = getClassification();
|
||||
|
||||
while ((checkingResults == null || checkingResults.size() == 0) && (triesCounter < maxTriesClassification)) {
|
||||
AnalysisLogger.getLogger().warn("..another processing pass is required. Attempt number " + (triesCounter + 1));
|
||||
triesCounter++;
|
||||
float differencialThr = config.getCategoryDiscardDifferencialThreshold();
|
||||
float acceptanceThr = config.getEntryAcceptanceThreshold();
|
||||
// reduce both thresholds by 20 points (never below 0) and recalculate
|
||||
config.setCategoryDiscardDifferencialThreshold(Math.max(differencialThr - 20, 0));
|
||||
config.setEntryAcceptanceThreshold(Math.max(acceptanceThr - 20, 0));
|
||||
AnalysisLogger.getLogger().trace("Performing next processing pass");
|
||||
// recursive retry with externalConfig = null so the lowered thresholds are kept
runGuesser(seriesName, columnName, null, CategoryFilter, ColumnFilter, SingletonString);
|
||||
AnalysisLogger.getLogger().debug("End processing pass");
|
||||
|
||||
// if (oneshotMode)
|
||||
// checkingResults = getClassification();
|
||||
// else
|
||||
checkingResults = getClassification();
|
||||
|
||||
// the nested call resets triesCounter to 0 before returning, so this ends the loop
if (triesCounter == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
long t1 = System.currentTimeMillis() - t0;
|
||||
|
||||
AnalysisLogger.getLogger().warn("...End Calculation in " + t1 + "ms");
|
||||
|
||||
triesCounter = 0;
|
||||
// close session if not more necessary
|
||||
// NOTE(review): nested retry calls reach this point as well - confirm that closing
// the session factory more than once is harmless in one-shot mode
if (oneshotMode)
|
||||
dbSession.close();
|
||||
}
|
||||
|
||||
public ArrayList<SingleResult> getClassificationOLD() {
|
||||
|
||||
ArrayList<SingleResult> results = new ArrayList<SingleResult>();
|
||||
int size = processor.bestCategories.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
results.add(new SingleResult(processor.bestCategories.get(i), processor.bestColumns.get(i), processor.bestScores.get(i), null, "0"));
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public ArrayList<SingleResult> getDetailedMatches() {
|
||||
|
||||
if (processor.getSingletonMatches() != null) {
|
||||
|
||||
// use deviation to cut results
|
||||
float threshold = config.getSingleEntryRecognitionMaxDeviation();
|
||||
ArrayList<SingleResult> results = processor.getSingletonMatches();
|
||||
double minScore = 0;
|
||||
// get the best result and calculate the threshold
|
||||
if (results.size() > 0) {
|
||||
minScore = results.get(0).getScore() - threshold;
|
||||
}
|
||||
|
||||
// remove poor objects
|
||||
int size = results.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
SingleResult sr = results.get(i);
|
||||
if (sr.getScore() < minScore) {
|
||||
results.remove(i);
|
||||
i--;
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
return processor.getSingletonMatches();
|
||||
} else
|
||||
return new ArrayList<SingleResult>();
|
||||
}
|
||||
|
||||
public String getDetailedSingletonEntry() {
|
||||
|
||||
if (processor.getSingletonElement() != null) {
|
||||
return processor.getSingletonElement();
|
||||
} else
|
||||
return "";
|
||||
}
|
||||
|
||||
public ArrayList<SingleResult> getClassificationPlain() {
|
||||
|
||||
ArrayList<SingleResult> results = new ArrayList<SingleResult>();
|
||||
int size = processor.bestCategories.size();
|
||||
double maxscore = 0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
double score = processor.bestScores.get(i);
|
||||
if (maxscore < score) {
|
||||
maxscore = score;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
double score = processor.bestScores.get(i);
|
||||
// normalizing percentages!!!
|
||||
score = (score / (maxscore + ((size > 1) ? 1 : 0))) * 100;
|
||||
|
||||
if (score > config.categoryDiscardDifferencialThreshold) {
|
||||
|
||||
Reference ref = col.getCategory(processor.bestCategories.get(i));
|
||||
|
||||
results.add(new SingleResult(processor.bestCategories.get(i), processor.bestColumns.get(i), score, ref.getTableName(), ref.getIndex()));
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public ArrayList<SingleResult> getClassification() {
|
||||
|
||||
ArrayList<SingleResult> results = new ArrayList<SingleResult>();
|
||||
int size = processor.bestCategories.size();
|
||||
double maxscore = 0;
|
||||
|
||||
BigDecimal sumElements = BigDecimal.ZERO;
|
||||
ArrayList<Double> subscores = new ArrayList<Double>();
|
||||
|
||||
// calculate sum of elements and weights;
|
||||
for (int i = 0; i < size; i++) {
|
||||
BigInteger catElements = col.getScoresTable().get(processor.bestCategories.get(i)).getCategoryElements();
|
||||
sumElements = sumElements.add(new BigDecimal(catElements));
|
||||
}
|
||||
/*
|
||||
if (sumElements.compareTo(BigDecimal.valueOf(10000)) < 0)
|
||||
return getClassificationPlain();
|
||||
*/
|
||||
for (int i = 0; i < size; i++) {
|
||||
double score = processor.bestScores.get(i);
|
||||
// multiply for impotance
|
||||
BigInteger catElements = col.getScoresTable().get(processor.bestCategories.get(i)).getCategoryElements();
|
||||
|
||||
// AnalysisLogger.getLogger().warn("\t elements "+catElements+" sum "+sumElements);
|
||||
|
||||
double weight = new BigDecimal(catElements).divide(sumElements, 2, BigDecimal.ROUND_HALF_UP).doubleValue();
|
||||
|
||||
if (weight >= 3)
|
||||
weight = 2 * Math.log(weight * 100) / 10f;
|
||||
else if ((weight >= 0.5) && (weight <= 1))
|
||||
{
|
||||
weight = Math.log(weight * 100) / 100.00f;
|
||||
}
|
||||
else if (weight < 0.05)
|
||||
weight = 0.05;
|
||||
|
||||
AnalysisLogger.getLogger().warn("WEIGHT FOR CATEGORY " + processor.bestCategories.get(i) + "-" + processor.bestColumns.get(i) + " : " + weight + " SCORE " + score);
|
||||
|
||||
// recalculate weights
|
||||
score = score * weight;
|
||||
score = Math.min(1, score);
|
||||
|
||||
if (maxscore < score) {
|
||||
maxscore = score;
|
||||
}
|
||||
|
||||
subscores.add(score);
|
||||
}
|
||||
// AnalysisLogger.getLogger().warn("MAX SCORE "+maxscore);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
// double score = processor.bestScores.get(i);
|
||||
double score = subscores.get(i);
|
||||
|
||||
// AnalysisLogger.getLogger().warn("SCORE FOR CATEGORY "+processor.bestCategories.get(i)+" -COLUMN : "+processor.bestColumns.get(i)+" - "+score);
|
||||
|
||||
// normalizing percentages!!!
|
||||
score = (score / (maxscore + ((size > 1) ? 1 : 0))) * 100;
|
||||
|
||||
// AnalysisLogger.getLogger().warn("SCORE FOR CATEGORY "+processor.bestCategories.get(i)+" -COLUMN : "+processor.bestColumns.get(i)+" - "+score);
|
||||
if (score > config.categoryDiscardDifferencialThreshold) {
|
||||
// AnalysisLogger.getLogger().warn("SCORE "+score);
|
||||
// insert into the right place
|
||||
int index = results.size();
|
||||
int j = 0;
|
||||
for (SingleResult res : results) {
|
||||
if (res.getScore() < score) {
|
||||
index = j;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
|
||||
Reference ref = col.getCategory(processor.bestCategories.get(i));
|
||||
SingleResult sr = new SingleResult(processor.bestCategories.get(i), processor.bestColumns.get(i), score, ref.getTableName(), ref.getIndex());
|
||||
//control for repetitions
|
||||
if (isnotRepetition(sr, results))
|
||||
results.add(index, sr);
|
||||
}
|
||||
}
|
||||
|
||||
//limit the result list after rescoring
|
||||
int s = results.size();
|
||||
if (s>MAXRESULTS){
|
||||
int diff = (size-MAXRESULTS);
|
||||
for (int i=0;i<diff;i++){
|
||||
s = results.size();
|
||||
results.remove(s-1);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private boolean isnotRepetition(SingleResult result, ArrayList<SingleResult> previous) {
|
||||
|
||||
boolean notrepeated = true;
|
||||
int size = previous.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
SingleResult sr = previous.get(i);
|
||||
if (sr.getCategory().equalsIgnoreCase(result.getCategory()) && sr.getColumn().equalsIgnoreCase(result.getColumn())) {
|
||||
notrepeated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return notrepeated;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.run;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.TSObjectTransformer;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
|
||||
import org.hibernate.SessionFactory;
|
||||
|
||||
public class StarGraphExtraction {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
RunMain();
|
||||
} catch (Exception e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private final static String ConfigurationFileNameLocal = "hibernate.cfg.xml";
|
||||
|
||||
public static void RunMain() throws Exception{
|
||||
AnalysisLogger.setLogger("./ALog.properties");
|
||||
|
||||
//configurazione DB - inizializzo la sessione e mi connetto
|
||||
SessionFactory dbSession = DatabaseFactory.initDBConnection(ConfigurationFileNameLocal);
|
||||
DBObjectTranslator dbo = new DBObjectTranslator();
|
||||
dbo.buildWholeStructure(dbSession,null,null,null,null,null);
|
||||
TSObjectTransformer.transform2Graph(dbo);
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class TestExternalCfgProduction {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
|
||||
String seriesName = "IMPORT_ecd2e3a0_ee90_11e0_be9e_90f3621758ee";
|
||||
String column = "field4";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
conf.setReferenceTable("codelist1733371938");
|
||||
conf.setReferenceColumn("ifield14");
|
||||
conf.setNameHuman("ifield1");
|
||||
conf.setIdColumn("ifield0");
|
||||
conf.setDescription("ifield2");
|
||||
|
||||
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("gcube");
|
||||
conf.setDatabasePassword("d4science2");
|
||||
conf.setDatabaseDriver("org.postgresql.Driver");
|
||||
conf.setDatabaseURL("jdbc:postgresql://localhost/testdb");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
|
||||
|
||||
guesser.runGuesser(seriesName, column, conf);
|
||||
guesser.showResults(guesser.getClassification());
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class TestSingleExternalCfgProduction {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "Faroe Island";
|
||||
|
||||
String family = "COUNTRY_OLD";
|
||||
String column = "field6";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
conf.setReferenceTable("codelist1733371938");
|
||||
conf.setReferenceColumn("ifield14");
|
||||
conf.setNameHuman("ifield1");
|
||||
conf.setIdColumn("ifield0");
|
||||
conf.setDescription("ifield2");
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("gcube");
|
||||
conf.setDatabasePassword("d4science2");
|
||||
// conf.setDatabaseDriver("org.postgresql.Driver");
|
||||
conf.setDatabaseURL("jdbc:postgresql://localhost/testdb");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
|
||||
|
||||
guesser.runGuesser(singleton, conf, family,column );
|
||||
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTest1 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";
|
||||
String column = "field1";
|
||||
String correctFamily = "country";
|
||||
String correctColumn = "name_en";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
//bench 2
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 2-------------------------");
|
||||
seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";
|
||||
column = "field2";
|
||||
correctFamily = "area";
|
||||
correctColumn = "name_en";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 2-----------------------\n");
|
||||
|
||||
//bench 3
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 3-------------------------");
|
||||
seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";
|
||||
column = "field4";
|
||||
correctFamily = "species";
|
||||
correctColumn = "scientific_name";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 3-----------------------\n");
|
||||
|
||||
//bench 4
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 4-------------------------");
|
||||
seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";
|
||||
column = "field3";
|
||||
correctFamily = "species";
|
||||
correctColumn = "scientific_name";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 4-----------------------\n");
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTest2 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
String column = "field1";
|
||||
String correctFamily = "SPECIES";
|
||||
String correctColumn = "SCIENTIFIC_NAME";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
|
||||
|
||||
//bench 2
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 2-------------------------");
|
||||
seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
column = "field2";
|
||||
correctFamily = "COUNTRY";
|
||||
correctColumn = "ISO_3_CODE";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 2-----------------------\n");
|
||||
|
||||
|
||||
//bench 4
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 4-------------------------");
|
||||
seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
column = "field3";
|
||||
correctFamily = "AREA";
|
||||
correctColumn = "NAME_EN";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 4-----------------------\n");
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTest3 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
String column = "field1";
|
||||
String correctFamily = "SPECIES";
|
||||
String correctColumn = "SCIENTIFIC_NAME";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTest4 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_2c97f580_35a0_11df_b8b3_aa10916debe6";
|
||||
String column = "field3";
|
||||
String correctFamily = "AREA";
|
||||
String correctColumn = "NAME_EN";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTest5 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_532bba80_1c8f_11df_a4ee_87804054691e";
|
||||
String column = "field2";
|
||||
String correctFamily = "ISSCAAP GROUP";
|
||||
String correctColumn = "NAME_EN";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTestExternalCfg {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_532bba80_1c8f_11df_a4ee_87804054691e";
|
||||
String column = "field2";
|
||||
String correctFamily = "ISSCAAP GROUP";
|
||||
String correctColumn = "NAME_EN";
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
conf.setCategoryDiscardDifferencialThreshold(5);
|
||||
conf.setCategoryDiscardThreshold(0);
|
||||
conf.setChunkSize(25);
|
||||
conf.setEntryAcceptanceThreshold(50);
|
||||
conf.setNumberOfThreadsToUse(2);
|
||||
conf.setRandomTake(true);
|
||||
conf.setReferenceChunksToTake(20);
|
||||
conf.setTimeSeriesChunksToTake(1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("root");
|
||||
// conf.setDatabasePassword("password");
|
||||
conf.setDatabaseDriver("com.mysql.jdbc.Driver");
|
||||
conf.setDatabaseURL("jdbc:mysql://localhost/timeseries");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.MySQLDialect");
|
||||
conf.setDatabaseAutomaticTestTable("connectiontesttable");
|
||||
conf.setDatabaseIdleConnectionTestPeriod("3600");
|
||||
|
||||
CategoryGuesser.AccuracyCalc(conf,guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
|
||||
public class BenchMarkTestFilterCategory {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "ref_order";
|
||||
String column = "scientific_name";
|
||||
String correctFamily = "order";
|
||||
String correctColumn = "scientific_name";
|
||||
|
||||
guesser.runGuesser(seriesName, column, null, correctFamily, correctColumn);
|
||||
ArrayList<SingleResult> results = guesser.getClassification();
|
||||
|
||||
CategoryGuesser.showResults(results);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
|
||||
public class BenchMarkTestSingleton {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "sarda sarda";
|
||||
// String singleton = "Mitella pollicipes";
|
||||
// String singleton = "policipes";
|
||||
// String singleton = "";
|
||||
String family = "catalog life";
|
||||
String column = "scientific_name";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
|
||||
guesser.runGuesser(singleton, conf, family,column );
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTestTSCountry {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "import_bdefb470_5cea_11df_a0a6_909e7d074592";
|
||||
String column = "field1";
|
||||
String correctFamily = "country";
|
||||
String correctColumn = "name_en";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTrainingSet {
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
String configPath =".";
|
||||
int attempts = 1;
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "ref_commission";
|
||||
String column = "name_en";
|
||||
String correctFamily = "commission";
|
||||
String correctColumn = "name_en";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 2-------------------------");
|
||||
seriesName = "ref_species";
|
||||
column = "scientific_name";
|
||||
correctFamily = "species";
|
||||
correctColumn = "scientific_name";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 2-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 3-------------------------");
|
||||
seriesName = "ref_area";
|
||||
column = "name_en";
|
||||
correctFamily = "area";
|
||||
correctColumn = "name_en";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 3-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 4-------------------------");
|
||||
seriesName = "ref_ocean";
|
||||
column = "name_en";
|
||||
correctFamily = "ocean";
|
||||
correctColumn = "name_en";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 4-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 5-------------------------");
|
||||
seriesName = "ref_geo_region";
|
||||
column = "name_en";
|
||||
correctFamily = "geo region";
|
||||
correctColumn = "name_en";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 5-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 6-------------------------");
|
||||
seriesName = "ref_fa_region";
|
||||
column = "name_en";
|
||||
correctFamily = "fa region";
|
||||
correctColumn = "name_en";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 6-----------------------\n");
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 7-------------------------");
|
||||
seriesName = "ref_order";
|
||||
column = "scientific_name";
|
||||
correctFamily = "order";
|
||||
correctColumn = "scientific_name";
|
||||
// CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 7-----------------------\n");
|
||||
|
||||
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class BenchMarkTrainingSetScientificName {
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
String configPath =".";
|
||||
int attempts = 1;
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String seriesName = "ref_species";
|
||||
String column = "scientific_name";
|
||||
String correctFamily = "species";
|
||||
String correctColumn = "scientific_name";
|
||||
CategoryGuesser.AccuracyCalc(guesser, configPath, seriesName, column, attempts, correctFamily, correctColumn);
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class TestExternalCfgProduction {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
int attempts = 1;
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
// String seriesName = "rdmc366dfe0ddf511e086b1b1c5d6fb1c27";
|
||||
String seriesName = "IMPORT_ecd2e3a0_ee90_11e0_be9e_90f3621758ee";
|
||||
|
||||
String column = "field4";
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
/*
|
||||
conf.setCategoryDiscardDifferencialThreshold(5);
|
||||
conf.setCategoryDiscardThreshold(0);
|
||||
conf.setChunkSize(25);
|
||||
conf.setEntryAcceptanceThreshold(50);
|
||||
conf.setNumberOfThreadsToUse(2);
|
||||
conf.setRandomTake(true);
|
||||
conf.setReferenceChunksToTake(20);
|
||||
conf.setTimeSeriesChunksToTake(1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
*/
|
||||
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("utente");
|
||||
conf.setDatabasePassword("d4science");
|
||||
// conf.setDatabaseDriver("org.postgresql.Driver");
|
||||
conf.setDatabaseURL("jdbc:postgresql://dbtest.next.research-infrastructures.eu/timeseries");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
|
||||
conf.setDatabaseAutomaticTestTable("connectiontesttable");
|
||||
conf.setDatabaseIdleConnectionTestPeriod("3600");
|
||||
conf.setReferenceTable("codelist1733371938");
|
||||
conf.setReferenceColumn("ifield14");
|
||||
conf.setNameHuman("ifield1");
|
||||
conf.setIdColumn("ifield0");
|
||||
conf.setDescription("ifield2");
|
||||
guesser.runGuesser(seriesName, column, conf);
|
||||
guesser.showResults(guesser.getClassification());
|
||||
// AnalysisLogger.getLogger().warn();
|
||||
|
||||
|
||||
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.analysis.test.old;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.run.CategoryGuesser;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
|
||||
public class TestSingleExternalCfgProduction {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
|
||||
String configPath = ".";
|
||||
CategoryGuesser guesser = new CategoryGuesser(configPath);
|
||||
//bench 1
|
||||
AnalysisLogger.getLogger().warn("----------------------BENCH 1-------------------------");
|
||||
String singleton = "Faroe Island";
|
||||
// String singleton = "Mitella pollicipes";
|
||||
// String singleton = "policipes";
|
||||
// String singleton = "";
|
||||
// String family = "rdf0a7fb500dd3d11e0b8d1d1e2e7ba4f9d";
|
||||
|
||||
String family = "COUNTRY_OLD";
|
||||
String column = "field6";
|
||||
|
||||
LexicalEngineConfiguration conf = new LexicalEngineConfiguration();
|
||||
|
||||
//CHANGE THIS TO ENHANCE THE RECALL
|
||||
conf.setEntryAcceptanceThreshold(30);
|
||||
conf.setReferenceChunksToTake(-1);
|
||||
conf.setTimeSeriesChunksToTake(-1);
|
||||
conf.setUseSimpleDistance(false);
|
||||
//database Parameters
|
||||
conf.setDatabaseUserName("utente");
|
||||
conf.setDatabasePassword("d4science");
|
||||
// conf.setDatabaseDriver("org.postgresql.Driver");
|
||||
conf.setDatabaseURL("jdbc:postgresql://dbtest.next.research-infrastructures.eu/timeseries");
|
||||
conf.setDatabaseDialect("org.hibernate.dialect.PostgreSQLDialect");
|
||||
conf.setDatabaseAutomaticTestTable("connectiontesttable");
|
||||
conf.setDatabaseIdleConnectionTestPeriod("3600");
|
||||
|
||||
conf.setReferenceTable("codelist1733371938");
|
||||
conf.setReferenceColumn("ifield14");
|
||||
conf.setNameHuman("ifield1");
|
||||
conf.setIdColumn("ifield0");
|
||||
conf.setDescription("ifield2");
|
||||
|
||||
guesser.initSingleMatcher(conf,column );
|
||||
|
||||
guesser.runGuesser(singleton, null, family,column );
|
||||
|
||||
ArrayList<SingleResult> detailedResults = guesser.getDetailedMatches();
|
||||
|
||||
AnalysisLogger.getLogger().warn("Detailed Match on Name :"+singleton);
|
||||
|
||||
CategoryGuesser.showResults(detailedResults);
|
||||
|
||||
AnalysisLogger.getLogger().warn("--------------------END BENCH 1-----------------------\n");
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.utils;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.PropertyConfigurator;
|
||||
|
||||
public class AnalysisLogger {
|
||||
|
||||
|
||||
private static Logger logger;
|
||||
private static Logger hibernateLogger;
|
||||
|
||||
public static Logger getLogger(){
|
||||
|
||||
if (logger == null){
|
||||
setLogger("./ALog.properties");
|
||||
logger = Logger.getLogger("AnalysisLogger");
|
||||
}
|
||||
|
||||
return logger;
|
||||
}
|
||||
//in ingresso vuole il path al file di config del log4j
|
||||
public static void setLogger(String path){
|
||||
if (logger == null){
|
||||
PropertyConfigurator.configure(path);
|
||||
}
|
||||
logger = Logger.getLogger("AnalysisLogger");
|
||||
hibernateLogger = Logger.getLogger("hibernate");
|
||||
}
|
||||
|
||||
public static void printStackTrace(Exception e){
|
||||
|
||||
int numberoflines = e.getStackTrace().length;
|
||||
for (int i=0;i<numberoflines;i++){
|
||||
logger.error(e.getStackTrace()[i]);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,207 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.utils;
|
||||
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration;
|
||||
import org.hibernate.Query;
|
||||
import org.hibernate.Session;
|
||||
import org.hibernate.SessionFactory;
|
||||
import org.hibernate.cfg.Configuration;
|
||||
|
||||
public class DatabaseFactory {
|
||||
|
||||
public static SessionFactory initDBConnection(String configurationFile) throws Exception {
|
||||
String xml = FileTools.readXMLDoc(configurationFile);
|
||||
SessionFactory DBSessionFactory = null;
|
||||
Configuration cfg = new Configuration();
|
||||
cfg = cfg.configure(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new ByteArrayInputStream(xml.getBytes())));
|
||||
DBSessionFactory = cfg.buildSessionFactory();
|
||||
return DBSessionFactory;
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
public static SessionFactory initDBConnection(String configurationFile, LexicalEngineConfiguration config) throws Exception {
|
||||
|
||||
|
||||
if (config==null)
|
||||
return initDBConnection(configurationFile);
|
||||
|
||||
|
||||
// take the configuration file
|
||||
File fl = new File(configurationFile);
|
||||
FileInputStream stream = new FileInputStream(fl);
|
||||
|
||||
SAXReader saxReader = new SAXReader();
|
||||
Document document = saxReader.read(stream);
|
||||
List<Node> nodes = document.selectNodes("//hibernate-configuration/session-factory/property");
|
||||
Iterator<Node> nodesIterator = nodes.iterator();
|
||||
|
||||
// System.out.println("--- DATABASE Configuration --- ");
|
||||
|
||||
while (nodesIterator.hasNext()) {
|
||||
Node currentnode = nodesIterator.next();
|
||||
String element = currentnode.valueOf("@name");
|
||||
if (element.equals("connection.driver_class"))
|
||||
if (config.getDatabaseDriver() != null){
|
||||
currentnode.setText(config.getDatabaseDriver());
|
||||
}
|
||||
if (element.equals("connection.url")) {
|
||||
if (config.getDatabaseURL() != null)
|
||||
currentnode.setText(config.getDatabaseURL());
|
||||
}
|
||||
if (element.equals("connection.username")) {
|
||||
if (config.getDatabaseUserName() != null)
|
||||
currentnode.setText(config.getDatabaseUserName());
|
||||
}
|
||||
if (element.equals("connection.password")) {
|
||||
if (config.getDatabasePassword() != null)
|
||||
currentnode.setText(config.getDatabasePassword());
|
||||
}
|
||||
if (element.equals("dialect")) {
|
||||
AnalysisLogger.getLogger().trace("Dialect -> "+config.getDatabaseDialect());
|
||||
if (config.getDatabaseDialect() != null)
|
||||
currentnode.setText(config.getDatabaseDialect());
|
||||
}
|
||||
if (element.equals("c3p0.idleConnectionTestPeriod")) {
|
||||
if (config.getDatabaseIdleConnectionTestPeriod() != null)
|
||||
currentnode.setText(config.getDatabaseIdleConnectionTestPeriod());
|
||||
}
|
||||
if (element.equals("c3p0.automaticTestTable")) {
|
||||
if (config.getDatabaseAutomaticTestTable() != null)
|
||||
currentnode.setText(config.getDatabaseAutomaticTestTable());
|
||||
}
|
||||
}
|
||||
|
||||
Configuration cfg = new Configuration();
|
||||
cfg = cfg.configure(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new ByteArrayInputStream(document.asXML().getBytes())));
|
||||
cfg.setProperty("hibernate.hbm2ddl.auto", "create");
|
||||
|
||||
SessionFactory DBSessionFactory = null;
|
||||
DBSessionFactory = cfg.buildSessionFactory();
|
||||
|
||||
// close stream
|
||||
stream.close();
|
||||
|
||||
|
||||
|
||||
|
||||
return DBSessionFactory;
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
public static List<Object> executeHQLQuery(String query, SessionFactory DBSessionFactory, boolean useSQL) {
|
||||
|
||||
List<Object> obj = null;
|
||||
Session ss = null;
|
||||
try {
|
||||
ss = DBSessionFactory.getCurrentSession();
|
||||
|
||||
ss.beginTransaction();
|
||||
|
||||
Query qr = null;
|
||||
|
||||
if (useSQL)
|
||||
qr = ss.createSQLQuery(query);
|
||||
else
|
||||
qr = ss.createQuery(query);
|
||||
|
||||
List<Object> result = qr.list();
|
||||
|
||||
ss.getTransaction().commit();
|
||||
|
||||
/*
|
||||
if (result == null)
|
||||
System.out.println("Hibernate doesn't return a valid object when org.gcube.contentmanagement.lexicalmatcher retrieve UserState Object");
|
||||
|
||||
if (result != null && result.size() == 0)
|
||||
System.out.println(String.format("found nothing in database"));
|
||||
*/
|
||||
if (result != null && result.size() != 0) {
|
||||
obj = result;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
// System.out.println(String.format("Error while executing query: %1$s %2$s", query, e.getMessage()));
|
||||
e.printStackTrace();
|
||||
rollback(ss);
|
||||
}
|
||||
|
||||
return obj;
|
||||
|
||||
}
|
||||
|
||||
public static void executeHQLUpdate(String query, SessionFactory DBSessionFactory, boolean useSQL) {
|
||||
// System.out.println("executing query: " + query);
|
||||
Session ss = null;
|
||||
|
||||
try {
|
||||
|
||||
ss = DBSessionFactory.getCurrentSession();
|
||||
// System.out.println("executing query");
|
||||
ss.beginTransaction();
|
||||
Query qr = null;
|
||||
|
||||
if (useSQL)
|
||||
qr = ss.createSQLQuery(query);
|
||||
else
|
||||
qr = ss.createQuery(query);
|
||||
|
||||
qr.executeUpdate();
|
||||
ss.getTransaction().commit();
|
||||
|
||||
} catch (Exception e) {
|
||||
rollback(ss);
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void executeSQLUpdate(String query, SessionFactory DBSessionFactory) {
|
||||
executeHQLUpdate(query, DBSessionFactory, true);
|
||||
}
|
||||
|
||||
public static List<Object> executeSQLQuery(String query, SessionFactory DBSessionFactory) {
|
||||
return executeHQLQuery(query, DBSessionFactory, true);
|
||||
}
|
||||
|
||||
public static void rollback(Session ss) {
|
||||
|
||||
try {
|
||||
if (ss != null && ss.getTransaction() != null)
|
||||
ss.getTransaction().rollback();
|
||||
} catch (Exception ex) {
|
||||
|
||||
} finally {
|
||||
try {
|
||||
ss.close();
|
||||
} catch (Exception ee) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void saveObject(Object obj, SessionFactory DBSessionFactory) throws Exception {
|
||||
if (DBSessionFactory != null) {
|
||||
Session ss = null;
|
||||
try {
|
||||
ss = DBSessionFactory.getCurrentSession();
|
||||
ss.beginTransaction();
|
||||
ss.saveOrUpdate(obj);
|
||||
ss.getTransaction().commit();
|
||||
} catch (Exception e) {
|
||||
rollback(ss);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,189 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.utils;
|
||||
|
||||
/**
 * String similarity helpers: a plain Levenshtein edit distance ({@link #LD})
 * and a normalised similarity score in [0, 1] ({@link #CD}) that combines
 * exact match, containment and edit distance.
 */
public class DistanceCalculator {

    /** Returns the smallest of the three values. */
    private int Minimum(int a, int b, int c) {
        return Math.min(a, Math.min(b, c));
    }

    /**
     * Computes the Levenshtein distance between two strings.
     *
     * @param s first string, must not be null
     * @param t second string, must not be null
     * @return the minimum number of single-character insertions, deletions
     *         and substitutions turning {@code s} into {@code t}
     */
    public int LD(String s, String t) {
        final int sLen = s.length();
        final int tLen = t.length();

        // against an empty string the distance is the other string's length
        if (sLen == 0) {
            return tLen;
        }
        if (tLen == 0) {
            return sLen;
        }

        final int[][] table = new int[sLen + 1][tLen + 1];

        // first column / first row: distance from a prefix to the empty string
        for (int row = 0; row <= sLen; row++) {
            table[row][0] = row;
        }
        for (int col = 0; col <= tLen; col++) {
            table[0][col] = col;
        }

        for (int row = 1; row <= sLen; row++) {
            final char sChar = s.charAt(row - 1);
            for (int col = 1; col <= tLen; col++) {
                final int substitution = (sChar == t.charAt(col - 1)) ? 0 : 1;
                table[row][col] = Minimum(
                        table[row - 1][col] + 1,
                        table[row][col - 1] + 1,
                        table[row - 1][col - 1] + substitution);
            }
        }

        return table[sLen][tLen];
    }

    /**
     * Similarity score with case preserved during normalisation and the
     * default containment boost.
     *
     * @see #CD(boolean, String, String, boolean, boolean)
     */
    public double CD(boolean useSimpleDistance, String h, String t) {
        return CD(useSimpleDistance, h, t, false, false);
    }

    /**
     * Computes a similarity score between two strings; 1 means complete
     * agreement, 0 means no agreement.
     *
     * <p>Both strings are first normalised (punctuation removed, runs of
     * spaces collapsed, trimmed, optionally lower-cased). Then, in order:
     * exactly one empty string scores 0; strings equal ignoring case score 1;
     * with {@code useSimpleDistance} anything else scores 0; if one string
     * contains the other the covered fraction of the longer string is
     * multiplied by the match factor and capped at 0.98; otherwise the score
     * is one minus the Levenshtein distance divided by the longer length.
     *
     * @param useSimpleDistance when true only exact (case-insensitive) matches score above 0
     * @param h first string, may be null
     * @param t second string, may be null
     * @param ignoreCase lower-case both strings during normalisation
     * @param boostMatch use a match factor of 2 instead of 1.5 for containment
     * @return the similarity score; two nulls score 1, a single null scores 0
     */
    public double CD(boolean useSimpleDistance, String h, String t, boolean ignoreCase, boolean boostMatch) {
        if (h == null && t == null) {
            return 1;
        }
        if (h == null || t == null) {
            return 0;
        }

        final String left = treatString(h, ignoreCase);
        final String right = treatString(t, ignoreCase);
        final int rightLen = right.length();
        final int leftLen = left.length();
        final double matchFactor = boostMatch ? 2f : 1.5f;

        // exactly one of the two normalised strings is empty
        if ((rightLen == 0) != (leftLen == 0)) {
            return 0;
        }
        if (left.equalsIgnoreCase(right)) {
            return 1;
        }
        if (useSimpleDistance) {
            return 0;
        }
        if (right.contains(left)) {
            return containmentScore(right, left, rightLen, matchFactor);
        }
        if (left.contains(right)) {
            return containmentScore(left, right, leftLen, matchFactor);
        }

        // fall back to a percentage derived from the Levenshtein distance
        final int edits = LD(left, right);
        final int longest = Math.max(leftLen, rightLen);
        return 1 - ((double) edits / (double) longest);
    }

    /**
     * Fraction of {@code container} covered by occurrences of
     * {@code contained}, scaled by {@code matchFactor} and capped at 0.98.
     */
    private double containmentScore(String container, String contained, int containerLen, double matchFactor) {
        final String leftover = container.replace(contained, "");
        final double coverage = 1 - ((double) leftover.length() / (double) containerLen);
        return Math.min(coverage * matchFactor, 0.98);
    }

    /**
     * Normalises a string for comparison: strips punctuation, collapses runs
     * of spaces into one, trims, and optionally lower-cases.
     */
    private String treatString(String h, boolean ignoreCase) {
        final String cleaned = h
                .replaceAll("[!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-]", "") // strip punctuation
                .replaceAll("[ ]+", " ")                               // collapse multiple spaces
                .trim();
        return ignoreCase ? cleaned.toLowerCase() : cleaned;
    }

    /** Small demo printing the score of a misspelled prefix against a full name. */
    public static void main(String[] args) {
        final String query = "Mediteranean";
        final String candidate = "Mediterranean horse mackerel";
        final double score = new DistanceCalculator().CD(false, query, candidate, true, true);
        System.out.println("Distance between " + query + " and " + candidate + " : " + score);
    }

}
|
|
@ -0,0 +1,89 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.Writer;
|
||||
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
public class FileTools {
|
||||
|
||||
public static String readXMLDoc(String xmlFilePath) throws Exception {
|
||||
String xml = null;
|
||||
|
||||
File fl = new File(xmlFilePath);
|
||||
FileInputStream stream = new FileInputStream(fl);
|
||||
SAXReader saxReader = new SAXReader();
|
||||
Document document = saxReader.read(stream);
|
||||
xml = document.asXML();
|
||||
return xml;
|
||||
}
|
||||
|
||||
public static void saveString2File(String filename, String string2save) throws Exception {
|
||||
|
||||
}
|
||||
|
||||
public static boolean checkInput(String filename) {
|
||||
File file = new File(filename);
|
||||
if (!file.exists())
|
||||
return false;
|
||||
if (!file.canRead())
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean checkOutput(String filename, boolean overwrite) {
|
||||
File file = new File(filename);
|
||||
if (!overwrite && file.exists())
|
||||
return false;
|
||||
if (file.exists() && (file.isDirectory() || !file.canWrite()))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
public static String loadString(String filename, String encoding) throws Exception {
|
||||
try {
|
||||
if (checkInput(filename)) {
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
|
||||
String line = null;
|
||||
StringBuilder vud = new StringBuilder();
|
||||
|
||||
while ((line = in.readLine()) != null) {
|
||||
vud.append(line + "\n");
|
||||
}
|
||||
in.close();
|
||||
return vud.toString();
|
||||
} else
|
||||
return null;
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
throw new Exception("The file " + filename + " is not in the correct format!");
|
||||
} catch (IOException e) {
|
||||
throw new Exception("The file " + filename + " is not in the correct format!");
|
||||
}
|
||||
}
|
||||
|
||||
public static void saveString(String filename, String s, boolean overwrite, String encoding) throws Exception {
|
||||
try {
|
||||
if (checkOutput(filename, overwrite)) {
|
||||
Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), encoding));
|
||||
out.write(s);
|
||||
out.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new Exception("The system can not write in " + filename + " because:\n" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
package org.gcube.contentmanagement.lexicalmatcher.utils;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class MathFunctions {
|
||||
|
||||
/**
|
||||
* @param args
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
//increments a percentage o mean calculation when a lot of elements are present
|
||||
public static float incrementPerc(float perc, float quantity, int N){
|
||||
|
||||
if (N==0)
|
||||
return quantity;
|
||||
|
||||
float out = 0;
|
||||
int N_plus_1 = N+1;
|
||||
out = (float)((perc + ((double)quantity / (double)N )) * ((double)N/(double)N_plus_1));
|
||||
return out;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static ArrayList<Integer> generateRandoms(int numberOfRandoms, int min, int max) {
|
||||
|
||||
ArrayList<Integer> randomsSet = new ArrayList<Integer>();
|
||||
// if number of randoms is equal to -1 generate all numbers
|
||||
if (numberOfRandoms == -1) {
|
||||
for (int i = min; i < max; i++) {
|
||||
randomsSet.add(i);
|
||||
}
|
||||
} else {
|
||||
int numofrandstogenerate = 0;
|
||||
if (numberOfRandoms <= max) {
|
||||
numofrandstogenerate = numberOfRandoms;
|
||||
} else {
|
||||
numofrandstogenerate = max;
|
||||
}
|
||||
|
||||
if (numofrandstogenerate == 0) {
|
||||
randomsSet.add(0);
|
||||
} else {
|
||||
for (int i = 0; i < numofrandstogenerate; i++) {
|
||||
|
||||
int RNum = -1;
|
||||
RNum = (int) ((max) * Math.random()) + min;
|
||||
|
||||
// generate random number
|
||||
while (randomsSet.contains(RNum)) {
|
||||
RNum = (int) ((max) * Math.random()) + min;
|
||||
// AnalysisLogger.getLogger().debug("generated " + RNum);
|
||||
}
|
||||
|
||||
// AnalysisLogger.getLogger().debug("generated " + RNum);
|
||||
|
||||
if (RNum >= 0)
|
||||
randomsSet.add(RNum);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().trace("MathFunctions-> generateRandoms " + randomsSet.toString());
|
||||
|
||||
return randomsSet;
|
||||
}
|
||||
|
||||
|
||||
public static int[] generateSequence(int elements) {
|
||||
int [] sequence = new int[elements];
|
||||
for (int i=0;i<elements;i++){
|
||||
sequence[i]=i;
|
||||
}
|
||||
return sequence;
|
||||
}
|
||||
|
||||
public static BigInteger chunk2Index(int chunkIndex,int chunkSize){
|
||||
|
||||
return BigInteger.valueOf(chunkIndex).multiply(BigInteger.valueOf(chunkSize));
|
||||
|
||||
}
|
||||
|
||||
//calculates mean
|
||||
public static double mean(double[] p) {
|
||||
double sum = 0; // sum of all the elements
|
||||
for (int i=0; i<p.length; i++) {
|
||||
sum += p[i];
|
||||
}
|
||||
return sum / p.length;
|
||||
}//end method mean
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue