package org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.treeStructure.chunks; import java.math.BigInteger; import java.util.ArrayList; import java.util.HashMap; import org.gcube.contentmanagement.lexicalmatcher.analysis.core.DataTypeRecognizer; import org.gcube.contentmanagement.lexicalmatcher.analysis.core.Engine; import org.gcube.contentmanagement.lexicalmatcher.analysis.core.LexicalEngineConfiguration; import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.CategoryScores; import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.DBObjectTranslator; import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.Entry; import org.gcube.contentmanagement.lexicalmatcher.analysis.guesser.data.SingleResult; import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator; import org.hibernate.SessionFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class TimeSeriesChunk extends Chunk{ private static Logger logger = LoggerFactory.getLogger(TimeSeriesChunk.class); private ArrayList columnEntries; private String columnType; private LexicalEngineConfiguration config; private boolean mustInterrupt; private ArrayList detailedResults; private String singletonElement; private boolean isSingleton; public String getColumnType(){ return columnType; } public String getSingletonEntry(){ return singletonElement; } public ArrayList getDetailedResults(){ return detailedResults; } public boolean isSingleton(){ return isSingleton; } public TimeSeriesChunk(String timeSeriesName, String timeSeriesColumn, String ColumnType, BigInteger start, int ChunkSize, LexicalEngineConfiguration Config, Engine engine) throws Exception{ super(engine); DBObjectTranslator dbo = new DBObjectTranslator(); SessionFactory sess = engine.getDBSession(); columnEntries = dbo.retrieveTimeSeriesEntries(sess, timeSeriesName, timeSeriesColumn, start, ChunkSize); if (ColumnType==null){ columnType = DataTypeRecognizer.guessType(columnEntries); logger.trace("TimeSeriesChunk-> GUESSED TYPE " + columnType + " FOR COLUMN "+timeSeriesColumn); } mustInterrupt = false; config = Config; isSingleton = false; } public TimeSeriesChunk(String singletonString, String ColumnType, BigInteger start, int ChunkSize, LexicalEngineConfiguration Config, Engine engine) throws Exception{ super(engine); columnEntries = new ArrayList(); columnEntries.add(singletonString); if (ColumnType==null){ columnType = DataTypeRecognizer.guessType(columnEntries); logger.trace("TimeSeriesChunk-> GUESSED TYPE " + columnType + " FOR SINGLETON "+singletonString); } mustInterrupt = false; config = Config; isSingleton = true; singletonElement = singletonString; detailedResults = new ArrayList(); } public boolean mustInterruptProcess (){ return this.mustInterrupt; } public void compareToReferenceChunk(HashMap scoresTable, ReferenceChunk catChunk) throws Exception { compareToReferenceChunk(scoresTable, catChunk,null); } // checks an entry set against a reference set // columnEntries: column elements from unknown column // cat: category analyzed for candidating to recognized // referenceEntries: some elements belonging to cat, to be compared to columnEntries public void compareToReferenceChunk(HashMap scoresTable, ReferenceChunk catChunk,String ColumnFilter) throws Exception { //in the case of a singleton Chunk interrupt computation in case of exact match // get category Score for further processing CategoryScores categoryScores = scoresTable.get(catChunk.getCategoryName()); //extract Entries from DB ArrayList categoryEntries = catChunk.getReferenceEntries(); for (String timeSeriesElement : columnEntries) { // for each reference entry for (Entry referenceEntry : categoryEntries) { // take all attributes of a reference entry for confrontation to columns HashMap attributes = referenceEntry.getAttributes(); HashMap types = referenceEntry.getTypes(); boolean anotherReference= true; // for each attribute of an entry for (String referenceColumn : attributes.keySet()) { // perform calculation only if the column type is the same if (types.get(referenceColumn).equals(columnType)&&((ColumnFilter==null)||(ColumnFilter.equalsIgnoreCase(referenceColumn)))) { // logger.debug("CategoryOrderedList->checkAllEntriesOnEntireCategory-> REFERENCE COLUMN "+referenceColumn+" HAS TYPE "+types.get(referenceColumn)); // take the attribute value of the entry String attribute = attributes.get(referenceColumn); // calculate the distance between the unknown entry and the attribute DistanceCalculator d = new DistanceCalculator(); double percentage = d.CD(config.useSimpleDistance, timeSeriesElement, attribute, isSingleton, isSingleton) * 100f; // logger.debug("CategoryOrderedList->checkUnkEntriesOnEntireCategory-> Percentage between " +timeSeriesElement + " and " + attribute + " is: "+percentage ); // if they are similar if (percentage > config.entryAcceptanceThreshold) { // if (catChunk.getCategoryName().equals("COUNTRY_OLD")) logger.trace("TimeSeriesChunk->compareToCategoryChunk-> \t\tPercentage between " + timeSeriesElement + " vs. " + attribute + " is: " + percentage+" in "+catChunk.getCategoryName()+":"+referenceColumn); categoryScores.incrementScore(referenceColumn, (float)percentage,anotherReference); //if we are in a singleton we have to get the details if (isSingleton){ //for singleton match, fulfil details int index =0; for (SingleResult sr :detailedResults){ Double scoredetail = sr.getScore(); if (scoredetailcompareToCategoryChunk-> "+categoryScores.showScores()); } //if exact match is reached, exit if ((percentage==100)&&(isSingleton)) { detailedResults = new ArrayList(); detailedResults.add(new SingleResult(attribute, null, percentage,null,"0")); mustInterrupt = true; break; } } } }// end for on columns if (mustInterrupt) break; }// end for on entries } } }