package org.gcube.dataanalysis.ecoengine.utils;

import java.math.BigInteger;
import java.util.List;

import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.hibernate.SessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Checks whether two database tables hold the same content.
 *
 * <p>Both tables are read in chunks of {@link #chunkSize} rows, each ordered by its own
 * criteria columns, and compared row by row and column by column. Values that parse as
 * numbers are considered equal when they differ by at most {@link #Threshold}; all other
 * values are compared as strings.
 *
 * <p>An instance is single-use: {@link #runTest()} closes both database connections
 * before returning.
 */
public class TablesDiscrepanciesCalculator {

    private static final Logger logger = LoggerFactory.getLogger(TablesDiscrepanciesCalculator.class);

    /** Value returned by {@link #isNumber(String)} when the text is not a number. */
    private static final double NOT_A_NUMBER = -Double.MAX_VALUE;

    /** Row count of the analyzed table, refreshed by every {@link #runTest()} call. */
    private BigInteger numOfElements;
    /** Number of discrepancies recorded by the current {@link #runTest()} call. */
    private int errorCounter;

    // connection setup
    protected String LogFile = "ALog.properties";

    /** Number of rows fetched from each table per query. */
    int chunkSize = 7000;
    /** Maximum absolute difference for two numeric values to be considered equal. */
    static double Threshold = 0.01;

    // change these defaults to change the comparison
    public String referenceTable = "speciesrichness";
    public String analyzedTable = "crossspecies_nonreviewed";
    public String referenceCriteria = "csquarecode,speccount";
    public String destinationCriteria = "csquarecode,maxspeciescountinacell";
    public String referenceSelectedColumns = "csquarecode,speccount";
    public String destinationSelectedColumns = "csquarecode,maxspeciescountinacell";

    /** Selection query template: 1 = selected columns, 2 = table name, 3 = ordering columns. */
    public static String selectElementsQuery = "select %1$s from %2$s order by %3$s";

    // database connections
    protected SessionFactory referencedbConnection;
    protected SessionFactory destinationdbConnection;

    /**
     * Opens the reference and the destination connections. Both are initialised from the
     * same default connection file found under the configuration path.
     *
     * @param config configuration providing the config path and the connection parameters
     * @throws Exception if a connection cannot be initialised
     */
    public TablesDiscrepanciesCalculator(AlgorithmConfiguration config) throws Exception {
        String connectionFile = config.getConfigPath() + AlgorithmConfiguration.defaultConnectionFile;
        referencedbConnection = DatabaseFactory.initDBConnection(connectionFile, config);
        logger.debug("ReferenceDB initialized");
        destinationdbConnection = DatabaseFactory.initDBConnection(connectionFile, config);
        logger.debug("OriginalDB initialized");
    }

    /**
     * Counts the rows of a table.
     *
     * @param tablename table to count; concatenated into the SQL text as is, so it must
     *                  come from trusted configuration
     * @param session   connection to run the query on
     * @return the number of rows in the table
     * @throws IllegalStateException if the counting query yields no result
     */
    public BigInteger countElements(String tablename, SessionFactory session) {
        String countingQuery = "select count(*) from " + tablename;
        logger.debug("Getting DB elements by this query: " + countingQuery);
        List<?> result = DatabaseFactory.executeSQLQuery(countingQuery, session);
        if (result == null || result.isEmpty() || result.get(0) == null) {
            throw new IllegalStateException("No result for counting query: " + countingQuery);
        }
        Object count = result.get(0);
        // The Java type of count(*) depends on driver and dialect, so accept any
        // numeric representation instead of hard-casting to BigInteger.
        if (count instanceof BigInteger) {
            return (BigInteger) count;
        }
        if (count instanceof Number) {
            return BigInteger.valueOf(((Number) count).longValue());
        }
        return new BigInteger(String.valueOf(count).trim());
    }

    /**
     * Fetches one chunk of rows from a table.
     *
     * @param tablename       table to read
     * @param selectedColumns comma-separated columns to select
     * @param criteria        comma-separated columns to order by
     * @param limit           maximum number of rows to fetch
     * @param offset          number of rows to skip
     * @param session         connection to run the query on
     * @return the rows as returned by {@code DatabaseFactory.executeSQLQuery}
     */
    public List takeChunkOfElements(String tablename, String selectedColumns, String criteria, int limit, int offset, SessionFactory session) {
        String query = String.format(selectElementsQuery, selectedColumns, tablename, criteria)
                + " limit " + limit + " offset " + offset;
        logger.debug("takeChunkOfElements-> executing query on DB: " + query);
        return DatabaseFactory.executeSQLQuery(query, session);
    }

    /**
     * Parses a string as a double.
     *
     * @param element text to parse, may be {@code null}
     * @return the parsed value, or {@code -Double.MAX_VALUE} when the text is not a number
     */
    public double isNumber(String element) {
        if (element == null) {
            return NOT_A_NUMBER;
        }
        try {
            return Double.parseDouble(element);
        } catch (NumberFormatException e) {
            return NOT_A_NUMBER;
        }
    }

    /**
     * Runs the comparison with the default tables and prints the elapsed time in minutes.
     * The connection settings below are hard-coded and point at a localhost database.
     */
    public static void main(String[] args) throws Exception {
        String configPath = "./cfg/";
        AlgorithmConfiguration config = new AlgorithmConfiguration();
        config.setParam("DistributionTable", "hspec_suitable_automatic_local");
        config.setParam("ConfigPath", configPath);
        config.setParam("CsquarecodesTable", "hcaf_d");
        config.setParam("EnvelopeTable", "hspen_micro");
        config.setParam("CreateTable", "true");
        config.setNumberOfResources(2);
        config.setParam("DatabaseUserName", "gcube");
        config.setParam("DatabasePassword", "d4science2");
        config.setParam("DatabaseURL", "jdbc:postgresql://localhost/testdb");

        TablesDiscrepanciesCalculator ec = new TablesDiscrepanciesCalculator(config);
        long t0 = System.currentTimeMillis();
        ec.runTest();
        long t1 = System.currentTimeMillis();
        float difference = (t1 - t0);
        difference = difference / (float) (1000 * 60);
        System.out.println("Elapsed time : " + difference + " min");
    }

    /**
     * Compares the reference table against the analyzed table and stops at the first
     * discrepancy. Tables with different row counts are reported as different. Both
     * connections are closed before returning, even when a query fails.
     *
     * @return {@code true} if no discrepancy was found, {@code false} otherwise
     */
    public boolean runTest() {
        long t0 = System.currentTimeMillis();
        // reset error counter
        errorCounter = 0;
        boolean equal = true;
        try {
            // take the number of elements
            numOfElements = countElements(analyzedTable, destinationdbConnection);
            logger.debug("Remote DB contains " + numOfElements + " elements.");

            // The chunk loop is driven by the analyzed table only, so surplus reference
            // rows could go unnoticed; compare the row counts up front.
            BigInteger referenceElements = countElements(referenceTable, referencedbConnection);
            if (!numOfElements.equals(referenceElements)) {
                errorCounter++;
                equal = false;
                logger.debug("ERROR - DISCREPANCY AT ROWS NUMBER: " + referenceElements + " vs " + numOfElements);
            }

            int maxNumber = numOfElements.intValue();
            int numOfChunks = maxNumber / chunkSize;
            if ((maxNumber % chunkSize) > 0) {
                numOfChunks++;
            }

            for (int i = 0; equal && i < numOfChunks; i++) {
                int offset = i * chunkSize;
                List<?> referencechunk = takeChunkOfElements(referenceTable, referenceSelectedColumns, referenceCriteria, chunkSize, offset, referencedbConnection);
                List<?> destinationchunk = takeChunkOfElements(analyzedTable, destinationSelectedColumns, destinationCriteria, chunkSize, offset, destinationdbConnection);
                equal = chunksMatch(referencechunk, destinationchunk);
                if (equal) {
                    logger.debug("CHUNK NUMBER " + i + " OK!");
                }
            }
        } finally {
            long t1 = System.currentTimeMillis();
            logger.debug("ELAPSED TIME: " + (t1 - t0) + " ms");
            // close connections; the nested finally closes the second one even if the
            // first close fails
            try {
                referencedbConnection.close();
            } finally {
                destinationdbConnection.close();
            }
        }
        return equal;
    }

    /** Compares two chunks row by row; logs and counts the first discrepancy found. */
    private boolean chunksMatch(List<?> referencechunk, List<?> destinationchunk) {
        // A null list is treated as an empty chunk; how the query helper reports
        // failures is not visible from this class.
        int referenceRows = (referencechunk == null) ? 0 : referencechunk.size();
        int destinationRows = (destinationchunk == null) ? 0 : destinationchunk.size();
        if (referenceRows != destinationRows) {
            errorCounter++;
            logger.debug("ERROR - DISCREPANCY AT CHUNK SIZE: " + referenceRows + " vs " + destinationRows);
            return false;
        }
        // NOTE(review): this row/column iteration was restored from a damaged source
        // span; confirm it against version control.
        for (int j = 0; j < referenceRows; j++) {
            if (!rowsMatch(toRow(referencechunk.get(j)), toRow(destinationchunk.get(j)))) {
                return false;
            }
        }
        return true;
    }

    /** Compares two rows column by column; logs and counts the first discrepancy found. */
    private boolean rowsMatch(Object[] refrow, Object[] destrow) {
        if (refrow.length != destrow.length) {
            errorCounter++;
            logger.debug("ERROR - DISCREPANCY AT COLUMNS NUMBER: " + refrow.length + " vs " + destrow.length);
            return false;
        }
        for (int k = 0; k < refrow.length; k++) {
            String refelem = "" + refrow[k];
            String destelem = "" + destrow[k];
            if (refelem.equals(destelem)) {
                continue;
            }
            double refnum = isNumber(refelem);
            if (refnum != NOT_A_NUMBER && !Double.isNaN(refnum)) {
                // Written as !(diff <= Threshold) so that a NaN difference counts as a
                // discrepancy instead of silently passing.
                if (!(Math.abs(refnum - isNumber(destelem)) <= Threshold)) {
                    errorCounter++;
                    logger.debug("ERROR - DISCREPANCY AT NUMBERS COMPARISON: " + refelem + " vs " + destelem);
                    return false;
                }
            } else {
                errorCounter++;
                logger.debug("ERROR - DISCREPANCY AT STRING COMPARISON: " + refelem + " vs " + destelem);
                return false;
            }
        }
        return true;
    }

    /** Normalises a query result row: a row that is not an array becomes a one-element array. */
    private static Object[] toRow(Object row) {
        if (row instanceof Object[]) {
            return (Object[]) row;
        }
        return new Object[] { row };
    }
}