This commit is contained in:
Gianpaolo Coro 2012-06-21 12:50:54 +00:00
parent 8d4d96013c
commit 258409ddb8
6 changed files with 371 additions and 217 deletions

42
README
View File

@ -1,42 +0,0 @@
The gCube System - VREManager Service
------------------------------------------------------------
This work is partially funded by the European Commission in the
context of the D4Science project (www.d4science.eu), under the
1st call of FP7 IST priority.
Authors
-------
* Gianpaolo Coro (gianpaolo.coro@isti.cnr.it), CNR Pisa,
Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo"
Version and Release Date
------------------------
version 1.0.0 (23-02-2012)
Description
--------------------
Support library for statistics analysis on Time Series data.
Download information
--------------------
Source code is available from SVN:
http://svn.research-infrastructures.eu/d4science/gcube/trunk/content-management/EcologicalModelling
Binaries can be downloaded from:
http://software.d4science.research-infrastructures.eu/
Documentation
-------------
VREManager documentation is not yet available on the Projects Documentation Wiki.
Licensing
---------
This software is licensed under the terms you may find in the file named "LICENSE" in this directory.

View File

@ -8,7 +8,7 @@
<property name="connection.password">d4science2</property>
<property name="dialect">org.hibernatespatial.postgis.PostgisDialect</property>
<property name="transaction.factory_class">org.hibernate.transaction.JDBCTransactionFactory</property>
<property name="c3p0.timeout">7200</property>
<property name="c3p0.timeout">0</property>
<property name="c3p0.max_size">10</property>
<property name="c3p0.max_statements">0</property>
<property name="c3p0.min_size">1</property>

View File

@ -437,7 +437,7 @@ public class LocalSplitGenerator implements Generator {
writeOnDB(AlgorithmConfiguration.chunkSize);
// AnalysisLogger.getLogger().trace("\t...finished writing on db");
}
} catch (Exception e) {
} catch (Throwable e) {
e.printStackTrace();
AnalysisLogger.getLogger().error(e);
flushInterrupt = true;

View File

@ -10,7 +10,7 @@ public class PerformanceTests {
String configPath = "./cfg/";
String csquareTable = "hcaf_d";
String preprocessedTable = "maxminlat_hspen";
String envelopeTable = "hspen_mini_1";
String envelopeTable = "hspen_mini_1000";
int numberOfResources = 4;
String speciesCode = "Fis-22747";
String userName = "gianpaolo.coro";

View File

@ -0,0 +1,174 @@
package org.gcube.dataanalysis.ecoengine.test;
import java.math.BigInteger;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.hibernate.SessionFactory;
/**
* checks if two tables are equal
* checks numbers at the second decimal position
*/
public class TablesComparison {
private BigInteger numOfElements;
private int errorCounter;
//connection setup
protected String LogFile = "ALog.properties";
//fundamental: set a the chunk csquaresNumber and the maximum number of chunks to take
int chunkSize = 7000;
static double Threshold = 0.01;
//change this defaults to change comparison
public String referenceTable = "hspec_suitable_executor_1_worker";
public String analyzedTable = "hspec_suitable_executor_2";
public String referenceCriteria = "speciesid,csquarecode";
public String destinationCriteria = "speciesid,csquarecode";
public String referenceSelectedColumns = "speciesid,csquarecode,probability";
public String destinationSelectedColumns = "speciesid,csquarecode,probability";
//selection query
public static String selectElementsQuery = "select %1$s from %2$s order by %3$s";
//database connections
protected SessionFactory referencedbConnection;
protected SessionFactory destinationdbConnection;
//init connections
public TablesComparison(AlgorithmConfiguration config) throws Exception {
AnalysisLogger.setLogger(config.getConfigPath() + LogFile);
referencedbConnection = DatabaseFactory.initDBConnection(config.getConfigPath() + AlgorithmConfiguration.defaultConnectionFile,config);
AnalysisLogger.getLogger().debug("ReferenceDB initialized");
destinationdbConnection = DatabaseFactory.initDBConnection(config.getConfigPath() + AlgorithmConfiguration.defaultConnectionFile,config);
AnalysisLogger.getLogger().debug("OriginalDB initialized");
}
//counts the elements in a table
public BigInteger countElements(String tablename, SessionFactory session)
{
BigInteger count = BigInteger.ZERO;
String countingQuery = "select count(*) from "+tablename;
AnalysisLogger.getLogger().debug("Getting DB elements by this query: "+countingQuery);
List<Object> result = DatabaseFactory.executeSQLQuery(countingQuery, session);
count = (BigInteger) result.get(0);
return count;
}
//takes a chunk of elements from the database, belonging to the set of 170 selected species
public List<Object> takeChunkOfElements(String tablename,String selectedColumns,String criteria, int limit, int offset, SessionFactory session) {
String query = String.format(selectElementsQuery,selectedColumns,tablename,criteria)+ " limit " + limit + " offset " + offset;
AnalysisLogger.getLogger().debug("takeChunkOfElements-> executing query on DB: " + query);
List<Object> results = DatabaseFactory.executeSQLQuery(query, session);
return results;
}
//checks if a string is a number
public double isNumber(String element){
try{
double d = Double.parseDouble(element);
return d;
}catch(Exception e){
return -Double.MAX_VALUE;
}
}
public static void main(String[] args) throws Exception {
String configPath = "./cfg/";
AlgorithmConfiguration config = new AlgorithmConfiguration();
config.setConfigPath(configPath);
config.setDatabaseUserName("utente");
config.setDatabasePassword("d4science");
config.setDatabaseURL("jdbc:postgresql://dbtest.research-infrastructures.eu/aquamapsorgupdated");
TablesComparison ec = new TablesComparison(config);
long t0 = System.currentTimeMillis();
ec.runTest();
long t1 = System.currentTimeMillis();
float difference = (t1-t0);
difference = difference /(float)(1000*60);
System.out.println("Elapsed time : "+difference+" min");
}
//runs the test between the tables
public boolean runTest() {
long t0 = System.currentTimeMillis();
// take the number of elements
numOfElements = countElements(analyzedTable, destinationdbConnection);
AnalysisLogger.getLogger().debug("Remote DB contains " + numOfElements + " elements.");
int maxNumber = numOfElements.intValue();
int numOfChunks = maxNumber / chunkSize;
if ((maxNumber % chunkSize) > 0) {
numOfChunks++;
}
int startIndex = 0;
// reset error counter
errorCounter = 0;
boolean equal = true;
for (int i = startIndex; i < numOfChunks; i++) {
int offset = i * chunkSize;
List<Object> referencechunk = takeChunkOfElements(referenceTable,referenceSelectedColumns,referenceCriteria, chunkSize, offset, referencedbConnection);
List<Object> destinationchunk = takeChunkOfElements(analyzedTable,destinationSelectedColumns,destinationCriteria, chunkSize, offset, destinationdbConnection);
int m = referencechunk.size();
for (int j=0;j<m;j++){
Object[] refrow = (Object[]) referencechunk.get(j);
Object[] destrow = (Object[]) destinationchunk.get(j);
int columns = destrow.length;
for (int k=0;k<columns;k++){
String refelem = ""+refrow[k];
String destelem = ""+destrow[k];
double d = isNumber(refelem);
// System.out.print(refelem+" vs "+destelem+ " ");
if (d!=-Double.MAX_VALUE){
if (Math.abs(d-isNumber(destelem))>Threshold){
errorCounter++;
equal = false;
AnalysisLogger.getLogger().debug("ERROR - DISCREPANCY AT NUMBERS COMPARISON: "+refelem+" vs "+destelem);
}
}
else if (!refelem.equals(destelem)){
errorCounter++;
equal = false;
AnalysisLogger.getLogger().debug("ERROR - DISCREPANCY AT STRING COMPARISON: "+refelem+" vs "+destelem);
}
if (!equal)
break;
}
// System.out.println();
if (!equal)
break;
}
if (!equal)
break;
else
AnalysisLogger.getLogger().debug("CHUNK NUMBER "+i+" OK!");
}
long t1 = System.currentTimeMillis();
AnalysisLogger.getLogger().debug("ELAPSED TIME: " + (t1-t0) + " ms");
//close connections
referencedbConnection.close();
destinationdbConnection.close();
return equal;
}
}

View File

@ -21,15 +21,15 @@ public class Operations {
double sum = 0;
for (int i = 0; i < a.length; i++) {
sum = sum + a[i];
sum = sum + a[i];
}
return sum;
}
public static double[] vectorialDifference(double[] a, double[] b) {
double [] diff = new double [a.length];
double[] diff = new double[a.length];
for (int i = 0; i < a.length; i++) {
if (i < b.length)
@ -40,10 +40,10 @@ public class Operations {
return diff;
}
public static double[] vectorialAbsoluteDifference(double[] a, double[] b) {
double [] diff = new double [a.length];
double[] diff = new double[a.length];
for (int i = 0; i < a.length; i++) {
if (i < b.length)
@ -54,7 +54,7 @@ public class Operations {
return diff;
}
public static double getMax(double[] points) {
double max = -Double.MAX_VALUE;
for (int i = 0; i < points.length; i++) {
@ -72,6 +72,7 @@ public class Operations {
}
return max;
}
public static int getMin(int[] points) {
int min = Integer.MAX_VALUE;
for (int i = 0; i < points.length; i++) {
@ -80,6 +81,7 @@ public class Operations {
}
return min;
}
public static double getMin(double[] points) {
double min = Double.MAX_VALUE;
for (int i = 0; i < points.length; i++) {
@ -99,25 +101,25 @@ public class Operations {
for (int j = 0; j < npoints; j++) {
if (((i == 0) && (points[j] < interval[i])) || ((i == intervs - 1) && (points[j] >= interval[i - 1]) && (points[j] <= interval[i])) || ((i > 0) && (points[j] >= interval[i - 1]) && (points[j] < interval[i]))) {
// System.out.println("(" + (i == 0 ? "" : interval[i - 1]) + "," + interval[i] + ")" + " - " + points[j]);
// System.out.println("(" + (i == 0 ? "" : interval[i - 1]) + "," + interval[i] + ")" + " - " + points[j]);
frequencies[i] = frequencies[i] + 1;
}
}
}
return frequencies;
}
public static double[] normalizeFrequencies(double[] frequencies, int numberOfPoints){
public static double[] normalizeFrequencies(double[] frequencies, int numberOfPoints) {
int intervs = frequencies.length;
for (int i = 0; i < intervs; i++) {
frequencies[i] = frequencies[i] / (double) numberOfPoints;
}
return frequencies;
}
// checks if an interval contains at least one element from a sequence of points
public static boolean intervalContainsPoints(double min, double max, double[] points) {
// System.out.println(min+"-"+max);
@ -139,45 +141,45 @@ public class Operations {
boolean subdivisionOK = false;
double gap = (max - min) / n;
//search for the best subdivision: find the best n
// search for the best subdivision: find the best n
while (!subdivisionOK) {
// System.out.println("*************************");
boolean notcontains = false;
//take the gap interval to test
// take the gap interval to test
for (int i = 0; i < n; i++) {
double rightmost = 0;
//for the last border take a bit more than max
// for the last border take a bit more than max
if (i == n - 1)
rightmost = max + 0.01;
else
rightmost = min + gap * (i + 1);
//if the interval doesn't contain any point discard the subdivision
// if the interval doesn't contain any point discard the subdivision
if (!intervalContainsPoints(min + gap * i, rightmost, points)) {
notcontains = true;
break;
}
}
//if there are empty intervals and there is space for another subdivision proceed
// if there are empty intervals and there is space for another subdivision proceed
if (notcontains && n > 0) {
n--;
gap = (max - min) / n;
}
//otherwise take the default subdivision
}
// otherwise take the default subdivision
else if (n == 0) {
n = maxintervals;
subdivisionOK = true;
}
//if all the intervals are non empty then exit
}
// if all the intervals are non empty then exit
else
subdivisionOK = true;
}
//once the best n is found build the intervals
// once the best n is found build the intervals
double[] intervals = new double[n];
for (int i = 0; i < n; i++) {
if (i<n-1)
if (i < n - 1)
intervals[i] = min + gap * (i + 1);
else
intervals[i] = Double.POSITIVE_INFINITY;
@ -186,47 +188,45 @@ public class Operations {
return intervals;
}
public double[][] standardize(double[][] matrix) {
return standardize(matrix,null,null);
return standardize(matrix, null, null);
}
public double[] means;
public double[] variances;
public double[] variances;
// standardizes a matrix: each row represents a vector: outputs columns means and variances
public double[][] standardize(double[][] matrix, double[] meansVec, double[] variancesVec) {
if (matrix.length > 0) {
int ncols = matrix[0].length;
int mrows = matrix.length;
if ((means==null) && (variances==null)){
if ((means == null) && (variances == null)) {
means = new double[ncols];
variances = new double[ncols];
}
double[][] matrixT = Transformations.traspose(matrix);
for (int i = 0; i < ncols ; i++) {
double[] icolumn = matrixT[i];
for (int i = 0; i < ncols; i++) {
double[] icolumn = matrixT[i];
double mean = 0;
if (meansVec == null){
if (meansVec == null) {
mean = MathFunctions.mean(icolumn);
means[i] = mean;
}
else
} else
mean = meansVec[i];
double variance = 0;
if (variancesVec==null){
if (variancesVec == null) {
variance = com.rapidminer.tools.math.MathFunctions.variance(icolumn, Double.NEGATIVE_INFINITY);
variances[i] = variance;
}
else
} else
variance = variancesVec[i];
for (int j = 0; j < mrows; j++) {
// standardization
double numerator = (icolumn[j] - mean);
@ -238,145 +238,167 @@ public class Operations {
icolumn[j] = numerator / variance;
}
}
matrix = Transformations.traspose(matrixT);
}
return matrix;
}
// calculates the number of elements to take from a set with inverse weight respect to the number of elements
public static int calcNumOfRepresentativeElements(int numberOfElements, int minimumNumberToTake) {
return (int)Math.max(minimumNumberToTake,numberOfElements/Math.log10(numberOfElements));
public static int calcNumOfRepresentativeElements(int numberOfElements, int minimumNumberToTake) {
return (int) Math.max(minimumNumberToTake, numberOfElements / Math.log10(numberOfElements));
}
public static double[] linearInterpolation(double el1, double el2, int intervals) {
double step = (el2 - el1) / (double) intervals;
double[] intervalsd = new double[intervals];
intervalsd[0] = el1;
for (int i = 1; i < intervals - 1; i++) {
intervalsd[i] = el1 + step * i;
}
public static double[] linearInterpolation(double el1, double el2,int intervals){
double step = (el2-el1)/(double)intervals;
double [] intervalsd = new double[intervals];
intervalsd[0] = el1;
for (int i=1;i<intervals-1;i++){
intervalsd[i] = el1+step*i;
}
intervalsd[intervals-1] = el2;
return intervalsd;
intervalsd[intervals - 1] = el2;
return intervalsd;
}
private static double parabol(double a, double b, double c, double x, double shift) {
return a * (x - shift) * (x - shift) + b * (x - shift) + c;
}
private static double[] inverseParabol(double a, double b, double c, double y) {
double[] ret = { (-1d * b + Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a), (-1d * b - Math.sqrt(b * b + 4 * a * (Math.abs(y) - c))) / (2 * a) };
return ret;
}
private static double logaritmicTransformation(double y) {
y = Math.abs(y);
if (y == 0)
return -Double.MAX_VALUE;
else
return Math.log10(y);
}
// the parabol is centered on the start Point
public static double[] parabolicInterpolation(double startP, double endP, int intervals) {
double start = startP;
double end = endP;
double shift = start;
double a = 1000d;
double b = 0d;
double c = 0d;
double parabolStart = parabol(a, b, c, start, shift);
if (start < 0)
parabolStart = -1 * parabolStart;
double parabolEnd = parabol(a, b, c, end, start);
if (end < 0)
parabolEnd = -1 * parabolEnd;
double step = 0;
if (intervals > 0) {
double difference = Math.abs(parabolEnd - parabolStart);
step = (difference / (double) intervals);
}
private static double parabol(double a, double b,double c,double x,double shift){
return a*(x-shift)*(x-shift)+b*(x-shift)+c;
}
private static double[] inverseParabol(double a, double b,double c,double y){
double []ret = {(-1d*b+Math.sqrt(b*b+4*a*(Math.abs(y)-c)))/(2*a), (-1d*b-Math.sqrt(b*b+4*a*(Math.abs(y)-c)))/(2*a)};
return ret;
}
private static double logaritmicTransformation(double y){
y = Math.abs(y);
if (y ==0) return -Double.MAX_VALUE;
double[] linearpoints = new double[intervals];
linearpoints[0] = startP;
// System.out.println("Y0: "+parabolStart);
for (int i = 1; i < intervals - 1; i++) {
double ypoint = 0;
if (end > start)
ypoint = parabolStart + (i * step);
else
return Math.log10(y);
ypoint = parabolStart - (i * step);
// System.out.println("Y: "+ypoint);
double res[] = inverseParabol(a, b, c, Math.abs(ypoint));
// System.out.println("X: "+linearpoints[i]);
if (ypoint < 0)
linearpoints[i] = res[1] + shift;
else
linearpoints[i] = res[0] + shift;
}
//the parabol is centered on the start Point
public static double[] parabolicInterpolation(double startP,double endP,int intervals){
double start = startP;
double end = endP;
double shift = start;
double a = 1000d;
double b = 0d;
double c = 0d;
double parabolStart = parabol(a,b,c,start,shift);
if (start<0)
parabolStart = -1*parabolStart;
double parabolEnd = parabol(a,b,c,end,start);
if (end<0)
parabolEnd = -1*parabolEnd;
double step =0 ;
if (intervals >0){
double difference = Math.abs(parabolEnd-parabolStart);
step = (difference/(double)intervals);
linearpoints[intervals - 1] = endP;
return linearpoints;
}
public static void main1(String[] args) {
// double [] points = {1,1.2,1.3,2,5};
double[] points = new double[20];
for (int i = 0; i < 20; i++)
points[i] = 10 * Math.random();
double max = getMax(points);
double min = getMin(points);
System.out.println("<" + min + "," + max + ">");
double[] interval = uniformDivide(max, min, points);
double[] frequencies = calcFrequencies(interval, points);
for (int i = 0; i < interval.length; i++) {
System.out.print(interval[i] + " ");
System.out.println("->" + frequencies[i] + " ");
}
}
public static void main(String[] args) {
/*
* System.out.println("numbers to take: " + calcNumOfRepresentativeElements(100, 100)); double[] interp = linearInterpolation(27.27, 28.28, 3); double[] parabinterp = parabolicInterpolation(1, 10, 9); System.out.println("");
*/
int[] ii = takeChunks(11549, 11549/100);
System.out.println("OK");
}
public static int[] takeChunks(int numberOfElements, int partitionFactor) {
int[] partitions = new int[1];
if (partitionFactor <= 0) {
return partitions;
} else if (partitionFactor == 1) {
partitions[0] = numberOfElements;
return partitions;
}
int chunksize = numberOfElements / (partitionFactor);
int rest = numberOfElements % (partitionFactor);
if (chunksize == 0) {
partitions = new int[numberOfElements];
for (int i = 0; i < numberOfElements; i++) {
partitions[i] = 1;
}
double[] linearpoints = new double[intervals];
linearpoints[0] = startP;
// System.out.println("Y0: "+parabolStart);
for (int i=1;i<intervals-1;i++){
double ypoint =0;
if (end>start)
ypoint = parabolStart+(i*step);
else
ypoint = parabolStart-(i*step);
// System.out.println("Y: "+ypoint);
double res[] = inverseParabol(a, b, c, Math.abs(ypoint));
// System.out.println("X: "+linearpoints[i]);
if (ypoint<0)
linearpoints[i]= res[1]+shift;
else
linearpoints[i]= res[0]+shift;
} else {
partitions = new int[partitionFactor];
for (int i = 0; i < partitionFactor; i++) {
partitions[i] = chunksize;
}
linearpoints[intervals-1] = endP;
return linearpoints;
}
public static void main1(String[] args) {
// double [] points = {1,1.2,1.3,2,5};
double[] points = new double[20];
for (int i = 0; i < 20; i++)
points[i] = 10 * Math.random();
double max = getMax(points);
double min = getMin(points);
System.out.println("<" + min + "," + max + ">");
double[] interval = uniformDivide(max, min, points);
double[] frequencies = calcFrequencies(interval, points);
for (int i = 0; i < interval.length; i++) {
System.out.print(interval[i] + " ");
System.out.println("->" + frequencies[i] + " ");
for (int i = 0; i < rest; i++) {
partitions[i]++;
}
}
public static void main(String[] args) {
System.out.println("numbers to take: "+calcNumOfRepresentativeElements(100,100));
double [] interp = linearInterpolation(27.27,28.28,3);
double [] parabinterp = parabolicInterpolation(1,10,9);
System.out.println("");
}
public static int chunkize(int numberOfElements,int partitionFactor){
int chunksize = numberOfElements/ partitionFactor;
int rest = numberOfElements % partitionFactor;
if (chunksize == 0)
chunksize = 1;
else if (rest !=0)
chunksize++;
/*
int numOfChunks = numberOfElements / chunksize;
if ((numberOfElements % chunksize) != 0)
numOfChunks += 1;
*/
return chunksize;
}
return partitions;
}
/**
 * Computes the size of the chunks obtained by splitting a set of elements into
 * {@code partitionFactor} parts, rounding up when the division is not exact.
 *
 * @param numberOfElements total number of elements to split
 * @param partitionFactor number of parts; a value of 0 raises an ArithmeticException
 * @return 1 when the integer quotient is 0, otherwise the quotient, increased by one
 *         when the division leaves a remainder
 */
public static int chunkize(int numberOfElements, int partitionFactor) {
    final int quotient = numberOfElements / partitionFactor;
    // fewer elements than partitions: every chunk holds a single element
    if (quotient == 0) {
        return 1;
    }
    final boolean exactDivision = (numberOfElements % partitionFactor) == 0;
    // a remainder means the chunks must be one element larger to cover everything
    return exactDivision ? quotient : quotient + 1;
}
}