diff --git a/.classpath b/.classpath
index d1379d5..fe82554 100644
--- a/.classpath
+++ b/.classpath
@@ -30,5 +30,6 @@
+
diff --git a/ecologicalEngine.jardesc b/ecologicalEngine.jardesc
index ae4f01e..acc0d2f 100644
--- a/ecologicalEngine.jardesc
+++ b/ecologicalEngine.jardesc
@@ -1,6 +1,6 @@
-
+
diff --git a/src/org/gcube/contentmanagement/graphtools/abstracts/GenericStandaloneGraph.java b/src/org/gcube/contentmanagement/graphtools/abstracts/GenericStandaloneGraph.java
index b84a090..cbc6d7a 100644
--- a/src/org/gcube/contentmanagement/graphtools/abstracts/GenericStandaloneGraph.java
+++ b/src/org/gcube/contentmanagement/graphtools/abstracts/GenericStandaloneGraph.java
@@ -36,6 +36,7 @@ public abstract class GenericStandaloneGraph extends ApplicationFrame {
 
 	public GenericStandaloneGraph(String title) {
 		super(title);
+		big = false;
 	}
 
@@ -130,6 +131,7 @@ public abstract class GenericStandaloneGraph extends ApplicationFrame {
 		return image;
 	}
 
+
 	public void renderGraphGroup(GraphGroups graphgroups) {
 
 		Map graphmap = graphgroups.getGraphs();
diff --git a/src/org/gcube/contentmanagement/lexicalmatcher/utils/MathFunctions.java b/src/org/gcube/contentmanagement/lexicalmatcher/utils/MathFunctions.java
index 465a5b9..0015b0e 100644
--- a/src/org/gcube/contentmanagement/lexicalmatcher/utils/MathFunctions.java
+++ b/src/org/gcube/contentmanagement/lexicalmatcher/utils/MathFunctions.java
@@ -26,6 +26,18 @@ public class MathFunctions {
 
 	}
 
+	// incrementally updates an average (a percentage or a mean) when the (N+1)-th element arrives
+	public static double incrementAvg(double perc, double quantity, int N) {
+
+		if (N == 0)
+			return quantity;
+
+		double out = 0;
+		int N_plus_1 = N + 1;
+		out = (double) ((perc + ((double) quantity / (double) N)) * ((double) N / (double) N_plus_1));
+		return out;
+
+	}
 
 	public static ArrayList generateRandoms(int numberOfRandoms, int min, int max) {
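The update implemented by incrementAvg is the standard incremental-mean identity avg(N+1) = (avg(N) * N + x) / (N + 1), rewritten as (avg(N) + x/N) * N/(N+1) to match the code. A quick sanity check (hypothetical snippet, not part of the patch):

    // build the mean of {3.0, 5.0, 10.0} one element at a time
    double avg = 0;
    double[] xs = {3.0, 5.0, 10.0};
    for (int n = 0; n < xs.length; n++)
        avg = MathFunctions.incrementAvg(avg, xs[n], n); // n = elements already averaged
    // avg is now 6.0, identical to (3.0 + 5.0 + 10.0) / 3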
diff --git a/src/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java b/src/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java
index 085f86f..76d6733 100644
--- a/src/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java
+++ b/src/org/gcube/dataanalysis/ecoengine/evaluation/HabitatRepresentativeness.java
@@ -5,6 +5,7 @@ import java.util.HashMap;
 import java.util.List;
 
 import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
+import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
 import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
 import org.gcube.dataanalysis.ecoengine.interfaces.DataAnalysis;
 import org.gcube.dataanalysis.ecoengine.models.cores.pca.PrincipalComponentAnalysis;
@@ -24,9 +25,12 @@ public class HabitatRepresentativeness extends DataAnalysis {
 	String configPath = "./cfg/";
 	private HashMap output;
 
-	private static int minimumNumberToTake = 500;
-	private static int status;
-
+	private static int minimumNumberToTake = 10000;
+	private float status;
+	private int currentIterationStep;
+	private float innerstatus;
+	private int maxTests = 2;
+
 	public HashMap getInputParameters() {
 		HashMap parameters = new HashMap();
@@ -98,31 +102,34 @@ public class HabitatRepresentativeness extends DataAnalysis {
 
 	}
 
-	double [] meanHRS ;
-	double [] meanHRSVector;
-	double currentHRSScore;
-	double [] currentHRSVector;
+	private void calcHRS(String projectingAreaTable, String projectingAreaFeaturesOptionalCondition, String FeaturesColumns, String positiveCasesTable, String negativeCasesTable, int numberOfElements) throws Exception {
 
-		int numberOfElementsToTake = numberOfElements;//Operations.calcNumOfRepresentativeElements(numberOfElements, numberOfElements);
+		innerstatus = 0f;
+		int numberOfElementsToTake = Operations.calcNumOfRepresentativeElements(numberOfElements, minimumNumberToTake);
 		AnalysisLogger.getLogger().trace("HRS: TAKING "+numberOfElementsToTake+" POINTS ON "+numberOfElements+" FROM THE AREA UNDER ANALYSIS");
 		// 1 - take the right number of points
 		double[][] areaPoints = getPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition, FeaturesColumns, numberOfElementsToTake);
 		AnalysisLogger.getLogger().trace("HRS: AREA POINTS MATRIX GENERATED");
+		innerstatus = 10f;
 		Operations operations = new Operations();
 		// 2 - standardize the matrix
 		areaPoints = operations.standardize(areaPoints);
 		AnalysisLogger.getLogger().trace("HRS: MATRIX HAS BEEN STANDARDIZED");
+		innerstatus = 20f;
 		// 3 - calculate PCA
 		PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
 		pca.calcPCA(areaPoints);
 		AnalysisLogger.getLogger().trace("HRS: PCA HAS BEEN TRAINED");
+		innerstatus = 30f;
 		// 4 - get the pca components for all the vectors
 		double[][] pcaComponents = pca.getComponentsMatrix(areaPoints);
 		AnalysisLogger.getLogger().trace("HRS: PCA COMPONENT CALCULATED");
+		innerstatus = 40f;
 		// 5 - calculate the frequency distributions for all the pca: each row will be a frequency distribution for a pca component associated to uniform divisions of the range
 		calcFrequenciesDistributionsForComponents(pcaComponents);
 		AnalysisLogger.getLogger().trace("HRS: FREQUENCIES FOR COMPONENTS CALCULATED");
+		innerstatus = 50f;
 		// 6 - take positive points and negative points - eventually merge them
 		double[][] positivePoints = null;
 		if ((positiveCasesTable!=null) && (positiveCasesTable.length()>0))
@@ -132,17 +139,18 @@ public class HabitatRepresentativeness extends DataAnalysis {
 			negativePoints = getPoints(negativeCasesTable, "", FeaturesColumns, numberOfElementsToTake);
 		double[][] habitatPoints = Transformations.mergeMatrixes(positivePoints, negativePoints);
 		AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS BUILT FROM POSITIVE AND NEGATIVE POINTS");
+		innerstatus = 60f;
 		// 7 - Standardize the points with respect to previous means and variances
 		habitatPoints = operations.standardize(habitatPoints, operations.means, operations.variances);
 		AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN STANDARDIZED RESPECT TO PREVIOUS MEANS AND VARIANCES");
 		// 8 - calculate the pca components for habitat
 		double[][] habitatPcaComponents = pca.getComponentsMatrix(habitatPoints);
 		AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
+		innerstatus = 70f;
 		// 9 - calculate frequencies distributions for each component, with respect to previous intervals
 		int components = habitatPcaComponents[0].length;
 		// 10 - calculate absolute differences and sum -> obtain a hrs for each PCA component = for each feature
 		currentHRSVector = new double[components];
-		AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
 		double[][] habitatPcaPointsMatrix = Transformations.traspose(habitatPcaComponents);
 
 		for (int i = 0; i < components; i++) {
@@ -155,85 +163,51 @@ public class HabitatRepresentativeness extends DataAnalysis {
 		}
 
 		AnalysisLogger.getLogger().trace("HRS: HRS VECTOR HAS BEEN CALCULATED");
-
-		// 11 - obtain hrsScore by weighted sum of hrs respect to inverse eigenvalues
-		currentHRSScore = Operations.scalarProduct(currentHRSVector, pca.getInverseEigenvalues());
-		AnalysisLogger.getLogger().trace("HRS: HRS SCORE HAS BEEN CALCULATED");
+		innerstatus = 90f;
+		// 11 - the weighted sum of hrs with respect to inverse eigenvalues proved too variable, so it was substituted with the plain sum of the scores
+//		currentHRSScore = Operations.scalarProduct(currentHRSVector, pca.getInverseNormalizedEigenvalues());
+		currentHRSScore = Operations.sumVector(currentHRSVector);
+		AnalysisLogger.getLogger().trace("HRS: HRS SCORE HAS BEEN CALCULATED");
+		innerstatus = 100f;
 	}
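Steps 9-11 reduce to: for each PCA component, compare the habitat frequency distribution with the area frequency distribution bin by bin, and sum the absolute gaps. In isolation, with hypothetical distributions over three bins:

    double[] areaFreq    = {0.25, 0.50, 0.25};  // area distribution (reference)
    double[] habitatFreq = {0.10, 0.40, 0.50};  // habitat distribution on the same bins
    double hrsComponent = Math.abs(0.10 - 0.25) + Math.abs(0.40 - 0.50) + Math.abs(0.50 - 0.25); // 0.5
    // the patch sums these per-component scores (Operations.sumVector) instead of weighting them by inverse eigenvalues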
SCORE HAS BEEN CALCULATED"); + innerstatus = 90f; + // 11 - obtain hrsScore by weighted sum of hrs respect to inverse eigenvalues - too variable, substituted with the sum of the scores +// currentHRSScore = Operations.scalarProduct(currentHRSVector, pca.getInverseNormalizedEigenvalues()); + currentHRSScore = Operations.sumVector(currentHRSVector); + AnalysisLogger.getLogger().trace("HRS: HRS SCORE HAS BEEN CALCULATED"); + innerstatus = 100f; } + private double meanHRS ; + private double [] meanHRSVector; + private double currentHRSScore; + private double [] currentHRSVector; public HashMap analyze(AlgorithmConfiguration config) throws Exception { try { + status = 0; String projectingAreaTable = config.getParam("ProjectingAreaTable"); String projectingAreaFeaturesOptionalCondition = config.getParam("ProjectingAreaFeaturesOptionalCondition"); String FeaturesColumns = config.getParam("FeaturesColumns"); String positiveCasesTable = config.getParam("PositiveCasesTable"); String negativeCasesTable = config.getParam("NegativeCasesTable"); - connection = AlgorithmConfiguration.getConnectionFromConfig(config); - + meanHRS = 0; int numberOfElements = calculateNumberOfPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition); - int numberOfElementsToTake = numberOfElements;//Operations.calcNumOfRepresentativeElements(numberOfElements, numberOfElements); - AnalysisLogger.getLogger().trace("HRS: TAKING "+numberOfElementsToTake+" POINTS ON "+numberOfElements+" FROM THE AREA UNDER ANALYSIS"); - // 1 - take the right number of points - double[][] areaPoints = getPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition, FeaturesColumns, numberOfElementsToTake); - AnalysisLogger.getLogger().trace("HRS: AREA POINTS MATRIX GENERATED"); - Operations operations = new Operations(); - // 2 - standardize the matrix - areaPoints = operations.standardize(areaPoints); - AnalysisLogger.getLogger().trace("HRS: MATRIX HAS BEEN STANDARDIZED"); - // 3 - calculate PCA - PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(); - pca.calcPCA(areaPoints); - AnalysisLogger.getLogger().trace("HRS: PCA HAS BEEN TRAINED"); - // 4 - get the pca components for all the vector - double[][] pcaComponents = pca.getComponentsMatrix(areaPoints); - AnalysisLogger.getLogger().trace("HRS: PCA COMPONENT CALCULATED"); - // 5 - calculate the frequency distributions for all the pca: each row will be a frequency distribution for a pca component associated to uniform divisions of the range - calcFrequenciesDistributionsForComponents(pcaComponents); - AnalysisLogger.getLogger().trace("HRS: FREQUENCIES FOR COMPONENTS CALCULATED"); - // 6 - take positive points and negative points - eventually merge them - double[][] positivePoints = null; - if ((positiveCasesTable!=null) && (positiveCasesTable.length()>0)) - positivePoints = getPoints(positiveCasesTable, "", FeaturesColumns, numberOfElementsToTake); - double[][] negativePoints = null; - if ((negativeCasesTable!=null) && (negativeCasesTable.length()>0)) - negativePoints = getPoints(negativeCasesTable, "", FeaturesColumns, numberOfElementsToTake); - double[][] habitatPoints = Transformations.mergeMatrixes(positivePoints, negativePoints); - AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS BUILT FROM POSITIVE AND NEGATIVE POINTS"); - // 7 - Standardize the points respect to previous means and variances - habitatPoints = operations.standardize(habitatPoints, operations.means, operations.variances); - AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN 
diff --git a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateAnalysis.java b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateAnalysis.java
index ddcf2cf..38aef8e 100644
--- a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateAnalysis.java
+++ b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateAnalysis.java
@@ -118,34 +118,38 @@ public class BioClimateAnalysis {
 		int height = 420;
 
 		if (doHspecAn){
-			BioClimateGraph lineg1 = new BioClimateGraph(SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells));
+
 			double min = Operations.getMin(discrepancies);
 			discrepancies[0] = min;
-			BioClimateGraph lineg4 = new BioClimateGraph(SERIES[3], Operations.getMax(discrepancies), min);
+
 			if (liveRender){
+				BioClimateGraph lineg1 = new BioClimateGraph(SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells));
+				BioClimateGraph lineg4 = new BioClimateGraph(SERIES[3], Operations.getMax(discrepancies), min);
 				lineg4.render(discrepanciesTrend);
 				lineg1.render(probabilityTrend);
 			}
 
-			producedImages.add(lineg1.renderImgObject(width, height, probabilityTrend));
-			producedImages.add(lineg4.renderImgObject(width, height, discrepanciesTrend));
+			producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, probabilityTrend, SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells)));
+			producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, discrepanciesTrend, SERIES[3], Operations.getMax(discrepancies), min));
+
 		}
 
 		if (doHcafAn){
-			BioClimateGraph lineg6 = new BioClimateGraph(SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce));
-			BioClimateGraph lineg7 = new BioClimateGraph(SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST));
-			BioClimateGraph lineg8 = new BioClimateGraph(SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity));
 
 			if (liveRender){
+				BioClimateGraph lineg6 = new BioClimateGraph(SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce));
+				BioClimateGraph lineg7 = new BioClimateGraph(SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST));
+				BioClimateGraph lineg8 = new BioClimateGraph(SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity));
 				lineg6.render(avgIceD);
 				lineg7.render(avgSSTD);
 				lineg8.render(avgSalinityD);
 			}
 
-			producedImages.add(lineg6.renderImgObject(width, height, avgIceD));
-			producedImages.add(lineg7.renderImgObject(width, height, avgSSTD));
-			producedImages.add(lineg8.renderImgObject(width, height, avgSalinityD));
+			producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgIceD, SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce)));
+			producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgSSTD, SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST)));
+			producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgSalinityD, SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity)));
+
 		}
 
 		AnalysisLogger.getLogger().trace("Produced All Images");
@@ -331,4 +335,6 @@ public class BioClimateAnalysis {
 		Double d = Double.parseDouble(out.get("MEAN"));
 		return d;
 	}
+
+
 }
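Moving the BioClimateGraph constructor calls inside the liveRender branch means no ApplicationFrame is instantiated unless a window is actually wanted, so the static rendering path stays usable on a display-less server. A caller could make that guard explicit (hypothetical helper, not part of the patch; only GraphicsEnvironment.isHeadless() is standard JDK API):

    import java.awt.GraphicsEnvironment;

    public class RenderGuard { // hypothetical helper class
        public static boolean canLiveRender(boolean wantWindows) {
            // ApplicationFrame subclasses like BioClimateGraph need a display; refuse on headless JVMs
            return wantWindows && !GraphicsEnvironment.isHeadless();
        }
    }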
diff --git a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateGraph.java b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateGraph.java
index 6619cb5..ca5898d 100644
--- a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateGraph.java
+++ b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/BioClimateGraph.java
@@ -1,8 +1,10 @@
 package org.gcube.dataanalysis.ecoengine.evaluation.bioclimate;
 
 import java.awt.Color;
+import java.awt.Image;
 
 import org.gcube.contentmanagement.graphtools.abstracts.GenericStandaloneGraph;
+import org.gcube.contentmanagement.graphtools.data.conversions.ImageTools;
 import org.jfree.chart.ChartFactory;
 import org.jfree.chart.JFreeChart;
 import org.jfree.chart.axis.CategoryAxis;
@@ -75,6 +77,64 @@ public class BioClimateGraph extends GenericStandaloneGraph {
 		return dataset;
 	}
 
+
+	public static Image renderStaticImgObject(int width, int height, Dataset set, String title, double max, double min) {
+
+		JFreeChart chart = createStaticChart(set,max,min,title);
+
+		/*
+		JPanel jp = new ChartPanel(chart);
+		this.setContentPane(jp);
+		this.pack();
+		*/
+//		Image image = this.createImage(width, height);
+
+		Image image = ImageTools.toImage(chart.createBufferedImage(width, height));
+
+		return image;
+	}
+
+
+	protected static JFreeChart createStaticChart(Dataset dataset, double max, double min, String title) {
+
+		// create the chart...
+		JFreeChart chart = ChartFactory.createLineChart(
+			title, // chart title
+			"", // domain axis label
+			"", // range axis label
+			(DefaultCategoryDataset)dataset, // data
+			PlotOrientation.VERTICAL, // orientation
+			true, // include legend
+			true, // tooltips
+			false // urls
+		);
+
+		chart.setBackgroundPaint(Color.white);
+
+		CategoryPlot plot = chart.getCategoryPlot();
+//		plot.setBackgroundPaint(Color.white);
+		plot.setRangeGridlinePaint(Color.white);
+		plot.setDomainCrosshairVisible(true);
+		plot.setDomainGridlinesVisible(true);
+		plot.setRangeCrosshairVisible(true);
+		plot.setRenderer(new LineAndShapeRenderer(true,true));
+
+		CategoryAxis categoryaxis1 = plot.getDomainAxis(0);
+		categoryaxis1.setCategoryLabelPositions(CategoryLabelPositions.DOWN_45);
+
+		plot.mapDatasetToDomainAxis(0, 0);
+
+		if (max!=min){
+			plot.getRangeAxis().setAutoRange(false);
+			plot.getRangeAxis().setUpperBound(max);
+			plot.getRangeAxis().setLowerBound(min);
+			double avg = min+((max-min)/2d);
+			plot.getRangeAxis().centerRange(avg);
+		}
+
+		return chart;
+	}
+
 	protected JFreeChart createChart(Dataset dataset) {
 
 		// create the chart...
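renderStaticImgObject builds the Image from JFreeChart's createBufferedImage, so no Swing component is ever created. If a file on disk is wanted instead of an in-memory Image, JFreeChart's own utility can write the same chart as a PNG. A sketch, callable from code in the same package since createStaticChart is protected (the dataset values are hypothetical):

    import java.io.File;
    import org.jfree.chart.ChartUtilities;
    import org.jfree.chart.JFreeChart;
    import org.jfree.data.category.DefaultCategoryDataset;

    DefaultCategoryDataset dataset = new DefaultCategoryDataset();
    dataset.addValue(0.4, "ice", "2012");
    dataset.addValue(0.3, "ice", "2031");
    JFreeChart chart = BioClimateGraph.createStaticChart(dataset, 0.5, 0.0, "Ice concentration trend");
    ChartUtilities.saveChartAsPNG(new File("trend.png"), chart, 680, 420); // throws IOException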
diff --git a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/InterpolateTables.java b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/InterpolateTables.java
index 2532b52..5189352 100644
--- a/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/InterpolateTables.java
+++ b/src/org/gcube/dataanalysis/ecoengine/evaluation/bioclimate/InterpolateTables.java
@@ -54,6 +54,9 @@ public class InterpolateTables {
 		this.temporaryDirectory += "/";
 
 		AnalysisLogger.setLogger(configPath + AlgorithmConfiguration.defaultLoggerFile);
+
+		AnalysisLogger.getLogger().debug("Initialization complete: persistence path "+persistencePath);
+
 		config = new LexicalEngineConfiguration();
 		config.setDatabaseURL(databaseURL);
 		config.setDatabaseUserName(databaseUserName);
@@ -67,9 +70,11 @@ public class InterpolateTables {
 		referencedbConnection = DatabaseFactory.initDBConnection(configPath + AlgorithmConfiguration.defaultConnectionFile, config);
 		AnalysisLogger.getLogger().debug("ReferenceDB initialized");
 		status = 0f;
+		AnalysisLogger.getLogger().debug("Interpolating from "+table1+" to "+table2);
 		DatabaseUtils utils = new DatabaseUtils(referencedbConnection);
 		// analyze table and take information about it
 		String createTableStatement = utils.buildCreateStatement(table1, "%1$s");
+		AnalysisLogger.getLogger().debug("Create Statement for table "+table1+": "+createTableStatement);
 		int numberOfColumns = utils.getColumnDecriptions().size();
 		// initialize the map of columns to write
 		List<List<StringBuffer>> outputFiles = new ArrayList<List<StringBuffer>>();
@@ -86,10 +91,10 @@ public class InterpolateTables {
 			String javatype = DataTypeRecognizer.transformTypeFromDB(gotColumnType);
 			List takeFirstColumn = DatabaseFactory.executeSQLQuery(DatabaseUtils.getOrderedElements(table1, utils.getPrimaryKey(), gotColumn), referencedbConnection);
 			List takeSecondColumn = DatabaseFactory.executeSQLQuery(DatabaseUtils.getOrderedElements(table2, utils.getPrimaryKey(), gotColumn), referencedbConnection);
-
+			// only if data are of numeric type, perform calculation
 			if (javatype.equals(BigDecimal.class.getName())) {
-				System.out.println("interpolating -> " + gotColumn);
+				AnalysisLogger.getLogger().debug("interpolating -> " + gotColumn);
 				List<List<Object[]>> interpolations = interpolateColumns(takeFirstColumn, takeSecondColumn, intervals, gotColumnType, function);
@@ -99,18 +104,18 @@ public class InterpolateTables {
 					// for each column to substitute
 					List<Object[]> columnToSub = interpolations.get(i);
 					if (columnToSub.size() > 0) {
-						System.out.println("UPDATE TABLE " + tableInterp + " ON COLUMN " + gotColumn);
-						addColumnToTable(outputFiles.get(i - 1), columnToSub);
+						AnalysisLogger.getLogger().debug("UPDATE TABLE " + tableInterp + " ON COLUMN " + gotColumn);
+						addColumnToTable(outputFiles.get(i - 1), columnToSub,true);
 					} else {
-						System.out.println("DOESN'T CHANGE TABLE " + tableInterp + " COLUMN " + gotColumn);
-						addColumnToTable(outputFiles.get(i - 1), takeFirstColumn);
+						AnalysisLogger.getLogger().debug("DOESN'T CHANGE TABLE " + tableInterp + " COLUMN " + gotColumn);
+						addColumnToTable(outputFiles.get(i - 1), takeFirstColumn,true);
 					}
 				}
 			}
 			// else update all the tables
 			else {
 				for (int i = 0; i < intervals - 2; i++) {
-					addColumnToTable(outputFiles.get(i), takeFirstColumn);
+					addColumnToTable(outputFiles.get(i), takeFirstColumn,false);
 				}
 			}
 
@@ -118,12 +123,12 @@ public class InterpolateTables {
 		}
 
 		status = 60f;
-		System.out.println("WRITING ALL THE BUFFERS");
+		AnalysisLogger.getLogger().debug("WRITING ALL THE BUFFERS");
 		writeAllStringBuffersToFiles(table1, outputFiles, function,startYear,endYear);
 
 		statusstep = 40f/(float)producedfiles.length;
 
-		interpolatedTables = new String[producedfiles.length+1];
+		interpolatedTables = new String[producedfiles.length+2];
 		interpolatedTables[0] = table1;
 
 		for (int i = 0; i < producedfiles.length; i++) {
@@ -132,14 +137,16 @@ public class InterpolateTables {
 			interpolatedTables[i+1] = filename;
 			String copyFileQuery = DatabaseUtils.copyFileToTableStatement(temporaryDirectory + producedfiles[i].getName(), filename);
 			// create Table
-			System.out.println("CREATING TABLE->" + filename);
+			AnalysisLogger.getLogger().debug("CREATING TABLE->" + filename);
 			DatabaseFactory.executeSQLUpdate(String.format(createTableStatement, filename), referencedbConnection);
-			System.out.println("FULFILLING TABLE->" + filename + ": " + copyFileQuery);
+			AnalysisLogger.getLogger().debug("FULFILLING TABLE->" + filename + ": " + copyFileQuery);
 			DatabaseFactory.executeSQLUpdate(copyFileQuery, referencedbConnection);
 			status = Math.min(status+statusstep,99);
 		}
 
-		interpolatedTables[producedfiles.length] = table2;
+		interpolatedTables[interpolatedTables.length-1] = table2;
+
+		AnalysisLogger.getLogger().debug("ALL TABLES HAVE BEEN PRODUCED");
 
 		} catch (Exception e) {
 			e.printStackTrace();
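The array resize fixes an off-by-one: with N produced files the result now has N + 2 slots - the source table, the N interpolated tables, and the target table - and table2 lands in its own last slot instead of overwriting the last interpolated name. For example (table names hypothetical):

    // N = 2 files interpolated between hcaf_2012 and hcaf_2050
    // interpolatedTables = { "hcaf_2012", "interp_1", "interp_2", "hcaf_2050" }  // length N + 2 = 4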
@@ -155,7 +162,7 @@ public class InterpolateTables {
 		return status;
 	}
 
-	private void addColumnToTable(List rows, List elements) {
+	private void addColumnToTable(List rows, List elements,boolean isNumber) {
 		int size = elements.size();
 		for (int i = 0; i < size; i++) {
 			Object[] couple = (Object[]) elements.get(i);
@@ -163,7 +170,10 @@ public class InterpolateTables {
 			StringBuffer buffer = null;
 			if (i >= rows.size()) {
 				buffer = new StringBuffer();
-				buffer.append(value);
+				if (isNumber && ((value == null) || (value.length()==0)))
+					buffer.append("0");
+				else
+					buffer.append(value);
 				rows.add(buffer);
 			} else {
 				buffer = rows.get(i);
@@ -175,7 +185,7 @@ public class InterpolateTables {
 
 	private void writeAllStringBuffersToFiles(String initialFile, List<List<StringBuffer>> outputFiles, INTERPOLATIONFUNCTIONS function,int startYear,int endYear) throws Exception {
 		int numOfFiles = outputFiles.size();
-		int yearStep = (int)((float)(endYear-startYear)/(float)numOfFiles);
+		int yearStep = (int)((float)(endYear-startYear)/(float)(numOfFiles+1));
 		producedfiles = new File[numOfFiles];
 		for (int i = 0; i < numOfFiles; i++) {
 			List<StringBuffer> rows = outputFiles.get(i);
@@ -184,8 +194,11 @@ public class InterpolateTables {
 			for (int k = 0; k < nrows; k++) {
 				completeFile.append(rows.get(k) + "\n");
 			}
-
-			String filename = temporaryDirectory + initialFile + "_" + (startYear+(i+1)*yearStep) + "_" + function.name() + ".csv";
+			int yearCals = startYear+(i+1)*yearStep;
+			if (yearCals == endYear)
+				yearCals = endYear-1;
+
+			String filename = temporaryDirectory + initialFile + "_" + (yearCals) + "_" + function.name()+"_"+i+System.currentTimeMillis()+ ".csv";
 			FileTools.saveString(filename, completeFile.toString(), true, "UTF-8");
 			producedfiles[i] = new File(filename);
 		}
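Dividing by numOfFiles + 1 places the interpolated tables at the interior points of the [startYear, endYear] span rather than letting the last one collide with the target year:

    // startYear = 2012, endYear = 2050, numOfFiles = 1 (values hypothetical)
    int oldStep = (int) ((2050 - 2012) / (float) 1);       // 38 -> file labelled 2012 + 38 = 2050, same as table2
    int newStep = (int) ((2050 - 2012) / (float) (1 + 1)); // 19 -> file labelled 2012 + 19 = 2031

The yearCals == endYear guard and the i + System.currentTimeMillis() suffix additionally keep file names unique even in degenerate rounding cases.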
@@ -209,7 +222,7 @@ public class InterpolateTables {
 			double[] interpolation = null;
 			if (firstNum != secondNum) {
 				if (interping)
-					{ System.out.println("Interpolating ... "); interping = false;}
+					{ AnalysisLogger.getLogger().debug("Interpolating ... "); interping = false;}
 
 				if (function == INTERPOLATIONFUNCTIONS.LINEAR)
 					interpolation = Operations.linearInterpolation(firstNum, secondNum, intervals);
diff --git a/src/org/gcube/dataanalysis/ecoengine/models/cores/pca/PrincipalComponentAnalysis.java b/src/org/gcube/dataanalysis/ecoengine/models/cores/pca/PrincipalComponentAnalysis.java
index 127df92..c4e7af7 100644
--- a/src/org/gcube/dataanalysis/ecoengine/models/cores/pca/PrincipalComponentAnalysis.java
+++ b/src/org/gcube/dataanalysis/ecoengine/models/cores/pca/PrincipalComponentAnalysis.java
@@ -49,11 +49,11 @@ public class PrincipalComponentAnalysis {
 		int m= 5;
 		int n = 5;
 		double values[][] = new double[m][n];
-		double val1[] = {1.000d,0.451d,0.511d,0.197d,0.162d};
-		double val2[] = {0.451d,1.000d,0.445d,0.252d,0.238d};
+		double val2[] = {1.000d,0.451d,0.511d,0.197d,0.162d};
+		double val1[] = {0.451d,1.000d,0.445d,0.252d,0.238d};
 		double val3[] = {0.511d,0.445d,1.000d,0.301d,0.227d};
-		double val4[] = {0.197d,0.252d,0.301d,1.000d,0.620d};
-		double val5[] = {0.162d,0.238d,0.227d,0.620d,1.000d};
+		double val5[] = {0.197d,0.252d,0.301d,1.000d,0.620d};
+		double val4[] = {0.162d,0.238d,0.227d,0.620d,1.000d};
 		values[0] = val1;
 		values[1] = val2;
 		values[2] = val3;
@@ -111,6 +111,23 @@ public class PrincipalComponentAnalysis {
 		return values;
 	}
 
+	public double [] getNormalizedEigenvalues (){
+		double [] values = new double[numberOfComponents];
+
+		for (int i=0;i<numberOfComponents;i++)
+			values[i] = eigenvalues[i];
+
+		double sum = 0;
+		for (int i=0;i<numberOfComponents;i++)
+			sum = sum + values[i];
+
+		for (int i=0;i<numberOfComponents;i++)
+			values[i] = values[i] / sum;
+
+		return values;
+	}
+
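The body of getNormalizedEigenvalues was lost in extraction and is reconstructed above under the assumption (suggested by the commented call to getInverseNormalizedEigenvalues in calcHRS and by the existing getInverseEigenvalues) that it rescales the eigenvalues to sum to 1 so they can serve as weights. A spot check of that intent with hypothetical values:

    double[] eig = {2.0, 1.0, 1.0};
    double sum = eig[0] + eig[1] + eig[2];                    // 4.0
    double[] weights = {eig[0]/sum, eig[1]/sum, eig[2]/sum};  // {0.5, 0.25, 0.25}, sums to 1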
"" : interval[i - 1]) + "," + interval[i] + ")" + " - " + points[j]); frequencies[i] = frequencies[i] + 1; } } } - for (int i = 0; i < intervs; i++) { - frequencies[i] = frequencies[i] / (double) npoints; - } + return frequencies; } + public static double[] normalizeFrequencies(double[] frequencies, int numberOfPoints){ + int intervs = frequencies.length; + for (int i = 0; i < intervs; i++) { + frequencies[i] = frequencies[i] / (double) numberOfPoints; + } + + return frequencies; + + } + // checks if an interval contains at least one element from a sequence of points public static boolean intervalContainsPoints(double min, double max, double[] points) { // System.out.println(min+"-"+max); @@ -131,34 +139,42 @@ public class Operations { boolean subdivisionOK = false; double gap = (max - min) / n; - + + //search for the best subdivision: find the best n while (!subdivisionOK) { // System.out.println("*************************"); boolean notcontains = false; - + //take the gap interval to test for (int i = 0; i < n; i++) { - double leftmost = 0; + double rightmost = 0; + //for the last border take a bit more than max if (i == n - 1) - leftmost = max + 0.01; + rightmost = max + 0.01; else - leftmost = min + gap * (i + 1); - - if (!intervalContainsPoints(min + gap * i, leftmost, points)) { + rightmost = min + gap * (i + 1); + //if the interval doesn't contain any point discard the subdivision + if (!intervalContainsPoints(min + gap * i, rightmost, points)) { notcontains = true; break; } } + //if there are empty intervals and there is space for another subdivision proceed if (notcontains && n > 0) { n--; gap = (max - min) / n; - } else if (n == 0) { + } + //otherwise take the default subdivision + else if (n == 0) { n = maxintervals; subdivisionOK = true; - } else + } + //if all the intervals are non empty then exit + else subdivisionOK = true; } - + + //once the best n is found build the intervals double[] intervals = new double[n]; for (int i = 0; i < n; i++) { if (i 0) { int ncols = matrix[0].length; - if ((means==null) && (variances==null)) + int mrows = matrix.length; + + if ((means==null) && (variances==null)){ means = new double[ncols]; variances = new double[ncols]; + } + + double[][] matrixT = Transformations.traspose(matrix); for (int i = 0; i < ncols ; i++) { - double[] icolumn = Transformations.getColumn(i, matrix); + double[] icolumn = matrixT[i]; + double mean = 0; + if (meansVec == null){ mean = MathFunctions.mean(icolumn); means[i] = mean; @@ -203,16 +227,20 @@ public class Operations { else variance = variancesVec[i]; - for (int j = 0; j < icolumn.length; i++) { + for (int j = 0; j < mrows; j++) { // standardization - icolumn[j] = (icolumn[j] - mean) / variance; + double numerator = (icolumn[j] - mean); + if ((numerator == 0) && (variance == 0)) + icolumn[j] = 0; + else if (variance == 0) + icolumn[j] = Double.MAX_VALUE; + else + icolumn[j] = numerator / variance; } - - - - - Transformations.substColumn(icolumn, i, matrix); } + + matrix = Transformations.traspose(matrixT); + } return matrix; } diff --git a/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java b/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java index 342af0b..e921d6f 100644 --- a/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java +++ b/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java @@ -41,6 +41,26 @@ public class Transformations { } + // gets all the columns from a matrix + public static double[][] traspose(double[][] matrix) { + int m = matrix.length; + if 
@@ ... @@ public class Operations {
 	public double[][] standardize(double[][] matrix, double[] meansVec, double[] variancesVec) {
 		if (matrix.length > 0) {
 			int ncols = matrix[0].length;
-			if ((means==null) && (variances==null))
+			int mrows = matrix.length;
+
+			if ((means==null) && (variances==null)){
 				means = new double[ncols];
 				variances = new double[ncols];
+			}
+
+			double[][] matrixT = Transformations.traspose(matrix);
 
 			for (int i = 0; i < ncols ; i++) {
-				double[] icolumn = Transformations.getColumn(i, matrix);
+				double[] icolumn = matrixT[i];
+
+				double mean = 0;
+
 				if (meansVec == null){
 					mean = MathFunctions.mean(icolumn);
 					means[i] = mean;
 				}
 				else
 					mean = meansVec[i];
 
 				double variance = 0;
 				if (variancesVec == null){
 					variance = MathFunctions.variance(icolumn);
 					variances[i] = variance;
 				}
 				else
 					variance = variancesVec[i];
 
-				for (int j = 0; j < icolumn.length; i++) {
+				for (int j = 0; j < mrows; j++) {
 					// standardization
-					icolumn[j] = (icolumn[j] - mean) / variance;
+					double numerator = (icolumn[j] - mean);
+					if ((numerator == 0) && (variance == 0))
+						icolumn[j] = 0;
+					else if (variance == 0)
+						icolumn[j] = Double.MAX_VALUE;
+					else
+						icolumn[j] = numerator / variance;
 				}
-
-
-
-
-				Transformations.substColumn(icolumn, i, matrix);
 			}
+
+			matrix = Transformations.traspose(matrixT);
+
 		}
 		return matrix;
 	}
diff --git a/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java b/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java
index 342af0b..e921d6f 100644
--- a/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java
+++ b/src/org/gcube/dataanalysis/ecoengine/utils/Transformations.java
@@ -41,6 +41,26 @@ public class Transformations {
 
 	}
 
+	// transposes a matrix: rows become columns
+	public static double[][] traspose(double[][] matrix) {
+		int m = matrix.length;
+		if (m>0){
+			int n = matrix[0].length;
+
+			double columns[][] = new double[n][m];
+
+			for (int i = 0; i < n; i++){
+				for (int j = 0; j < m; j++){
+					columns[i][j] = matrix[j][i];
+				}
+			}
+
+			return columns;
+		}
+		else
+			return matrix;
+	}
+
@@ ... @@ public class Transformations {
 	public static String vector2String(double[] vector) {
 		String out = "";
 		for (int i = 0; i < vector.length; i++) {
 			if (i>0)
-				out = out + ","+vector;
+				out = out + ","+vector[i];
 			else
-				out = ""+vector;
+				out = ""+vector[i];
 		}
-		out +=")";
+
 		return out;
 	}
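With the column-wise work now done on rows of the transposed matrix, a standardization round-trip behaves as in this sketch (values hypothetical; the guards assume the zero-variance branches above):

    double[][] m = { {1.0, 10.0}, {3.0, 10.0} };   // two observations, two features
    double[][] t = Transformations.traspose(m);    // t[0] = {1.0, 3.0}, t[1] = {10.0, 10.0}
    // feature 0: mean 2.0, nonzero spread -> finite centred-and-scaled values
    // feature 1: every value equals the mean -> numerator 0 and variance 0, so the guard writes 0
    // transposing t back restores the row-per-observation layout that standardize returns

The transpose-twice approach also fixes the old loop bug (incrementing i instead of j), since each feature is now a contiguous array rather than a column extracted and substituted element by element.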