implemented HRS and bioClimate Analysis

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@51651 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Gianpaolo Coro 2012-02-28 17:14:29 +00:00
parent 59cfd11fae
commit c137e56432
11 changed files with 282 additions and 135 deletions

View File

@ -30,5 +30,6 @@
<classpathentry kind="lib" path="/StatisticalLibSupportLibraries/lib/EcologicalEngine/TGGraphLayout.jar"/>
<classpathentry kind="lib" path="/StatisticalLibSupportLibraries/lib/EcologicalEngine/xpp3_min-1.1.4c.jar"/>
<classpathentry kind="lib" path="/StatisticalLibSupportLibraries/lib/EcologicalEngine/xstream-1.3.1.jar"/>
<classpathentry kind="lib" path="/StatisticalLibSupportLibraries/lib/EcologicalEngine/Jama-1.0.2.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="WINDOWS-1252" standalone="no"?>
<jardesc>
<jar path="StatisticalLibSupportLibraries/lib/ecologicalDataMining.jar"/>
<jar path="StatisticalLibSupportLibraries/lib/EcologicalEngine/ecologicalDataMining.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/EcologicalEngine/ecologicalEngine.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
<storedRefactorings deprecationInfo="true" structuralOnly="false"/>
<selectedProjects/>

View File

@ -36,6 +36,7 @@ public abstract class GenericStandaloneGraph extends ApplicationFrame {
/**
 * Creates the graph frame, forwarding the title to the ApplicationFrame superclass constructor.
 * The "big" flag is initialised to false.
 *
 * @param title the title handed to the superclass constructor
 */
public GenericStandaloneGraph(String title) {
super(title);
big = false;
}
@ -130,6 +131,7 @@ public abstract class GenericStandaloneGraph extends ApplicationFrame {
return image;
}
public void renderGraphGroup(GraphGroups graphgroups) {
Map<String, GraphData> graphmap = graphgroups.getGraphs();

View File

@ -26,6 +26,18 @@ public class MathFunctions {
}
/**
 * Incrementally updates a percentage or mean with one more element, so that the
 * previous elements do not have to be kept when a lot of them are present.
 *
 * @param perc     the mean accumulated over the previous {@code N} elements
 * @param quantity the new element to fold into the mean
 * @param N        the number of elements already accounted for by {@code perc}
 * @return {@code quantity} itself when {@code N} is 0, otherwise
 *         {@code (perc + quantity/N) * (N/(N+1))}
 */
public static double incrementAvg(double perc, double quantity, int N) {
    if (N == 0) {
        return quantity;
    }
    final double previousCount = (double) N;
    final double updatedCount = (double) (N + 1);
    // keep the original evaluation order: first shift the old mean, then rescale it
    final double shiftedMean = perc + (quantity / previousCount);
    final double rescaleFactor = previousCount / updatedCount;
    return shiftedMean * rescaleFactor;
}
public static ArrayList<Integer> generateRandoms(int numberOfRandoms, int min, int max) {

View File

@ -5,6 +5,7 @@ import java.util.HashMap;
import java.util.List;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.MathFunctions;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.interfaces.DataAnalysis;
import org.gcube.dataanalysis.ecoengine.models.cores.pca.PrincipalComponentAnalysis;
@ -24,8 +25,11 @@ public class HabitatRepresentativeness extends DataAnalysis {
String configPath = "./cfg/";
private HashMap<String, String> output;
private static int minimumNumberToTake = 500;
private static int status;
private static int minimumNumberToTake = 10000;
private float status;
private int currentIterationStep;
private float innerstatus;
private int maxTests = 2;
public HashMap<String, VarCouple> getInputParameters() {
@ -98,31 +102,34 @@ public class HabitatRepresentativeness extends DataAnalysis {
}
double [] meanHRS ;
double [] meanHRSVector;
double currentHRSScore;
double [] currentHRSVector;
private void calcHRS(String projectingAreaTable, String projectingAreaFeaturesOptionalCondition, String FeaturesColumns, String positiveCasesTable, String negativeCasesTable,int numberOfElements) throws Exception{
int numberOfElementsToTake = numberOfElements;//Operations.calcNumOfRepresentativeElements(numberOfElements, numberOfElements);
innerstatus = 0f;
int numberOfElementsToTake = Operations.calcNumOfRepresentativeElements(numberOfElements, minimumNumberToTake);
AnalysisLogger.getLogger().trace("HRS: TAKING "+numberOfElementsToTake+" POINTS ON "+numberOfElements+" FROM THE AREA UNDER ANALYSIS");
// 1 - take the right number of points
double[][] areaPoints = getPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition, FeaturesColumns, numberOfElementsToTake);
AnalysisLogger.getLogger().trace("HRS: AREA POINTS MATRIX GENERATED");
innerstatus = 10f;
Operations operations = new Operations();
// 2 - standardize the matrix
areaPoints = operations.standardize(areaPoints);
AnalysisLogger.getLogger().trace("HRS: MATRIX HAS BEEN STANDARDIZED");
innerstatus = 20f;
// 3 - calculate PCA
PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
pca.calcPCA(areaPoints);
AnalysisLogger.getLogger().trace("HRS: PCA HAS BEEN TRAINED");
innerstatus = 30f;
// 4 - get the pca components for all the vector
double[][] pcaComponents = pca.getComponentsMatrix(areaPoints);
AnalysisLogger.getLogger().trace("HRS: PCA COMPONENT CALCULATED");
innerstatus = 40f;
// 5 - calculate the frequency distributions for all the pca: each row will be a frequency distribution for a pca component associated to uniform divisions of the range
calcFrequenciesDistributionsForComponents(pcaComponents);
AnalysisLogger.getLogger().trace("HRS: FREQUENCIES FOR COMPONENTS CALCULATED");
innerstatus = 50f;
// 6 - take positive points and negative points - eventually merge them
double[][] positivePoints = null;
if ((positiveCasesTable!=null) && (positiveCasesTable.length()>0))
@ -132,17 +139,18 @@ public class HabitatRepresentativeness extends DataAnalysis {
negativePoints = getPoints(negativeCasesTable, "", FeaturesColumns, numberOfElementsToTake);
double[][] habitatPoints = Transformations.mergeMatrixes(positivePoints, negativePoints);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS BUILT FROM POSITIVE AND NEGATIVE POINTS");
innerstatus = 60f;
// 7 - Standardize the points respect to previous means and variances
habitatPoints = operations.standardize(habitatPoints, operations.means, operations.variances);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN STANDARDIZED RESPECT TO PREVIOUS MEANS AND VARIANCES");
// 8 - calculate the pca components for habitat
double[][] habitatPcaComponents = pca.getComponentsMatrix(habitatPoints);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
innerstatus = 70f;
// 9 - calculate frequencies distributions for each component, respect to previous intervals
int components = habitatPcaComponents[0].length;
// 10 - calculate absolute differences and sum -> obtain a hrs for each PCA component = for each feature
currentHRSVector = new double[components];
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
double[][] habitatPcaPointsMatrix = Transformations.traspose(habitatPcaComponents);
for (int i = 0; i < components; i++) {
@ -155,85 +163,51 @@ public class HabitatRepresentativeness extends DataAnalysis {
}
AnalysisLogger.getLogger().trace("HRS: HRS VECTOR HAS BEEN CALCULATED");
innerstatus = 90f;
// 11 - obtain hrsScore by weighted sum of hrs respect to inverse eigenvalues - too variable, substituted with the sum of the scores
// currentHRSScore = Operations.scalarProduct(currentHRSVector, pca.getInverseNormalizedEigenvalues());
currentHRSScore = Operations.sumVector(currentHRSVector);
// 11 - obtain hrsScore by weighted sum of hrs respect to inverse eigenvalues
currentHRSScore = Operations.scalarProduct(currentHRSVector, pca.getInverseEigenvalues());
AnalysisLogger.getLogger().trace("HRS: HRS SCORE HAS BEEN CALCULATED");
innerstatus = 100f;
}
private double meanHRS ;
private double [] meanHRSVector;
private double currentHRSScore;
private double [] currentHRSVector;
public HashMap<String, String> analyze(AlgorithmConfiguration config) throws Exception {
try {
status = 0;
String projectingAreaTable = config.getParam("ProjectingAreaTable");
String projectingAreaFeaturesOptionalCondition = config.getParam("ProjectingAreaFeaturesOptionalCondition");
String FeaturesColumns = config.getParam("FeaturesColumns");
String positiveCasesTable = config.getParam("PositiveCasesTable");
String negativeCasesTable = config.getParam("NegativeCasesTable");
connection = AlgorithmConfiguration.getConnectionFromConfig(config);
meanHRS = 0;
int numberOfElements = calculateNumberOfPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition);
int numberOfElementsToTake = numberOfElements;//Operations.calcNumOfRepresentativeElements(numberOfElements, numberOfElements);
AnalysisLogger.getLogger().trace("HRS: TAKING "+numberOfElementsToTake+" POINTS ON "+numberOfElements+" FROM THE AREA UNDER ANALYSIS");
// 1 - take the right number of points
double[][] areaPoints = getPoints(projectingAreaTable, projectingAreaFeaturesOptionalCondition, FeaturesColumns, numberOfElementsToTake);
AnalysisLogger.getLogger().trace("HRS: AREA POINTS MATRIX GENERATED");
Operations operations = new Operations();
// 2 - standardize the matrix
areaPoints = operations.standardize(areaPoints);
AnalysisLogger.getLogger().trace("HRS: MATRIX HAS BEEN STANDARDIZED");
// 3 - calculate PCA
PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis();
pca.calcPCA(areaPoints);
AnalysisLogger.getLogger().trace("HRS: PCA HAS BEEN TRAINED");
// 4 - get the pca components for all the vector
double[][] pcaComponents = pca.getComponentsMatrix(areaPoints);
AnalysisLogger.getLogger().trace("HRS: PCA COMPONENT CALCULATED");
// 5 - calculate the frequency distributions for all the pca: each row will be a frequency distribution for a pca component associated to uniform divisions of the range
calcFrequenciesDistributionsForComponents(pcaComponents);
AnalysisLogger.getLogger().trace("HRS: FREQUENCIES FOR COMPONENTS CALCULATED");
// 6 - take positive points and negative points - eventually merge them
double[][] positivePoints = null;
if ((positiveCasesTable!=null) && (positiveCasesTable.length()>0))
positivePoints = getPoints(positiveCasesTable, "", FeaturesColumns, numberOfElementsToTake);
double[][] negativePoints = null;
if ((negativeCasesTable!=null) && (negativeCasesTable.length()>0))
negativePoints = getPoints(negativeCasesTable, "", FeaturesColumns, numberOfElementsToTake);
double[][] habitatPoints = Transformations.mergeMatrixes(positivePoints, negativePoints);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS BUILT FROM POSITIVE AND NEGATIVE POINTS");
// 7 - Standardize the points respect to previous means and variances
habitatPoints = operations.standardize(habitatPoints, operations.means, operations.variances);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN STANDARDIZED RESPECT TO PREVIOUS MEANS AND VARIANCES");
// 8 - calculate the pca components for habitat
double[][] habitatPcaComponents = pca.getComponentsMatrix(habitatPoints);
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
// 9 - calculate frequencies distributions for each component, respect to previous intervals
int components = habitatPcaComponents[0].length;
// 10 - calculate absolute differences and sum -> obtain a hrs for each PCA component = for each feature
double[] hrs = new double[components];
AnalysisLogger.getLogger().trace("HRS: HABITAT POINTS HAVE BEEN TRANSFORMED BY PCA");
double[][] habitatPcaPointsMatrix = Transformations.traspose(habitatPcaComponents);
for (int i = 0; i < components; i++) {
double[] habitatPcaPoints = habitatPcaPointsMatrix[i];
// calculate frequency distributions respect to previous intervals
double[] habitatPcafrequencies = Operations.calcFrequencies(intervals.get(i), habitatPcaPoints);
habitatPcafrequencies = Operations.normalizeFrequencies(habitatPcafrequencies, habitatPcaPoints.length);
double[] absdifference = Operations.vectorialAbsoluteDifference(habitatPcafrequencies, frequencyDistrib.get(i));
hrs[i] = Operations.sumVector(absdifference);
for (int i=0;i<maxTests;i++){
currentIterationStep = i;
AnalysisLogger.getLogger().trace("ITERATION NUMBER "+(i+1));
calcHRS(projectingAreaTable, projectingAreaFeaturesOptionalCondition, FeaturesColumns, positiveCasesTable, negativeCasesTable, numberOfElements);
meanHRS = MathFunctions.incrementAvg(meanHRS, currentHRSScore, i);
if (meanHRSVector==null)
meanHRSVector = new double[currentHRSVector.length];
for (int j=0;j<currentHRSVector.length;j++){
meanHRSVector[j]=MathFunctions.incrementAvg(meanHRSVector[j],currentHRSVector[j],i);
}
AnalysisLogger.getLogger().trace("ITERATION FINISHED "+meanHRS);
status=Math.min(status+100f/maxTests,99f);
}
AnalysisLogger.getLogger().trace("HRS: HRS VECTOR HAS BEEN CALCULATED");
// 11 - obtain hrsScore by weighted sum of hrs respect to inverse eigenvalues
double hrsScore = Operations.scalarProduct(hrs, pca.getInverseEigenvalues());
AnalysisLogger.getLogger().trace("HRS: HRS SCORE HAS BEEN CALCULATED");
output = new HashMap<String, String>();
output.put("HRS_VECTOR", "" + Transformations.vector2String(hrs));
output.put("HRS", "" + hrsScore);
output.put("HRS_VECTOR", "" + Transformations.vector2String(meanHRSVector));
output.put("HRS", "" + meanHRS);
return output;
} catch (Exception e) {
@ -243,6 +217,7 @@ public class HabitatRepresentativeness extends DataAnalysis {
} finally {
connection.close();
status = 100;
AnalysisLogger.getLogger().trace("COMPUTATION FINISHED ");
}
}
@ -302,7 +277,7 @@ public class HabitatRepresentativeness extends DataAnalysis {
// config.setParam("ProjectingAreaTable", "absence_data_baskingshark2");
config.setParam("ProjectingAreaFeaturesOptionalCondition", "where oceanarea>0");
config.setParam("FeaturesColumns", "depthmean,depthmax,depthmin, sstanmean,sbtanmean,salinitymean,salinitybmean, primprodmean,iceconann,landdist,oceanarea");
// config.setParam("PositiveCasesTable", "presence_data_baskingshark");
config.setParam("PositiveCasesTable", "presence_data_baskingshark");
config.setParam("NegativeCasesTable", "absence_data_baskingshark_random");
// config.setParam("NegativeCasesTable", "absence_data_baskingshark2");
@ -349,4 +324,9 @@ public class HabitatRepresentativeness extends DataAnalysis {
return output;
}
/**
 * Reports the overall progress of the analysis as a percentage.
 *
 * <p>{@code status} holds the share contributed by the iterations that have already
 * finished (it grows by {@code 100/maxTests} after each one), while {@code innerstatus}
 * is the 0-100 progress of the iteration currently running. The running iteration
 * therefore contributes {@code innerstatus/maxTests} on top of {@code status}.
 * Scaling that contribution by the iteration index would count the finished iterations
 * twice and make the reported value reach the cap far too early.</p>
 *
 * @return exactly 100 once the computation has finished, otherwise the estimated
 *         progress capped at 99
 */
@Override
public float getStatus() {
    if (status == 100f) {
        return status;
    }
    // slice of the whole job covered so far by the iteration in progress
    float runningIterationShare = innerstatus / (float) maxTests;
    return Math.min(status + runningIterationShare, 99f);
}
}

View File

@ -118,34 +118,38 @@ public class BioClimateAnalysis {
int height = 420;
if (doHspecAn){
BioClimateGraph lineg1 = new BioClimateGraph(SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells));
double min = Operations.getMin(discrepancies);
discrepancies[0] = min;
BioClimateGraph lineg4 = new BioClimateGraph(SERIES[3], Operations.getMax(discrepancies), min);
if (liveRender){
BioClimateGraph lineg1 = new BioClimateGraph(SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells));
BioClimateGraph lineg4 = new BioClimateGraph(SERIES[3], Operations.getMax(discrepancies), min);
lineg4.render(discrepanciesTrend);
lineg1.render(probabilityTrend);
}
producedImages.add(lineg1.renderImgObject(width, height, probabilityTrend));
producedImages.add(lineg4.renderImgObject(width, height, discrepanciesTrend));
producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, probabilityTrend, SERIES[0], Operations.getMax(highProbabilityCells), Operations.getMin(highProbabilityCells)));
producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, discrepanciesTrend, SERIES[3], Operations.getMax(discrepancies), min));
}
if (doHcafAn){
BioClimateGraph lineg6 = new BioClimateGraph(SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce));
BioClimateGraph lineg7 = new BioClimateGraph(SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST));
BioClimateGraph lineg8 = new BioClimateGraph(SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity));
if (liveRender){
BioClimateGraph lineg6 = new BioClimateGraph(SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce));
BioClimateGraph lineg7 = new BioClimateGraph(SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST));
BioClimateGraph lineg8 = new BioClimateGraph(SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity));
lineg6.render(avgIceD);
lineg7.render(avgSSTD);
lineg8.render(avgSalinityD);
}
producedImages.add(lineg6.renderImgObject(width, height, avgIceD));
producedImages.add(lineg7.renderImgObject(width, height, avgSSTD));
producedImages.add(lineg8.renderImgObject(width, height, avgSalinityD));
producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgIceD, SERIES[5], Operations.getMax(avgIce), Operations.getMin(avgIce)));
producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgSSTD, SERIES[6], Operations.getMax(avgSST), Operations.getMin(avgSST)));
producedImages.add(BioClimateGraph.renderStaticImgObject(width, height, avgSalinityD, SERIES[7], Operations.getMax(avgSalinity), Operations.getMin(avgSalinity)));
}
AnalysisLogger.getLogger().trace("Produced All Images");
@ -331,4 +335,6 @@ public class BioClimateAnalysis {
Double d = Double.parseDouble(out.get("MEAN"));
return d;
}
}

View File

@ -1,8 +1,10 @@
package org.gcube.dataanalysis.ecoengine.evaluation.bioclimate;
import java.awt.Color;
import java.awt.Image;
import org.gcube.contentmanagement.graphtools.abstracts.GenericStandaloneGraph;
import org.gcube.contentmanagement.graphtools.data.conversions.ImageTools;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
@ -75,6 +77,64 @@ public class BioClimateGraph extends GenericStandaloneGraph {
return dataset;
}
/**
 * Renders the dataset as a line chart straight into an image, without attaching the
 * chart to any panel or frame.
 *
 * @param width  width passed to the chart's buffered-image rendering
 * @param height height passed to the chart's buffered-image rendering
 * @param set    the dataset to plot
 * @param title  the chart title
 * @param max    upper bound for the range axis
 * @param min    lower bound for the range axis
 * @return the image produced by ImageTools.toImage from the chart's buffered image
 */
public static Image renderStaticImgObject(int width, int height, Dataset set, String title, double max, double min) {
    return ImageTools.toImage(createStaticChart(set, max, min, title).createBufferedImage(width, height));
}
/**
 * Builds a vertical line chart (with legend and tooltips, without URLs) for the given dataset.
 *
 * <p>When {@code max} and {@code min} differ, the range axis auto-ranging is switched off
 * and the axis is fixed to {@code [min, max]}, centred on the midpoint of the two.</p>
 *
 * @param dataset the data to plot; it is cast to DefaultCategoryDataset
 * @param max     upper bound for the range axis
 * @param min     lower bound for the range axis
 * @param title   the chart title
 * @return the configured chart
 */
protected static JFreeChart createStaticChart(Dataset dataset, double max, double min, String title) {
    DefaultCategoryDataset categoryData = (DefaultCategoryDataset) dataset;

    // empty labels on both the domain and the range axis
    JFreeChart lineChart = ChartFactory.createLineChart(title, "", "", categoryData, PlotOrientation.VERTICAL, true, true, false);
    lineChart.setBackgroundPaint(Color.white);

    CategoryPlot linePlot = lineChart.getCategoryPlot();
    linePlot.setRangeGridlinePaint(Color.white);
    linePlot.setDomainCrosshairVisible(true);
    linePlot.setDomainGridlinesVisible(true);
    linePlot.setRangeCrosshairVisible(true);
    // draw both the lines and the shapes of the points
    linePlot.setRenderer(new LineAndShapeRenderer(true, true));

    CategoryAxis domainAxis = linePlot.getDomainAxis(0);
    domainAxis.setCategoryLabelPositions(CategoryLabelPositions.DOWN_45);
    linePlot.mapDatasetToDomainAxis(0, 0);

    if (max == min) {
        // degenerate range: leave the range axis on its default settings
        return lineChart;
    }

    linePlot.getRangeAxis().setAutoRange(false);
    linePlot.getRangeAxis().setUpperBound(max);
    linePlot.getRangeAxis().setLowerBound(min);
    double midpoint = min + ((max - min) / 2d);
    linePlot.getRangeAxis().centerRange(midpoint);
    return lineChart;
}
protected JFreeChart createChart(Dataset dataset) {
// create the chart...

View File

@ -54,6 +54,9 @@ public class InterpolateTables {
this.temporaryDirectory += "/";
AnalysisLogger.setLogger(configPath + AlgorithmConfiguration.defaultLoggerFile);
AnalysisLogger.getLogger().debug("Initialization complete: persistence path "+persistencePath);
config = new LexicalEngineConfiguration();
config.setDatabaseURL(databaseURL);
config.setDatabaseUserName(databaseUserName);
@ -67,9 +70,11 @@ public class InterpolateTables {
referencedbConnection = DatabaseFactory.initDBConnection(configPath + AlgorithmConfiguration.defaultConnectionFile, config);
AnalysisLogger.getLogger().debug("ReferenceDB initialized");
status = 0f;
AnalysisLogger.getLogger().debug("Interpolating from "+table1+" to "+table2);
DatabaseUtils utils = new DatabaseUtils(referencedbConnection);
// analyze table and take information about it
String createTableStatement = utils.buildCreateStatement(table1, "%1$s");
AnalysisLogger.getLogger().debug("Create Statement for table "+table1+": "+createTableStatement);
int numberOfColumns = utils.getColumnDecriptions().size();
// initialize the map of columns to write
List<List<StringBuffer>> outputFiles = new ArrayList<List<StringBuffer>>();
@ -89,7 +94,7 @@ public class InterpolateTables {
// only if data are of numeric type, perform calculation
if (javatype.equals(BigDecimal.class.getName())) {
System.out.println("interpolating -> " + gotColumn);
AnalysisLogger.getLogger().debug("interpolating -> " + gotColumn);
List<List<Object>> interpolations = interpolateColumns(takeFirstColumn, takeSecondColumn, intervals, gotColumnType, function);
@ -99,18 +104,18 @@ public class InterpolateTables {
// for each column to substitute
List<Object> columnToSub = interpolations.get(i);
if (columnToSub.size() > 0) {
System.out.println("UPDATE TABLE " + tableInterp + " ON COLUMN " + gotColumn);
addColumnToTable(outputFiles.get(i - 1), columnToSub);
AnalysisLogger.getLogger().debug("UPDATE TABLE " + tableInterp + " ON COLUMN " + gotColumn);
addColumnToTable(outputFiles.get(i - 1), columnToSub,true);
} else {
System.out.println("DOESN'T CHANGE TABLE " + tableInterp + " COLUMN " + gotColumn);
addColumnToTable(outputFiles.get(i - 1), takeFirstColumn);
AnalysisLogger.getLogger().debug("DOESN'T CHANGE TABLE " + tableInterp + " COLUMN " + gotColumn);
addColumnToTable(outputFiles.get(i - 1), takeFirstColumn,true);
}
}
}
// else update all the tables
else {
for (int i = 0; i < intervals - 2; i++) {
addColumnToTable(outputFiles.get(i), takeFirstColumn);
addColumnToTable(outputFiles.get(i), takeFirstColumn,false);
}
}
@ -118,12 +123,12 @@ public class InterpolateTables {
}
status = 60f;
System.out.println("WRITING ALL THE BUFFERS");
AnalysisLogger.getLogger().debug("WRITING ALL THE BUFFERS");
writeAllStringBuffersToFiles(table1, outputFiles, function,startYear,endYear);
statusstep = 40f/(float)producedfiles.length;
interpolatedTables = new String[producedfiles.length+1];
interpolatedTables = new String[producedfiles.length+2];
interpolatedTables[0] = table1;
for (int i = 0; i < producedfiles.length; i++) {
@ -132,14 +137,16 @@ public class InterpolateTables {
interpolatedTables[i+1] = filename;
String copyFileQuery = DatabaseUtils.copyFileToTableStatement(temporaryDirectory + producedfiles[i].getName(), filename);
// create Table
System.out.println("CREATING TABLE->" + filename);
AnalysisLogger.getLogger().debug("CREATING TABLE->" + filename);
DatabaseFactory.executeSQLUpdate(String.format(createTableStatement, filename), referencedbConnection);
System.out.println("FULFILLING TABLE->" + filename + ": " + copyFileQuery);
AnalysisLogger.getLogger().debug("FULFILLING TABLE->" + filename + ": " + copyFileQuery);
DatabaseFactory.executeSQLUpdate(copyFileQuery, referencedbConnection);
status = Math.min(status+statusstep,99);
}
interpolatedTables[producedfiles.length] = table2;
interpolatedTables[interpolatedTables.length-1] = table2;
AnalysisLogger.getLogger().debug("ALL TABLES HAVE BEEN PRODUCED");
} catch (Exception e) {
e.printStackTrace();
@ -155,7 +162,7 @@ public class InterpolateTables {
return status;
}
private void addColumnToTable(List<StringBuffer> rows, List<Object> elements) {
private void addColumnToTable(List<StringBuffer> rows, List<Object> elements,boolean isNumber) {
int size = elements.size();
for (int i = 0; i < size; i++) {
Object[] couple = (Object[]) elements.get(i);
@ -163,7 +170,10 @@ public class InterpolateTables {
StringBuffer buffer = null;
if (i >= rows.size()) {
buffer = new StringBuffer();
buffer.append(value);
if (isNumber && (value == null) || (value.length()==0))
buffer.append("0");
else
buffer.append(value);
rows.add(buffer);
} else {
buffer = rows.get(i);
@ -175,7 +185,7 @@ public class InterpolateTables {
private void writeAllStringBuffersToFiles(String initialFile, List<List<StringBuffer>> outputFiles, INTERPOLATIONFUNCTIONS function,int startYear,int endYear) throws Exception {
int numOfFiles = outputFiles.size();
int yearStep = (int)((float)(endYear-startYear)/(float)numOfFiles);
int yearStep = (int)((float)(endYear-startYear)/(float)(numOfFiles+1));
producedfiles = new File[numOfFiles];
for (int i = 0; i < numOfFiles; i++) {
List<StringBuffer> rows = outputFiles.get(i);
@ -184,8 +194,11 @@ public class InterpolateTables {
for (int k = 0; k < nrows; k++) {
completeFile.append(rows.get(k) + "\n");
}
int yearCals = startYear+(i+1)*yearStep;
if (yearCals == endYear)
yearCals = endYear-1;
String filename = temporaryDirectory + initialFile + "_" + (startYear+(i+1)*yearStep) + "_" + function.name() + ".csv";
String filename = temporaryDirectory + initialFile + "_" + (yearCals) + "_" + function.name()+"_"+i+System.currentTimeMillis()+ ".csv";
FileTools.saveString(filename, completeFile.toString(), true, "UTF-8");
producedfiles[i] = new File(filename);
}
@ -209,7 +222,7 @@ public class InterpolateTables {
double[] interpolation = null;
if (firstNum != secondNum) {
if (interping)
{ System.out.println("Interpolating ... "); interping = false;}
{ AnalysisLogger.getLogger().debug("Interpolating ... "); interping = false;}
if (function == INTERPOLATIONFUNCTIONS.LINEAR)
interpolation = Operations.linearInterpolation(firstNum, secondNum, intervals);

View File

@ -49,11 +49,11 @@ public class PrincipalComponentAnalysis {
int m= 5;
int n = 5;
double values[][] = new double[m][n];
double val1[] = {1.000d,0.451d,0.511d,0.197d,0.162d};
double val2[] = {0.451d,1.000d,0.445d,0.252d,0.238d};
double val2[] = {1.000d,0.451d,0.511d,0.197d,0.162d};
double val1[] = {0.451d,1.000d,0.445d,0.252d,0.238d};
double val3[] = {0.511d,0.445d,1.000d,0.301d,0.227d};
double val4[] = {0.197d,0.252d,0.301d,1.000d,0.620d};
double val5[] = {0.162d,0.238d,0.227d,0.620d,1.000d};
double val5[] = {0.197d,0.252d,0.301d,1.000d,0.620d};
double val4[] = {0.162d,0.238d,0.227d,0.620d,1.000d};
values[0] = val1;
values[1] = val2;
values[2] = val3;
@ -111,6 +111,23 @@ public class PrincipalComponentAnalysis {
return values;
}
/**
 * Collects the eigenvalue of every component and divides each one by the value that
 * Operations.sumVector returns for the collected vector (presumably their sum).
 *
 * @return a new array of {@code numberOfComponents} normalized eigenvalues
 */
public double[] getNormalizedEigenvalues() {
    double[] normalized = new double[numberOfComponents];
    int component = 0;
    while (component < numberOfComponents) {
        normalized[component] = getEigenvalue(component);
        component++;
    }

    double total = Operations.sumVector(normalized);
    for (int k = 0; k < numberOfComponents; k++) {
        normalized[k] /= total;
    }
    return normalized;
}
public double [] getInverseEigenvalues (){
double [] values = new double[numberOfComponents];
for (int i=0;i<numberOfComponents;i++){
@ -119,6 +136,14 @@ public class PrincipalComponentAnalysis {
return values;
}
/**
 * Returns the reciprocal of each normalized eigenvalue.
 *
 * @return a new array where element {@code i} is {@code 1 / getNormalizedEigenvalues()[i]}
 */
public double[] getInverseNormalizedEigenvalues() {
    double[] reciprocals = new double[numberOfComponents];
    double[] normalizedShares = getNormalizedEigenvalues();
    int component = 0;
    while (component < numberOfComponents) {
        reciprocals[component] = 1d / normalizedShares[component];
        component++;
    }
    return reciprocals;
}
public double[][] getComponentsMatrix(double[][] vectors) throws Exception{

View File

@ -99,17 +99,25 @@ public class Operations {
for (int j = 0; j < npoints; j++) {
if (((i == 0) && (points[j] < interval[i])) || ((i == intervs - 1) && (points[j] >= interval[i - 1]) && (points[j] <= interval[i])) || ((i > 0) && (points[j] >= interval[i - 1]) && (points[j] < interval[i]))) {
System.out.println("(" + (i == 0 ? "" : interval[i - 1]) + "," + interval[i] + ")" + " - " + points[j]);
// System.out.println("(" + (i == 0 ? "" : interval[i - 1]) + "," + interval[i] + ")" + " - " + points[j]);
frequencies[i] = frequencies[i] + 1;
}
}
}
for (int i = 0; i < intervs; i++) {
frequencies[i] = frequencies[i] / (double) npoints;
}
return frequencies;
}
/**
 * Divides every entry of the frequency vector by the number of points.
 * The array is modified in place and the same instance is returned.
 *
 * @param frequencies    the counts to scale; overwritten with the scaled values
 * @param numberOfPoints the divisor applied to every entry
 * @return the {@code frequencies} array itself, after scaling
 */
public static double[] normalizeFrequencies(double[] frequencies, int numberOfPoints) {
    final double divisor = (double) numberOfPoints;
    for (int slot = 0; slot < frequencies.length; slot++) {
        frequencies[slot] /= divisor;
    }
    return frequencies;
}
// checks if an interval contains at least one element from a sequence of points
public static boolean intervalContainsPoints(double min, double max, double[] points) {
// System.out.println(min+"-"+max);
@ -132,33 +140,41 @@ public class Operations {
boolean subdivisionOK = false;
double gap = (max - min) / n;
//search for the best subdivision: find the best n
while (!subdivisionOK) {
// System.out.println("*************************");
boolean notcontains = false;
//take the gap interval to test
for (int i = 0; i < n; i++) {
double leftmost = 0;
double rightmost = 0;
//for the last border take a bit more than max
if (i == n - 1)
leftmost = max + 0.01;
rightmost = max + 0.01;
else
leftmost = min + gap * (i + 1);
if (!intervalContainsPoints(min + gap * i, leftmost, points)) {
rightmost = min + gap * (i + 1);
//if the interval doesn't contain any point discard the subdivision
if (!intervalContainsPoints(min + gap * i, rightmost, points)) {
notcontains = true;
break;
}
}
//if there are empty intervals and there is space for another subdivision proceed
if (notcontains && n > 0) {
n--;
gap = (max - min) / n;
} else if (n == 0) {
}
//otherwise take the default subdivision
else if (n == 0) {
n = maxintervals;
subdivisionOK = true;
} else
}
//if all the intervals are non empty then exit
else
subdivisionOK = true;
}
//once the best n is found build the intervals
double[] intervals = new double[n];
for (int i = 0; i < n; i++) {
if (i<n-1)
@ -179,15 +195,23 @@ public class Operations {
public double[] variances;
// standardizes a matrix: each row represents a vector: outputs columns means and variances
public double[][] standardize(double[][] matrix, double[] meansVec, double[] variancesVec) {
if (matrix.length > 0) {
int ncols = matrix[0].length;
if ((means==null) && (variances==null))
int mrows = matrix.length;
if ((means==null) && (variances==null)){
means = new double[ncols];
variances = new double[ncols];
}
double[][] matrixT = Transformations.traspose(matrix);
for (int i = 0; i < ncols ; i++) {
double[] icolumn = Transformations.getColumn(i, matrix);
double[] icolumn = matrixT[i];
double mean = 0;
if (meansVec == null){
mean = MathFunctions.mean(icolumn);
means[i] = mean;
@ -203,16 +227,20 @@ public class Operations {
else
variance = variancesVec[i];
for (int j = 0; j < icolumn.length; i++) {
for (int j = 0; j < mrows; j++) {
// standardization
icolumn[j] = (icolumn[j] - mean) / variance;
double numerator = (icolumn[j] - mean);
if ((numerator == 0) && (variance == 0))
icolumn[j] = 0;
else if (variance == 0)
icolumn[j] = Double.MAX_VALUE;
else
icolumn[j] = numerator / variance;
}
Transformations.substColumn(icolumn, i, matrix);
}
matrix = Transformations.traspose(matrixT);
}
return matrix;
}

View File

@ -41,6 +41,26 @@ public class Transformations {
}
/**
 * Gets all the columns of a matrix, i.e. its transpose: element {@code [i][j]} of the
 * result is element {@code [j][i]} of the input. The column count is taken from the first row.
 *
 * @param matrix the matrix to transpose
 * @return a new matrix holding the columns of {@code matrix} as rows,
 *         or {@code null} when {@code matrix} has no rows
 */
public static double[][] traspose(double[][] matrix) {
    final int rowCount = matrix.length;
    if (rowCount <= 0) {
        return null;
    }

    final int columnCount = matrix[0].length;
    double[][] transposed = new double[columnCount][rowCount];
    for (int col = 0; col < columnCount; col++) {
        double[] targetRow = transposed[col];
        for (int row = 0; row < rowCount; row++) {
            targetRow[row] = matrix[row][col];
        }
    }
    return transposed;
}
// gets a column from a matrix
public static double[] getColumn(int index, double[][] matrix) {
int colulen = matrix.length;
@ -69,9 +89,9 @@ public class Transformations {
else if ((matrix2 == null) || (matrix2.length == 0))
return matrix1;
else {
int superlen = matrix1.length + matrix2.length;
int len1 = matrix1.length;
int len2 = matrix2.length;
int superlen = len1 + len2;
double[][] supermatrix = new double[superlen][];
for (int i = 0; i < len1; i++) {
supermatrix[i] = matrix1[i];
@ -84,14 +104,14 @@ public class Transformations {
}
public static String vector2String(double[] vector){
String out = "(";
String out = "";
for (int i=0;i<vector.length;i++){
if (i>0)
out = out + ","+vector;
out = out + ","+vector[i];
else
out = ""+vector;
out = ""+vector[i];
}
out +=")";
return out;
}