ecological-engine/src/main/java/org/gcube/dataanalysis/ecoengine/utils/Operations.java

439 lines
12 KiB
Java

package org.gcube.dataanalysis.ecoengine.utils;
import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
public class Operations {
/**
 * Computes the dot product of two vectors over the indexes they have in common.
 * Elements of the longer vector that have no counterpart in the other one are ignored.
 *
 * @param a first vector
 * @param b second vector
 * @return the sum of {@code a[k] * b[k]} for every index valid in both arrays
 */
public static double scalarProduct(double[] a, double[] b) {
    double total = 0;
    int k = 0;
    // stop as soon as either vector runs out: the remaining products would not be added anyway
    while (k < a.length && k < b.length) {
        total += a[k] * b[k];
        k++;
    }
    return total;
}
/**
 * Adds up all the elements of a vector, from the first to the last.
 *
 * @param a the vector to sum
 * @return the sum of the elements, 0 for an empty vector
 */
public static double sumVector(double[] a) {
    double total = 0;
    for (double element : a) {
        total += element;
    }
    return total;
}
/**
 * Computes the element-wise difference {@code a - b}.
 * The result has the length of {@code a}; where {@code b} has no element the value of {@code a} is kept.
 *
 * @param a minuend vector
 * @param b subtrahend vector
 * @return a new array holding the differences
 */
public static double[] vectorialDifference(double[] a, double[] b) {
    // start from a copy of a, so the tail beyond b already holds the right values
    double[] result = a.clone();
    for (int k = 0; k < result.length && k < b.length; k++) {
        result[k] -= b[k];
    }
    return result;
}
/**
 * Computes the element-wise absolute difference {@code |a - b|}.
 * The result has the length of {@code a}; where {@code b} has no element {@code |a[k]|} is used.
 *
 * @param a first vector
 * @param b second vector
 * @return a new array holding the absolute differences
 */
public static double[] vectorialAbsoluteDifference(double[] a, double[] b) {
    double[] result = new double[a.length];
    for (int k = 0; k < result.length; k++) {
        double delta = (k < b.length) ? a[k] - b[k] : a[k];
        result[k] = Math.abs(delta);
    }
    return result;
}
/**
 * Returns the largest value of an array of doubles.
 * NaN elements are skipped. When there is no comparable element (empty array or only NaN)
 * the historical sentinel {@code -Double.MAX_VALUE} is returned.
 *
 * @param points the values to scan
 * @return the maximum value, or {@code -Double.MAX_VALUE} if none is available
 */
public static double getMax(double[] points) {
    // seeding with -Infinity instead of -Double.MAX_VALUE lets an array made only of -Infinity report -Infinity
    double max = Double.NEGATIVE_INFINITY;
    boolean found = false;
    for (double point : points) {
        if (Double.isNaN(point))
            continue;
        found = true;
        if (point > max)
            max = point;
    }
    return found ? max : -Double.MAX_VALUE;
}
/**
 * Returns the largest value of an array of ints.
 * For an empty array the historical sentinel {@code -Integer.MAX_VALUE} is returned.
 *
 * @param points the values to scan
 * @return the maximum value, or {@code -Integer.MAX_VALUE} for an empty array
 */
public static int getMax(int[] points) {
    if (points.length == 0)
        return -Integer.MAX_VALUE;
    // start from the first element: a fixed seed of -Integer.MAX_VALUE is greater than Integer.MIN_VALUE
    // and would be returned in place of it
    int max = points[0];
    for (int i = 1; i < points.length; i++) {
        if (points[i] > max)
            max = points[i];
    }
    return max;
}
/**
 * Returns the smallest value of an array of ints.
 *
 * @param points the values to scan
 * @return the minimum value, or {@code Integer.MAX_VALUE} for an empty array
 */
public static int getMin(int[] points) {
    int lowest = Integer.MAX_VALUE;
    for (int point : points) {
        lowest = Math.min(lowest, point);
    }
    return lowest;
}
/**
 * Returns the smallest value of an array of doubles.
 * NaN elements are skipped. When there is no comparable element (empty array or only NaN)
 * the historical sentinel {@code Double.MAX_VALUE} is returned.
 *
 * @param points the values to scan
 * @return the minimum value, or {@code Double.MAX_VALUE} if none is available
 */
public static double getMin(double[] points) {
    // seeding with +Infinity instead of Double.MAX_VALUE lets an array made only of +Infinity report +Infinity
    double min = Double.POSITIVE_INFINITY;
    boolean found = false;
    for (double point : points) {
        if (Double.isNaN(point))
            continue;
        found = true;
        if (point < min)
            min = point;
    }
    return found ? min : Double.MAX_VALUE;
}
// calculates the frequency distribution of a set of points with respect to a set of intervals
/**
 * Counts how many points fall into each interval.
 * {@code interval[i]} is the upper border of interval {@code i}: the first interval has no lower border,
 * every other interval starts (inclusive) at the previous border. Upper borders are exclusive,
 * except for the last interval, whose upper border is inclusive.
 *
 * @param interval the upper borders of the intervals
 * @param points the points to classify
 * @return an array, as long as {@code interval}, holding the number of points per interval
 */
public static double[] calcFrequencies(double[] interval, double[] points) {
    int intervs = interval.length;
    int npoints = points.length;
    int last = intervs - 1;
    double[] frequencies = new double[intervs];
    for (int i = 0; i < intervs; i++) {
        for (int j = 0; j < npoints; j++) {
            double point = points[j];
            // the lower border is read only when it exists: with a single interval the previous
            // formulation evaluated interval[-1] for every point not below the border
            boolean aboveLower = (i == 0) || (point >= interval[i - 1]);
            boolean belowUpper = (i == last) ? (point <= interval[i]) : (point < interval[i]);
            if (aboveLower && belowUpper)
                frequencies[i] = frequencies[i] + 1;
        }
    }
    return frequencies;
}
/**
 * Divides every frequency by the number of points.
 * The input array is modified in place and returned.
 *
 * @param frequencies the absolute frequencies, overwritten with the normalized ones
 * @param numberOfPoints the divisor
 * @return the same array instance passed as {@code frequencies}
 */
public static double[] normalizeFrequencies(double[] frequencies, int numberOfPoints) {
    final double divisor = numberOfPoints;
    for (int k = 0; k < frequencies.length; k++) {
        frequencies[k] = frequencies[k] / divisor;
    }
    return frequencies;
}
// checks if an interval contains at least one element from a sequence of points
/**
 * Tells whether at least one point lies in the half-open interval {@code [min, max)}.
 *
 * @param min lower border, inclusive
 * @param max upper border, exclusive
 * @param points the points to test
 * @return {@code true} as soon as one point is found inside the interval, {@code false} otherwise
 */
public static boolean intervalContainsPoints(double min, double max, double[] points) {
    for (double point : points) {
        if (point >= min && point < max) {
            return true;
        }
    }
    return false;
}
// finds the best subdivision for a sequence of numbers
/**
 * Splits the range {@code [min, max]} into equally wide intervals such that none of them is empty.
 * The search starts from 10 intervals and decreases the count until every interval contains at least
 * one point. If not even a single interval contains a point, the default subdivision in 10 intervals is used.
 * Note the parameter order: {@code max} comes before {@code min}.
 *
 * @param max upper end of the range
 * @param min lower end of the range
 * @param points the points that must populate the intervals
 * @return the upper borders of the intervals; the last border is always {@code Double.POSITIVE_INFINITY}
 */
public static double[] uniformDivide(double max, double min, double[] points) {
    final int maxintervals = 10;
    int n = maxintervals;
    double gap = (max - min) / n;
    // search for the best subdivision: the largest n for which no interval is empty
    while (n > 0) {
        boolean emptyIntervalFound = false;
        for (int i = 0; i < n; i++) {
            // for the last border take a bit more than max, so that max itself falls inside
            double rightmost = (i == n - 1) ? max + 0.01 : min + gap * (i + 1);
            // one empty interval is enough to discard the subdivision
            if (!intervalContainsPoints(min + gap * i, rightmost, points)) {
                emptyIntervalFound = true;
                break;
            }
        }
        if (!emptyIntervalFound)
            break;
        n--;
        // never divide by zero: the gap of a 0-interval subdivision is meaningless
        if (n > 0)
            gap = (max - min) / n;
    }
    // no valid subdivision: fall back to the default one and recompute its gap, otherwise the
    // borders would be built with the gap of the last attempt
    if (n == 0) {
        n = maxintervals;
        gap = (max - min) / n;
    }
    // once the best n is found build the intervals
    double[] intervals = new double[n];
    for (int i = 0; i < n; i++) {
        if (i < n - 1)
            intervals[i] = min + gap * (i + 1);
        else
            intervals[i] = Double.POSITIVE_INFINITY;
    }
    return intervals;
}
/**
 * Standardizes a matrix without precomputed statistics: delegates to
 * {@link #standardize(double[][], double[], double[])} passing {@code null} for both vectors.
 *
 * @param matrix the matrix to standardize
 * @return whatever the three-argument overload returns for this matrix
 */
public double[][] standardize(double[][] matrix) {
    final double[] noPrecomputedMeans = null;
    final double[] noPrecomputedVariances = null;
    return standardize(matrix, noPrecomputedMeans, noPrecomputedVariances);
}
// column means stored by standardize(matrix, meansVec, variancesVec) when it is called with meansVec == null
public double[] means;
// column variances stored by standardize(matrix, meansVec, variancesVec) when it is called with variancesVec == null
public double[] variances;
// standardizes a matrix in which each row represents a vector; outputs the column means and variances
/**
 * Standardizes a matrix column by column: every element becomes {@code (value - mean) / variance}.
 * When {@code meansVec} (respectively {@code variancesVec}) is {@code null} the statistic is computed
 * on the column and stored in the {@link #means} (respectively {@link #variances}) field;
 * otherwise the supplied value is used and the field is not written.
 * An empty matrix is returned unchanged.
 *
 * @param matrix the matrix to standardize; the number of columns is taken from the first row
 * @param meansVec precomputed column means, or {@code null} to compute them
 * @param variancesVec precomputed column variances, or {@code null} to compute them
 * @return the standardized matrix, as produced by {@code Transformations.traspose}
 */
public double[][] standardize(double[][] matrix, double[] meansVec, double[] variancesVec) {
    if (matrix.length > 0) {
        int ncols = matrix[0].length;
        int mrows = matrix.length;
        if ((means == null) && (variances == null)) {
            means = new double[ncols];
            variances = new double[ncols];
        }
        // a field that is about to be written must exist and be large enough: this covers the instance
        // being reused on a wider matrix and the case in which only one of the public fields is null
        if ((meansVec == null) && ((means == null) || (means.length < ncols)))
            means = new double[ncols];
        if ((variancesVec == null) && ((variances == null) || (variances.length < ncols)))
            variances = new double[ncols];
        // work on columns: presumably traspose returns the transposed matrix - TODO confirm it does not alias the input rows
        double[][] matrixT = Transformations.traspose(matrix);
        for (int i = 0; i < ncols; i++) {
            double[] icolumn = matrixT[i];
            double mean = 0;
            if (meansVec == null) {
                mean = MathFunctions.mean(icolumn);
                means[i] = mean;
            } else
                mean = meansVec[i];
            double variance = 0;
            if (variancesVec == null) {
                // NOTE(review): the meaning of the Double.NEGATIVE_INFINITY argument is defined by the RapidMiner helper - confirm
                variance = com.rapidminer.tools.math.MathFunctions.variance(icolumn, Double.NEGATIVE_INFINITY);
                variances[i] = variance;
            } else
                variance = variancesVec[i];
            for (int j = 0; j < mrows; j++) {
                // standardization
                // NOTE(review): the divisor is the variance, not its square root - kept as is, confirm it is intended
                double numerator = (icolumn[j] - mean);
                if ((numerator == 0) && (variance == 0))
                    icolumn[j] = 0;
                else if (variance == 0)
                    // a non-zero deviation over a zero variance is mapped to the largest finite value
                    icolumn[j] = Double.MAX_VALUE;
                else
                    icolumn[j] = numerator / variance;
            }
        }
        matrix = Transformations.traspose(matrixT);
    }
    return matrix;
}
// calculates the number of elements to take from a set, weighted inversely with respect to the number of elements
/**
 * Computes how many elements to take from a set as {@code numberOfElements / log10(numberOfElements)},
 * truncated to an int and never lower than {@code minimumNumberToTake}.
 * Sets with at most one element are handled apart, because the logarithm is zero or undefined there:
 * the result is then the larger of {@code minimumNumberToTake} and the (non-negative) number of elements.
 *
 * @param numberOfElements size of the set
 * @param minimumNumberToTake lower bound for the result
 * @return the number of representative elements
 */
public static int calcNumOfRepresentativeElements(int numberOfElements, int minimumNumberToTake) {
    // log10(1) == 0: the division would give Infinity, which the int cast turns into Integer.MAX_VALUE
    if (numberOfElements <= 1)
        return Math.max(minimumNumberToTake, Math.max(numberOfElements, 0));
    // NOTE(review): for 2 <= numberOfElements <= 9 the logarithm is below 1, so the quotient can
    // exceed numberOfElements; that behavior is left unchanged
    return (int) Math.max(minimumNumberToTake, numberOfElements / Math.log10(numberOfElements));
}
/**
 * Builds a sequence of {@code intervals} values going from {@code el1} to {@code el2}.
 * The first element is {@code el1}, the last one is {@code el2} and the elements in between are
 * {@code el1 + step * i} with {@code step = (el2 - el1) / intervals}.
 * With {@code intervals == 1} the only element is {@code el2}; with {@code intervals == 0} the result is empty.
 *
 * @param el1 first value of the sequence
 * @param el2 last value of the sequence
 * @param intervals number of values to produce
 * @return the array of interpolated values
 */
public static double[] linearInterpolation(double el1, double el2, int intervals) {
    // a negative count still fails on allocation, exactly as before
    double[] intervalsd = new double[intervals];
    // nothing to fill: writing the first and last element would overflow the empty array
    if (intervals == 0)
        return intervalsd;
    // NOTE(review): the step divides by intervals and not by intervals - 1, so the gap before the
    // last element is wider than the others; kept as is - confirm it is intended
    double step = (el2 - el1) / (double) intervals;
    intervalsd[0] = el1;
    for (int i = 1; i < intervals - 1; i++) {
        intervalsd[i] = el1 + step * i;
    }
    intervalsd[intervals - 1] = el2;
    return intervalsd;
}
/**
 * Evaluates the parabola {@code a*(x-shift)^2 + b*(x-shift) + c}.
 *
 * @param a quadratic coefficient
 * @param b linear coefficient
 * @param c constant term
 * @param x the abscissa
 * @param shift horizontal translation applied to {@code x}
 * @return the ordinate of the parabola at {@code x}
 */
private static double parabol(double a, double b, double c, double x, double shift) {
    final double dx = x - shift;
    return a * dx * dx + b * dx + c;
}
/**
 * Solves {@code a*x^2 + b*x + c = |y|} for {@code x}.
 *
 * @param a quadratic coefficient
 * @param b linear coefficient
 * @param c constant term
 * @param y the ordinate; its absolute value is used
 * @return a two-element array: the root taken with the positive square root first, then the one with the negative square root
 */
public static double[] inverseParabol(double a, double b, double c, double y) {
    // square root of the discriminant, shared by the two roots
    final double root = Math.sqrt(b * b + 4 * a * (Math.abs(y) - c));
    final double denominator = 2 * a;
    final double plusRoot = (-1d * b + root) / denominator;
    final double minusRoot = (-1d * b - root) / denominator;
    return new double[] { plusRoot, minusRoot };
}
/**
 * Returns the base-10 logarithm of the absolute value of {@code y}.
 * Zero is mapped to {@code -Double.MAX_VALUE} instead of negative infinity.
 *
 * @param y the value to transform
 * @return {@code log10(|y|)}, or {@code -Double.MAX_VALUE} when {@code y} is zero
 */
public static double logaritmicTransformation(double y) {
    final double magnitude = Math.abs(y);
    return (magnitude == 0) ? -Double.MAX_VALUE : Math.log10(magnitude);
}
// the parabola is centered on the start point
/**
 * Builds a sequence of {@code intervals} values going from {@code startP} to {@code endP}, spaced
 * according to the parabola {@code y = 1000 * (x - startP)^2}: the intermediate values are taken at
 * equal steps along y and mapped back to x through {@code inverseParabol}.
 * The first element is {@code startP} and the last one is {@code endP}.
 * With {@code intervals == 1} the only element is {@code endP}; with {@code intervals == 0} the result is empty.
 *
 * @param startP first value of the sequence, also the vertex of the parabola
 * @param endP last value of the sequence
 * @param intervals number of values to produce
 * @return the array of interpolated values
 */
public static double[] parabolicInterpolation(double startP, double endP, int intervals) {
    // a negative count still fails on allocation, exactly as before
    double[] linearpoints = new double[intervals];
    // nothing to fill: writing the first and last element would overflow the empty array
    if (intervals == 0)
        return linearpoints;
    double start = startP;
    double end = endP;
    double shift = start;
    double a = 1000d;
    double b = 0d;
    double c = 0d;
    double parabolStart = parabol(a, b, c, start, shift);
    if (start < 0)
        parabolStart = -1 * parabolStart;
    double parabolEnd = parabol(a, b, c, end, start);
    if (end < 0)
        parabolEnd = -1 * parabolEnd;
    // NOTE(review): as in linearInterpolation the step divides by intervals and not by intervals - 1;
    // kept as is - confirm it is intended
    double difference = Math.abs(parabolEnd - parabolStart);
    double step = (difference / (double) intervals);
    linearpoints[0] = startP;
    for (int i = 1; i < intervals - 1; i++) {
        // move along y away from the start, upwards or downwards according to the direction of the sequence
        double ypoint = 0;
        if (end > start)
            ypoint = parabolStart + (i * step);
        else
            ypoint = parabolStart - (i * step);
        double res[] = inverseParabol(a, b, c, Math.abs(ypoint));
        // a negative y selects the root obtained with the negative square root
        if (ypoint < 0)
            linearpoints[i] = res[1] + shift;
        else
            linearpoints[i] = res[0] + shift;
    }
    linearpoints[intervals - 1] = endP;
    return linearpoints;
}
/**
 * Manual check: draws 20 random points in [0, 10), prints their range and then, for each interval
 * found by uniformDivide, its upper border and the number of points it contains.
 *
 * @param args not used
 */
public static void main1(String[] args) {
    final int sampleSize = 20;
    double[] points = new double[sampleSize];
    for (int k = 0; k < sampleSize; k++) {
        points[k] = 10 * Math.random();
    }
    double highest = getMax(points);
    double lowest = getMin(points);
    System.out.println("<" + lowest + "," + highest + ">");
    double[] borders = uniformDivide(highest, lowest, points);
    double[] counts = calcFrequencies(borders, points);
    for (int k = 0; k < borders.length; k++) {
        System.out.print(borders[k] + " ");
        System.out.println("->" + counts[k] + " ");
    }
}
/**
 * Manual check: runs takeChunks on 11549 elements with a partition factor of 11549/100 and prints "OK".
 * The result of the call is not inspected.
 *
 * @param args not used
 */
public static void main2(String[] args) {
    final int elements = 11549;
    takeChunks(elements, elements / 100);
    System.out.println("OK");
}
// distributes elements uniformly into parts
/**
 * Distributes {@code numberOfElements} over {@code partitionFactor} chunks as evenly as possible.
 * The remainder of the division is spread one element at a time over the first chunks.
 * When there are fewer elements than chunks, one chunk of size 1 per element is returned.
 * A non-positive {@code partitionFactor} yields a single chunk of size 0.
 *
 * @param numberOfElements total number of elements to distribute
 * @param partitionFactor number of chunks requested
 * @return the size of each chunk
 */
public static int[] takeChunks(int numberOfElements, int partitionFactor) {
    if (partitionFactor <= 0) {
        return new int[1];
    }
    if (partitionFactor == 1) {
        return new int[] { numberOfElements };
    }
    final int base = numberOfElements / partitionFactor;
    final int surplus = numberOfElements % partitionFactor;
    if (base == 0) {
        // fewer elements than chunks: one element per chunk
        int[] singles = new int[numberOfElements];
        for (int k = 0; k < singles.length; k++) {
            singles[k] = 1;
        }
        return singles;
    }
    int[] chunks = new int[partitionFactor];
    for (int k = 0; k < chunks.length; k++) {
        // the first `surplus` chunks absorb one extra element each
        chunks[k] = (k < surplus) ? base + 1 : base;
    }
    return chunks;
}
/**
 * Computes the size of the chunks needed to split {@code numberOfElements} into {@code partitionFactor} parts.
 * The size is the quotient of the division, increased by one when the division is not exact;
 * a zero quotient yields a chunk size of 1.
 *
 * @param numberOfElements total number of elements
 * @param partitionFactor number of parts
 * @return the chunk size
 */
public static int chunkize(int numberOfElements, int partitionFactor) {
    final int quotient = numberOfElements / partitionFactor;
    if (quotient == 0) {
        return 1;
    }
    final boolean exactDivision = (numberOfElements % partitionFactor) == 0;
    return exactDivision ? quotient : quotient + 1;
}
/**
 * Takes {@code maxElementsToTake} equally spaced samples from {@code min} to {@code max}, both included.
 * Samples that would exceed {@code max} are clamped to it.
 * A single sample is {@code min}; zero samples give an empty array.
 *
 * @param min first sample
 * @param max upper limit of the samples
 * @param maxElementsToTake number of samples
 * @return the array of samples
 */
public static double[] uniformSampling(double min, double max, int maxElementsToTake) {
    double[] samples = new double[maxElementsToTake];
    // with a single sample there is no step: dividing by zero would give an infinite step and
    // 0 * Infinity would turn the only sample into NaN
    double step = (maxElementsToTake > 1) ? (max - min) / (double) (maxElementsToTake - 1) : 0d;
    for (int i = 0; i < samples.length; i++) {
        double value = min + i * step;
        if (value > max)
            value = max;
        samples[i] = value;
    }
    return samples;
}
/**
 * Takes {@code maxElementsToTake} equally spaced samples from {@code min} to {@code max}, both included,
 * truncating each of them to an int. Samples that would exceed {@code max} are clamped to it before truncation.
 * A single sample is {@code min} truncated; zero samples give an empty array.
 *
 * @param min first sample
 * @param max upper limit of the samples
 * @param maxElementsToTake number of samples
 * @return the array of truncated samples
 */
public static int[] uniformIntegerSampling(double min, double max, int maxElementsToTake) {
    int[] samples = new int[maxElementsToTake];
    // with a single sample there is no step: dividing by zero would give an infinite step,
    // 0 * Infinity would be NaN and the int cast would turn it into 0 whatever min is
    double step = (maxElementsToTake > 1) ? (max - min) / (double) (maxElementsToTake - 1) : 0d;
    for (int i = 0; i < samples.length; i++) {
        double value = min + i * step;
        if (value > max)
            value = max;
        samples[i] = (int) value;
    }
    return samples;
}
/**
 * Manual check: runs uniformSampling for 10 samples between 0 and 9 and prints "OK".
 * The samples are not inspected.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final int sampleCount = 10;
    uniformSampling(0, 9, sampleCount);
    System.out.println("OK");
}
}