ecological-engine/src/main/java/org/gcube/dataanalysis/ecoengine/models/cores/pca/PrincipalComponentAnalysis....

144 lines
3.8 KiB
Java

package org.gcube.dataanalysis.ecoengine.models.cores.pca;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.utils.Operations;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.features.transformation.PCA;
import com.rapidminer.operator.features.transformation.PCAModel;
import com.rapidminer.tools.OperatorService;
/**
 * Wrapper around the RapidMiner {@code PCA} operator.
 *
 * <p>{@link #calcPCA(double[][])} fits a {@link PCAModel} on a matrix of sample vectors; the
 * remaining methods read eigenvalues and eigenvectors from that model or use them to transform
 * vectors. Instances are stateful: the model of the most recent {@code calcPCA} call is kept in
 * {@link #innermodel}.
 */
public class PrincipalComponentAnalysis {

    private static final Logger logger = LoggerFactory.getLogger(PrincipalComponentAnalysis.class);

    /** Model produced by the last successful {@link #calcPCA(double[][])} call; {@code null} before that. */
    PCAModel innermodel;

    /**
     * Value of {@code innermodel.getMaximumNumberOfComponents()} captured by the last successful
     * {@link #calcPCA(double[][])} call; {@code 0} before that.
     */
    int numberOfComponents;

    /**
     * Initializes RapidMiner through the given configuration by calling
     * {@code config.initRapidMiner()}.
     *
     * <p>NOTE(review): presumably this has to run before {@link #calcPCA(double[][])} so that the
     * "PCA" operator can be created - confirm against {@code AlgorithmConfiguration}.
     *
     * @param config configuration used to initialize RapidMiner
     */
    public void init(AlgorithmConfiguration config) {
        config.initRapidMiner();
    }

    /**
     * Returns the underlying model.
     *
     * @return the model of the last {@link #calcPCA(double[][])} call, or {@code null} if no PCA
     *         has been computed yet
     */
    public PCAModel getModel() {
        return innermodel;
    }

    /**
     * Returns the eigenvector with the given index, as reported by the model.
     *
     * @param index index of the component
     * @return the eigenvector returned by {@code PCAModel.getEigenvector(index)}
     * @throws IllegalStateException if no PCA has been computed yet
     */
    public double[] getEigenvector(int index) {
        return requireModel().getEigenvector(index);
    }

    /**
     * Returns the eigenvalue with the given index, as reported by the model.
     *
     * @param index index of the component
     * @return the eigenvalue returned by {@code PCAModel.getEigenvalue(index)}
     * @throws IllegalStateException if no PCA has been computed yet
     */
    public double getEigenvalue(int index) {
        return requireModel().getEigenvalue(index);
    }

    /**
     * Returns the eigenvalues of all components.
     *
     * @return a new array of length {@code numberOfComponents}; empty if no PCA has been computed
     */
    public double[] getEigenvalues() {
        return collectEigenvalues();
    }

    /**
     * Returns the eigenvalues divided by their sum (computed with {@code Operations.sumVector}).
     *
     * <p>No check is made for a zero sum; in that case the division yields non-finite values.
     *
     * @return a new array of length {@code numberOfComponents}
     */
    public double[] getNormalizedEigenvalues() {
        double[] values = collectEigenvalues();
        double sumEigen = Operations.sumVector(values);
        for (int i = 0; i < numberOfComponents; i++) {
            values[i] = values[i] / sumEigen;
        }
        return values;
    }

    /**
     * Returns the reciprocal ({@code 1/x}) of every eigenvalue.
     *
     * <p>A zero eigenvalue is not special-cased and yields an infinite entry.
     *
     * @return a new array of length {@code numberOfComponents}
     */
    public double[] getInverseEigenvalues() {
        double[] values = new double[numberOfComponents];
        for (int i = 0; i < numberOfComponents; i++) {
            values[i] = 1d / getEigenvalue(i);
        }
        return values;
    }

    /**
     * Returns the reciprocal ({@code 1/x}) of every normalized eigenvalue, i.e. of the values
     * returned by {@link #getNormalizedEigenvalues()}.
     *
     * @return a new array of length {@code numberOfComponents}
     */
    public double[] getInverseNormalizedEigenvalues() {
        double[] values = new double[numberOfComponents];
        double[] weightedEigens = getNormalizedEigenvalues();
        for (int i = 0; i < numberOfComponents; i++) {
            values[i] = 1d / weightedEigens[i];
        }
        return values;
    }

    /**
     * Applies {@link #getComponents(double[])} to every row of the given matrix.
     *
     * @param vectors sample vectors, one per row
     * @return a matrix with one row of components per input row
     * @throws Exception if {@link #getComponents(double[])} fails for a row
     */
    public double[][] getComponentsMatrix(double[][] vectors) throws Exception {
        int nsamples = vectors.length;
        double[][] components = new double[nsamples][];
        for (int i = 0; i < nsamples; i++) {
            components[i] = getComponents(vectors[i]);
        }
        return components;
    }

    /**
     * Computes, for every component {@code i}, {@code Operations.scalarProduct(vector,
     * getEigenvector(i))}.
     *
     * <p>The vector is passed to the scalar product as-is; this method performs no centering or
     * scaling of its own.
     *
     * @param vector the sample vector to transform
     * @return a new array of length {@code numberOfComponents}
     * @throws Exception if the scalar product computation fails
     */
    public double[] getComponents(double[] vector) throws Exception {
        double[] components = new double[numberOfComponents];
        for (int i = 0; i < numberOfComponents; i++) {
            components[i] = Operations.scalarProduct(vector, getEigenvector(i));
        }
        return components;
    }

    /**
     * Applies the stored model to the given samples: the matrix is converted to an
     * {@link ExampleSet}, passed to {@code PCAModel.apply}, and the result is converted back to a
     * matrix.
     *
     * @param sampleVectors sample vectors, one per row
     * @return the matrix obtained from the transformed example set
     * @throws IllegalStateException if no PCA has been computed yet
     * @throws Exception if the conversion or the model application fails
     */
    protected double[][] getPCA(double[][] sampleVectors) throws Exception {
        PCAModel model = requireModel();
        ExampleSet set = Transformations.matrix2ExampleSet(sampleVectors);
        ExampleSet outset = model.apply(set);
        return Transformations.exampleSet2Matrix(outset);
    }

    /**
     * Runs the RapidMiner "PCA" operator on the given samples and stores the resulting model.
     *
     * <p>The operator is configured with {@code dimensionality_reduction = "keep variance"},
     * {@code variance_threshold = 0.95} and {@code number_of_components = -1}. On success
     * {@link #innermodel} and {@link #numberOfComponents} are replaced together; on failure the
     * previous state is left untouched.
     *
     * @param sampleVectors sample vectors, one per row
     * @throws IllegalStateException if the operator output does not contain a {@link PCAModel}
     *         at index 1
     * @throws Exception if the operator cannot be created or applied
     */
    public void calcPCA(double[][] sampleVectors) throws Exception {
        logger.debug("STARTING PCA COMPUTATION");

        PCA pca = (PCA) OperatorService.createOperator("PCA");
        pca.setParameter("variance_threshold", "0.95");
        pca.setParameter("dimensionality_reduction", "keep variance");
        pca.setParameter("number_of_components", "-1");

        ExampleSet set = Transformations.matrix2ExampleSet(sampleVectors);
        IOContainer innerInput = new IOContainer(set);
        IOContainer output = pca.apply(innerInput);
        IOObject[] outputvector = output.getIOObjects();

        // The model is read from index 1; index 0 is presumably the transformed ExampleSet
        // (not used here) - confirm against the RapidMiner PCA operator.
        if (outputvector == null || outputvector.length < 2 || !(outputvector[1] instanceof PCAModel)) {
            throw new IllegalStateException("PCA operator did not return a PCAModel at output index 1");
        }
        PCAModel model = (PCAModel) outputvector[1];
        int components = model.getMaximumNumberOfComponents();

        // Publish both fields only after everything succeeded so they never disagree.
        innermodel = model;
        numberOfComponents = components;
        logger.debug("MODEL APPLIED");
    }

    /** Returns the stored model or fails with a descriptive error when none has been computed. */
    private PCAModel requireModel() {
        if (innermodel == null) {
            throw new IllegalStateException("PCA model not available: calcPCA must be called first");
        }
        return innermodel;
    }

    /** Collects {@code getEigenvalue(i)} for every component into a new array. */
    private double[] collectEigenvalues() {
        double[] values = new double[numberOfComponents];
        for (int i = 0; i < numberOfComponents; i++) {
            values[i] = getEigenvalue(i);
        }
        return values;
    }
}