ecological-engine/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java

141 lines
5.5 KiB
Java

package org.gcube.dataanalysis.ecoengine.clustering;
import java.util.ArrayList;
import java.util.List;
import org.gcube.contentmanagement.graphtools.abstracts.GenericStandaloneGraph;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.ResourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.tools.OperatorService;
/**
 * K-means clustering algorithm that delegates the actual clustering to the RapidMiner
 * {@code KMeans} operator.
 *
 * <p>This class relies on members that are not declared here ({@code config}, {@code points},
 * {@code status}, {@code minPoints}, {@code OccurrencePointsTable}, {@code shutdown()},
 * {@code BuildClusterTable(...)}, ...); they are presumably inherited from {@link DBScan} -
 * confirm their contracts there.
 */
public class KMeans extends DBScan {

    private static final Logger logger = LoggerFactory.getLogger(KMeans.class);

    /** Raw value of the {@code k} parameter (expected number of clusters). */
    private String kk;
    /** Raw value of the {@code max_runs} parameter. */
    private String maxRuns;
    /** Raw value of the {@code max_optimization_steps} parameter. */
    private String maxOptimizations;

    /**
     * Reads the algorithm parameters from the given configuration and stores the configuration
     * itself. A {@code null} configuration is ignored and leaves the current state untouched.
     *
     * <p>The two table names are lower-cased with {@link java.util.Locale#ROOT} so that the result
     * does not depend on the JVM default locale (e.g. the Turkish dotless-i mapping would
     * otherwise alter identifiers containing {@code I}).
     *
     * @param config the configuration to read parameters from; may be {@code null}
     * @throws NullPointerException if {@code OccurrencePointsTable} or
     *     {@code OccurrencePointsClusterTable} is missing from a non-null configuration
     */
    @Override
    public void setConfiguration(AlgorithmConfiguration config) {
        if (config == null) {
            return;
        }
        kk = config.getParam("k");
        maxRuns = config.getParam("max_runs");
        maxOptimizations = config.getParam("max_optimization_steps");
        OccurrencePointsClusterLabel = config.getParam("OccurrencePointsClusterLabel");
        OccurrencePointsTable =
                config.getParam("OccurrencePointsTable").toLowerCase(java.util.Locale.ROOT);
        OccurrencePointsClusterTable =
                config.getParam("OccurrencePointsClusterTable").toLowerCase(java.util.Locale.ROOT);
        FeaturesColumnNames = config.getParam("FeaturesColumnNames");
        // Read here but not used directly by this class.
        minPoints = config.getParam("min_points");
        // Assigned last: compute() treats a null config as "parameters incomplete".
        this.config = config;
    }

    /**
     * Runs the RapidMiner k-means operator on {@code points} and hands the operator output to
     * {@code BuildClusterTable}. Whatever the outcome, {@code shutdown()} is invoked and
     * {@code status} is set to 100 before the method exits.
     *
     * @throws Exception if the configuration or any of {@code k}, {@code max_runs},
     *     {@code max_optimization_steps} is missing, or if any step of the clustering fails
     */
    @Override
    public void compute() throws Exception {
        try {
            // The validation stays inside the try so that the finally clause also runs on bad input.
            if (config == null || kk == null || maxRuns == null || maxOptimizations == null) {
                throw new Exception("KMeans: Error incomplete parameters");
            }

            logger.debug("KMeans: Settin up the cluster");
            com.rapidminer.operator.clustering.clusterer.KMeans kmeans =
                    (com.rapidminer.operator.clustering.clusterer.KMeans)
                            OperatorService.createOperator("KMeans");
            kmeans.setParameter("k", kk);
            kmeans.setParameter("max_runs", maxRuns);
            kmeans.setParameter("max_optimization_steps", maxOptimizations);
            kmeans.setParameter("keep_example_set", "true");
            kmeans.setParameter("add_cluster_attribute", "true");

            IOContainer innerInput = new IOContainer(points);

            logger.debug("KMeans: Clustering...");
            long startMillis = System.currentTimeMillis();
            IOContainer output = kmeans.apply(innerInput);
            long elapsedMillis = System.currentTimeMillis() - startMillis;
            logger.debug("KMEANS: ...ELAPSED CLUSTERING TIME: {}", elapsedMillis);
            logger.debug("KMeans: ...Clustering Finished");
            status = 70f;

            IOObject[] outputvector = output.getIOObjects();
            BuildClusterTable(outputvector);
        } finally {
            shutdown();
            status = 100f;
        }
    }

    /**
     * Describes the parameters accepted by this algorithm, followed by the default database
     * parameters appended by {@link DatabaseType#addDefaultDBPars}.
     *
     * @return a new, mutable list of parameter descriptors
     */
    @Override
    public List<StatisticalType> getInputParameters() {
        List<TableTemplates> templateOccs = new ArrayList<TableTemplates>();
        templateOccs.add(TableTemplates.GENERIC);

        InputTable occurrenceTable = new InputTable(templateOccs, "OccurrencePointsTable", "Occurrence Points Table. Max 4000 points", "occurrences");
        ColumnTypesList featureColumns = new ColumnTypesList("OccurrencePointsTable", "FeaturesColumnNames", "column Names for the features", false);
        PrimitiveType clusterLabel = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "OccurrencePointsClusterLabel", "table name of the resulting distribution", "OccCluster_");
        ServiceType clusterTable = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable", "table name of the distribution", "occCluster_");
        PrimitiveType clusterCount = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k", "expected Number of Clusters", "3");
        PrimitiveType runs = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs", "max runs of the clustering procedure", "10");
        PrimitiveType optimizationSteps = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps", "max number of internal optimization steps", "5");
        PrimitiveType outlierPoints = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points", "number of points which define an outlier set", "2");

        // Insertion order is preserved from the original implementation.
        List<StatisticalType> parameters = new ArrayList<StatisticalType>();
        parameters.add(occurrenceTable);
        parameters.add(featureColumns);
        parameters.add(clusterLabel);
        parameters.add(clusterTable);
        parameters.add(clusterCount);
        parameters.add(runs);
        parameters.add(optimizationSteps);
        parameters.add(outlierPoints);
        DatabaseType.addDefaultDBPars(parameters);
        return parameters;
    }

    /**
     * Returns the human-readable description of this algorithm.
     *
     * @return the description text
     */
    @Override
    public String getDescription() {
        return "A clustering algorithm for real valued vectors that relies on the k-means algorithm, i.e. a method aiming to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean, serving as a prototype of the cluster. A Maximum of 4000 points is allowed.";
    }

    /** Lazily created by {@link #getResourceLoad()}. */
    ResourceFactory resourceManager;

    /**
     * Returns the resource load reported by the lazily created {@link ResourceFactory},
     * queried with the fixed argument {@code 1}.
     *
     * @return the resource load string produced by {@code ResourceFactory.getResourceLoad(1)}
     */
    public String getResourceLoad() {
        if (resourceManager == null) {
            resourceManager = new ResourceFactory();
        }
        return resourceManager.getResourceLoad(1);
    }

    /**
     * Returns the resources string produced by {@code ResourceFactory.getResources(100f)}.
     *
     * @return the resources string
     */
    @Override
    public String getResources() {
        return ResourceFactory.getResources(100f);
    }
}