ecological-engine/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java

package org.gcube.dataanalysis.ecoengine.clustering;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.gcube.contentmanagement.graphtools.abstracts.GenericStandaloneGraph;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.ResourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.tools.OperatorService;

/**
 * K-Means clusterer that delegates the actual clustering to RapidMiner's
 * {@code KMeans} operator.
 *
 * <p>The class extends {@link DBScan} and relies on state and helpers that are not
 * declared here ({@code config}, {@code points}, {@code status}, {@code minPoints},
 * the {@code OccurrencePoints*} / {@code FeaturesColumnNames} fields,
 * {@code BuildClusterTable} and {@code shutdown}); those come from the
 * {@code DBScan} hierarchy.
 */
public class KMeans extends DBScan{

	private static final Logger logger = LoggerFactory.getLogger(KMeans.class);

	/** Value of the {@code k} parameter (number of clusters), kept as the raw string. */
	private String kk;
	/** Value of the {@code max_runs} parameter, kept as the raw string. */
	private String maxRuns;
	/** Value of the {@code max_optimization_steps} parameter, kept as the raw string. */
	private String maxOptimizations;

	/**
	 * Copies the K-Means and table parameters out of {@code config} into this instance.
	 *
	 * <p>A {@code null} configuration is ignored and leaves the instance untouched.
	 * The two table names are lower-cased with {@link Locale#ROOT} so the result does
	 * not depend on the JVM default locale.
	 *
	 * @param config the algorithm configuration to read from; may be {@code null}
	 * @throws NullPointerException if {@code OccurrencePointsTable} or
	 *         {@code OccurrencePointsClusterTable} is missing from {@code config}
	 */
	@Override
	public void setConfiguration(AlgorithmConfiguration config) {
		if (config == null) {
			return;
		}
		kk = config.getParam("k");
		maxRuns = config.getParam("max_runs");
		maxOptimizations = config.getParam("max_optimization_steps");
		OccurrencePointsClusterLabel = config.getParam("OccurrencePointsClusterLabel");
		OccurrencePointsTable = requiredLowerCase(config, "OccurrencePointsTable");
		OccurrencePointsClusterTable = requiredLowerCase(config, "OccurrencePointsClusterTable");
		FeaturesColumnNames = config.getParam("FeaturesColumnNames");
		minPoints = config.getParam("min_points");
		this.config = config;
	}

	/**
	 * Reads a mandatory parameter and returns it lower-cased in a locale-independent way.
	 *
	 * @throws NullPointerException naming the parameter when it is absent
	 */
	private static String requiredLowerCase(AlgorithmConfiguration config, String name) {
		String value = config.getParam(name);
		if (value == null) {
			// Same exception type the bare dereference used to raise, but with a usable message.
			throw new NullPointerException("KMeans: missing required parameter '" + name + "'");
		}
		return value.toLowerCase(Locale.ROOT);
	}

	/**
	 * Runs RapidMiner's K-Means operator on {@code points} and hands the operator
	 * output to {@code BuildClusterTable}.
	 *
	 * <p>{@code status} is set to 70 once clustering has finished and to 100 when the
	 * method exits, whether it succeeded or failed. {@code shutdown()} is always invoked
	 * on exit.
	 *
	 * @throws Exception if the configuration or any of {@code k}, {@code max_runs},
	 *         {@code max_optimization_steps} is missing, or if the operator, the table
	 *         construction or the shutdown fails
	 */
	@Override
	public void compute() throws Exception {
		try {
			if (config == null || kk == null || maxRuns == null || maxOptimizations == null) {
				throw new Exception("KMeans: Error incomplete parameters");
			}

			logger.debug("KMeans: Settin up the cluster");
			// The fully qualified name is required: the simple name KMeans is this class.
			com.rapidminer.operator.clustering.clusterer.KMeans kmeans =
					(com.rapidminer.operator.clustering.clusterer.KMeans) OperatorService.createOperator("KMeans");

			kmeans.setParameter("k", kk);
			kmeans.setParameter("max_runs", maxRuns);
			kmeans.setParameter("max_optimization_steps", maxOptimizations);

			kmeans.setParameter("keep_example_set", "true");
			kmeans.setParameter("add_cluster_attribute", "true");

			// Wrap the inherited example set as the operator input.
			IOContainer innerInput = new IOContainer(points);

			logger.debug("KMeans: Clustering...");
			long ti = System.currentTimeMillis();
			IOContainer output = kmeans.apply(innerInput);
			logger.debug("KMEANS: ...ELAPSED CLUSTERING TIME: {}", System.currentTimeMillis() - ti);
			logger.debug("KMeans: ...Clustering Finished");
			status = 70f;

			IOObject[] outputvector = output.getIOObjects();

			BuildClusterTable(outputvector);
		} finally {
			try {
				shutdown();
			} finally {
				// Always report completion, even if shutdown() itself fails.
				status = 100f;
			}
		}
	}

	/**
	 * Describes the parameters this algorithm accepts: the input table and its feature
	 * columns, the output label and table name, the K-Means settings and
	 * {@code min_points}, followed by the default database parameters appended by
	 * {@code DatabaseType.addDefaultDBPars}.
	 *
	 * @return a new, mutable list of parameter descriptors
	 */
	@Override
	public List<StatisticalType> getInputParameters() {
		List<StatisticalType> parameters = new ArrayList<StatisticalType>();

		List<TableTemplates> templateOccs = new ArrayList<TableTemplates>();
		templateOccs.add(TableTemplates.GENERIC);

		InputTable occurrenceTable = new InputTable(templateOccs,"OccurrencePointsTable","Occurrence Points Table. Max 4000 points","occurrences");
		ColumnTypesList featureColumns = new ColumnTypesList ("OccurrencePointsTable","FeaturesColumnNames", "column Names for the features", false);
		PrimitiveType clusterLabel = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "OccurrencePointsClusterLabel","table name of the resulting distribution","OccCluster_");
		ServiceType clusterTable = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","table name of the distribution","occCluster_");
		PrimitiveType clusterCount = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","expected Number of Clusters","3");
		PrimitiveType runs = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","max runs of the clustering procedure","10");
		PrimitiveType optimizationSteps = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","max number of internal optimization steps","5");
		PrimitiveType outlierPoints = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","number of points which define an outlier set","2");

		// Insertion order is preserved from the original declaration order.
		parameters.add(occurrenceTable);
		parameters.add(featureColumns);
		parameters.add(clusterLabel);
		parameters.add(clusterTable);
		parameters.add(clusterCount);
		parameters.add(runs);
		parameters.add(optimizationSteps);
		parameters.add(outlierPoints);

		DatabaseType.addDefaultDBPars(parameters);
		return parameters;
	}

	/**
	 * @return the human-readable description of this algorithm
	 */
	@Override
	public String getDescription() {
		return "A clustering algorithm for real valued vectors that relies on the k-means algorithm, i.e. a method aiming to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean, serving as a prototype of the cluster.  A Maximum of 4000 points is allowed.";
	}

	/** Created on the first call to {@link #getResourceLoad()}. */
	ResourceFactory resourceManager;

	/**
	 * @return the resource load string produced by {@code ResourceFactory.getResourceLoad(1)}
	 */
	public String getResourceLoad() {
		if (resourceManager == null) {
			resourceManager = new ResourceFactory();
		}
		return resourceManager.getResourceLoad(1);
	}

	/**
	 * @return the resources string produced by {@code ResourceFactory.getResources(100f)}
	 */
	@Override
	public String getResources() {
		return ResourceFactory.getResources(100f);
	}

}