git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@57560 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
fec0f3ac4d
commit
8941ec3652
|
@ -1,2 +1,3 @@
|
||||||
DBSCAN=org.gcube.dataanalysis.ecoengine.clustering.DBScan
|
DBSCAN=org.gcube.dataanalysis.ecoengine.clustering.DBScan
|
||||||
KMEANS=org.gcube.dataanalysis.ecoengine.clustering.KMeans
|
KMEANS=org.gcube.dataanalysis.ecoengine.clustering.KMeans
|
||||||
|
XMEANS=org.gcube.dataanalysis.ecoengine.clustering.XMeansWrapper
|
|
@ -0,0 +1,10 @@
|
||||||
|
5.1,3.5
|
||||||
|
4.9,3.0
|
||||||
|
4.7,3.2
|
||||||
|
4.6,3.1
|
||||||
|
5.0,3.6
|
||||||
|
5.4,3.9
|
||||||
|
4.6,3.4
|
||||||
|
5.0,3.4
|
||||||
|
4.4,2.9
|
||||||
|
4.9,3.1
|
|
21
pom.xml
21
pom.xml
|
@ -151,6 +151,27 @@
|
||||||
<artifactId>xstream</artifactId>
|
<artifactId>xstream</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.sf.squirrel-sql.thirdparty-non-maven</groupId>
|
||||||
|
<artifactId>java-cup</artifactId>
|
||||||
|
<version>0.11a</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>nz.ac.waikato.cms.weka</groupId>
|
||||||
|
<artifactId>weka-dev</artifactId>
|
||||||
|
<version>3.7.6</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>nz.ac.waikato.cms.weka</groupId>
|
||||||
|
<artifactId>XMeans</artifactId>
|
||||||
|
<version>1.0.3</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
    <!-- Pentaho Package Manager.
         The label is kept as a comment: the POM schema does not define a
         <name> child for <dependency>, and strict POM parsing rejects
         unrecognised tags. -->
    <groupId>org.pentaho.pentaho-commons</groupId>
    <artifactId>pentaho-package-manager</artifactId>
    <version>1.0.0</version>
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<repositories>
|
<repositories>
|
||||||
<repository>
|
<repository>
|
||||||
|
|
|
@ -46,15 +46,18 @@ public class DBScan implements Clusterer{
|
||||||
protected String FeaturesColumnNames;
|
protected String FeaturesColumnNames;
|
||||||
protected float status;
|
protected float status;
|
||||||
protected SessionFactory dbHibConnection;
|
protected SessionFactory dbHibConnection;
|
||||||
|
protected double[][] samplesVector;
|
||||||
|
|
||||||
public static String clusterColumn = "clusterid";
|
public static String clusterColumn = "clusterid";
|
||||||
public static String clusterColumnType = "character varying";
|
public static String clusterColumnType = "character varying";
|
||||||
public static String outliersColumn = "outlier";
|
public static String outliersColumn = "outlier";
|
||||||
public static String outliersColumnType = "boolean";
|
public static String outliersColumnType = "boolean";
|
||||||
|
protected boolean initrapidminer = true;
|
||||||
|
|
||||||
public static void mainCluster(String[] args) throws Exception{
|
public static void mainCluster(String[] args) throws Exception{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
String coordinates [] = {
|
String coordinates [] = {
|
||||||
"55.973798,-55.297853",
|
"55.973798,-55.297853",
|
||||||
"57.279043,-57.055666",
|
"57.279043,-57.055666",
|
||||||
|
@ -162,8 +165,8 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init() throws Exception {
|
public void init() throws Exception {
|
||||||
|
status = 0;
|
||||||
if (config!=null)
|
if ((config!=null) && (initrapidminer))
|
||||||
config.initRapidMiner();
|
config.initRapidMiner();
|
||||||
AnalysisLogger.getLogger().debug("Initialized Rapid Miner ");
|
AnalysisLogger.getLogger().debug("Initialized Rapid Miner ");
|
||||||
AnalysisLogger.getLogger().debug("Initializing Database Connection");
|
AnalysisLogger.getLogger().debug("Initializing Database Connection");
|
||||||
|
@ -212,20 +215,16 @@ public class DBScan implements Clusterer{
|
||||||
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
||||||
this.config=config;
|
this.config=config;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected void getSamples() throws Exception{
|
protected void getSamples() throws Exception{
|
||||||
System.out.println("->"+DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""));
|
// System.out.println("->"+DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""));
|
||||||
FeaturesColumnNames=FeaturesColumnNames.replace(AlgorithmConfiguration.listSeparator, ",");
|
FeaturesColumnNames=FeaturesColumnNames.replace(AlgorithmConfiguration.listSeparator, ",");
|
||||||
List<Object> samples = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""), dbHibConnection);
|
List<Object> samples = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""), dbHibConnection);
|
||||||
String [] elements = FeaturesColumnNames.split(",");
|
String [] elements = FeaturesColumnNames.split(",");
|
||||||
int dimensions = elements.length;
|
int dimensions = elements.length;
|
||||||
int nSamples = samples.size();
|
int nSamples = samples.size();
|
||||||
double[][] samplesVector = new double[nSamples][dimensions];
|
samplesVector = new double[nSamples][dimensions];
|
||||||
int ir=0;
|
int ir=0;
|
||||||
for (Object row:samples){
|
for (Object row:samples){
|
||||||
Object[] rowArr = (Object[]) row;
|
Object[] rowArr = (Object[]) row;
|
||||||
|
@ -310,7 +309,7 @@ public class DBScan implements Clusterer{
|
||||||
//take cluster element indexes
|
//take cluster element indexes
|
||||||
int npoints = c.getExampleIds().size();
|
int npoints = c.getExampleIds().size();
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints);
|
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints);
|
||||||
if (npoints==minpoints)
|
if (npoints<minpoints)
|
||||||
outlier=true;
|
outlier=true;
|
||||||
|
|
||||||
int k=0;
|
int k=0;
|
||||||
|
|
|
@ -23,10 +23,6 @@ import com.rapidminer.tools.OperatorService;
|
||||||
|
|
||||||
public class KMeans extends DBScan{
|
public class KMeans extends DBScan{
|
||||||
|
|
||||||
public static String clusterColumn = "clusterid";
|
|
||||||
public static String clusterColumnType = "character varying";
|
|
||||||
public static String outliersColumn = "outlier";
|
|
||||||
public static String outliersColumnType = "boolean";
|
|
||||||
private String kk;
|
private String kk;
|
||||||
private String maxRuns;
|
private String maxRuns;
|
||||||
private String maxOptimizations;
|
private String maxOptimizations;
|
||||||
|
@ -130,7 +126,7 @@ public class KMeans extends DBScan{
|
||||||
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","Expected Number of Clusters","3");
|
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","Expected Number of Clusters","3");
|
||||||
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","Max runs of the clustering procedure","10");
|
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","Max runs of the clustering procedure","10");
|
||||||
PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","Max number of internal optimization steps","5");
|
PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","Max number of internal optimization steps","5");
|
||||||
PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","Minimum number of points to define an outlier set","2");
|
PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","Number of points which define an outlier set","2");
|
||||||
|
|
||||||
parameters.add(p1);
|
parameters.add(p1);
|
||||||
parameters.add(p2);
|
parameters.add(p2);
|
||||||
|
@ -144,6 +140,7 @@ public class KMeans extends DBScan{
|
||||||
parameters.add(p10);
|
parameters.add(p10);
|
||||||
parameters.add(p11);
|
parameters.add(p11);
|
||||||
parameters.add(p12);
|
parameters.add(p12);
|
||||||
|
parameters.add(p13);
|
||||||
|
|
||||||
return parameters;
|
return parameters;
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,302 @@
|
||||||
|
package org.gcube.dataanalysis.ecoengine.clustering;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
|
||||||
|
|
||||||
|
import com.rapidminer.example.Attribute;
|
||||||
|
import com.rapidminer.example.Attributes;
|
||||||
|
import com.rapidminer.example.Example;
|
||||||
|
import com.rapidminer.example.ExampleSet;
|
||||||
|
import com.rapidminer.operator.IOContainer;
|
||||||
|
import com.rapidminer.operator.IOObject;
|
||||||
|
import com.rapidminer.operator.clustering.Cluster;
|
||||||
|
import com.rapidminer.operator.clustering.ClusterModel;
|
||||||
|
import com.rapidminer.tools.OperatorService;
|
||||||
|
|
||||||
|
import weka.clusterers.ClusterEvaluation;
|
||||||
|
import weka.core.DenseInstance;
|
||||||
|
import weka.core.Instance;
|
||||||
|
import weka.core.Instances;
|
||||||
|
import weka.core.converters.ArffSaver;
|
||||||
|
import weka.core.converters.CSVLoader;
|
||||||
|
|
||||||
|
public class XMeansWrapper extends DBScan {
|
||||||
|
|
||||||
|
private String maxIterations;
|
||||||
|
private String minClusters;
|
||||||
|
private String maxClusters;
|
||||||
|
|
||||||
|
/**
 * Creates the wrapper and switches off the inherited {@code initrapidminer}
 * flag. {@code DBScan.init()} only calls {@code config.initRapidMiner()}
 * while that flag is {@code true}.
 */
public XMeansWrapper() {
    // The superclass no-argument constructor runs implicitly.
    this.initrapidminer = false;
}
|
||||||
|
public static void main1(String[] args) throws Exception {
|
||||||
|
args = new String[2];
|
||||||
|
args[0] = "input.csv";
|
||||||
|
args[1] = "c:/tmp/output.arff";
|
||||||
|
// load CSV
|
||||||
|
CSVLoader loader = new CSVLoader();
|
||||||
|
loader.setSource(new File(args[0]));
|
||||||
|
Instances data = loader.getDataSet();
|
||||||
|
|
||||||
|
// save ARFF
|
||||||
|
ArffSaver saver = new ArffSaver();
|
||||||
|
saver.setInstances(data);
|
||||||
|
saver.setFile(new File(args[1]));
|
||||||
|
// saver.setDestination(new File(args[1]));
|
||||||
|
saver.writeBatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Empty placeholder type with no members.
 * Original note kept from the class body: takes 2 arguments - CSV input
 * file - ARFF output file.
 */
public class CSV2Arff {
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
XMeans xmeans = new XMeans();
|
||||||
|
// xmeans.setInputCenterFile(new File("./clusterinput.arf"));
|
||||||
|
// String[] options = {"-I 10","-M 1000","-J 1000","-L 2","-H 50","-B 1.0","-use-kdtree no","-N clusterinput.arf","-O clusterout.txt","-U 3"};
|
||||||
|
// String[] options = {"-I 10","-M 1000","-J 1000","-L 2","-H 50","-B 1.0","-use-kdtree no","-t clusterinput.arf","-O clusterout.txt","-U 3"};
|
||||||
|
// String optionsS = "-t c:/tmp/output.arff -O c:/tmp/clusterout.arff";
|
||||||
|
String optionsS = "-t c:/tmp/output.arff";
|
||||||
|
String[] options = optionsS.split(" ");
|
||||||
|
String elements = "ciao,tutti\n5.1,3.5\n4.9,3.0\n4.7,3.2\n4.6,3.1\n5.0,3.6\n5.4,3.9\n4.6,3.4\n5.0,3.4\n4.4,2.9\n4.9,3.1\n";
|
||||||
|
|
||||||
|
// xmeans.setInputCenterFile(new File("./clusterinput.arf"));
|
||||||
|
|
||||||
|
CSVLoader loader = new CSVLoader();
|
||||||
|
InputStream tis = new ByteArrayInputStream(elements.getBytes("UTF-8"));
|
||||||
|
loader.setSource(tis);
|
||||||
|
Instances id = loader.getDataSet();
|
||||||
|
System.out.println("ids: "+id.numInstances());
|
||||||
|
System.exit(0);
|
||||||
|
xmeans.buildClusterer(id);
|
||||||
|
|
||||||
|
// xmeans.main(options);
|
||||||
|
// ClusterEvaluation.evaluateClusterer(xmeans, options);
|
||||||
|
/*
|
||||||
|
* String[] opts = xmeans.getOptions(); for (int i=0;i<opts.length;i++){ System.out.println("options: "+opts[i]); }
|
||||||
|
*/
|
||||||
|
|
||||||
|
System.out.println(ClusterEvaluation.evaluateClusterer(xmeans, options));
|
||||||
|
// ClusterEvaluation.evaluateClusterer(xmeans, options);
|
||||||
|
System.out.println("*************");
|
||||||
|
Instances is = xmeans.getClusterCenters();
|
||||||
|
for (Instance i : is) {
|
||||||
|
DenseInstance di = (DenseInstance) i;
|
||||||
|
System.out.println("Attributes: " + i.numAttributes());
|
||||||
|
System.out.print("->" + di.toString(0));
|
||||||
|
System.out.println(" " + di.toString(1));
|
||||||
|
|
||||||
|
// System.out.println(i);
|
||||||
|
|
||||||
|
System.out.println("-------------------------------");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(xmeans.m_Bic);
|
||||||
|
|
||||||
|
// System.out.println(xmeans.clusterInstance(instance));
|
||||||
|
int[] ii = xmeans.m_ClusterAssignments;
|
||||||
|
for (int ix : ii)
|
||||||
|
System.out.print(ix + " ");
|
||||||
|
|
||||||
|
// xmeans.main(options);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<StatisticalType> getInputParameters() {
|
||||||
|
List<StatisticalType> parameters = new ArrayList<StatisticalType>();
|
||||||
|
List<TableTemplates> templateOccs = new ArrayList<TableTemplates>();
|
||||||
|
templateOccs.add(TableTemplates.GENERIC);
|
||||||
|
|
||||||
|
InputTable p1 = new InputTable(templateOccs, "OccurrencePointsTable", "Occurrence Points Table", "occurrences");
|
||||||
|
PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames", "Column Names for the features", false);
|
||||||
|
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable", "Table name of the distribution", "occCluster_");
|
||||||
|
|
||||||
|
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "maxIterations", "XMeans max number of overall iterations of the clustering learning", "10");
|
||||||
|
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minClusters", "Minimum number of expected clusters", "1");
|
||||||
|
PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "maxClusters", "Maximum number of clusters to produce", "50");
|
||||||
|
PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points", "Number of points which define an outlier set", "2");
|
||||||
|
|
||||||
|
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
|
||||||
|
DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password");
|
||||||
|
DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver");
|
||||||
|
DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url");
|
||||||
|
DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect");
|
||||||
|
DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
|
||||||
|
|
||||||
|
parameters.add(p1);
|
||||||
|
parameters.add(p2);
|
||||||
|
parameters.add(p3);
|
||||||
|
parameters.add(p4);
|
||||||
|
parameters.add(p5);
|
||||||
|
parameters.add(p6);
|
||||||
|
parameters.add(p7);
|
||||||
|
parameters.add(p8);
|
||||||
|
parameters.add(p9);
|
||||||
|
parameters.add(p10);
|
||||||
|
parameters.add(p11);
|
||||||
|
parameters.add(p12);
|
||||||
|
parameters.add(p13);
|
||||||
|
|
||||||
|
return parameters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return "Clustering with XMeans Algorithm: X-Means is K-Means extended by an Improve-Structure part In this part of the algorithm the centers are attempted to be split in its region. ";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setConfiguration(AlgorithmConfiguration config) {
|
||||||
|
if (config != null) {
|
||||||
|
maxIterations = config.getParam("maxIterations");
|
||||||
|
minClusters = config.getParam("minClusters");
|
||||||
|
maxClusters = config.getParam("maxClusters");
|
||||||
|
minPoints = config.getParam("min_points");
|
||||||
|
|
||||||
|
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
||||||
|
OccurrencePointsClusterTable = config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
||||||
|
FeaturesColumnNames = config.getParam("FeaturesColumnNames");
|
||||||
|
this.config = config;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void compute() throws Exception {
|
||||||
|
|
||||||
|
if ((config == null) || maxIterations == null || minClusters == null || maxClusters == null) {
|
||||||
|
throw new Exception("XMeans: Error incomplete parameters");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((samplesVector != null) && (samplesVector.length > 0)) {
|
||||||
|
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Setting up the cluster");
|
||||||
|
CSVLoader loader = new CSVLoader();
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
|
||||||
|
for (int i = -1; i < samplesVector.length; i++) {
|
||||||
|
for (int j = 0; j < samplesVector[0].length; j++) {
|
||||||
|
if (i==-1)
|
||||||
|
sb.append("F"+j);
|
||||||
|
else
|
||||||
|
sb.append(samplesVector[i][j]);
|
||||||
|
if (j < samplesVector[0].length - 1) {
|
||||||
|
sb.append(",");
|
||||||
|
} else
|
||||||
|
sb.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8"));
|
||||||
|
loader.setSource(tis);
|
||||||
|
Instances id = loader.getDataSet();
|
||||||
|
XMeans xmeans = new XMeans();
|
||||||
|
xmeans.setMaxIterations(Integer.parseInt(maxIterations));
|
||||||
|
xmeans.setMinNumClusters(Integer.parseInt(minClusters));
|
||||||
|
xmeans.setMaxNumClusters(Integer.parseInt(maxClusters));
|
||||||
|
xmeans.buildClusterer(id);
|
||||||
|
status = 50f;
|
||||||
|
|
||||||
|
// do clustering
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Clustering ...");
|
||||||
|
Instances is = xmeans.getClusterCenters();
|
||||||
|
int nClusters = is.numInstances();
|
||||||
|
// take results
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Found "+nClusters+" Centroids");
|
||||||
|
for (Instance i : is) {
|
||||||
|
DenseInstance di = (DenseInstance) i;
|
||||||
|
int nCluster = di.numAttributes();
|
||||||
|
for (int k = 0; k < nCluster; k++) {
|
||||||
|
AnalysisLogger.getLogger().debug(di.toString(k));
|
||||||
|
}
|
||||||
|
AnalysisLogger.getLogger().debug("-------------------------------");
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] clusteringAssignments = xmeans.m_ClusterAssignments;
|
||||||
|
int[] counters = new int[nClusters];
|
||||||
|
|
||||||
|
for (int cluster:clusteringAssignments){
|
||||||
|
counters[cluster]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Building Table");
|
||||||
|
BuildClusterTable(clusteringAssignments, counters);
|
||||||
|
|
||||||
|
} else
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Warning - Empty Training Set");
|
||||||
|
|
||||||
|
shutdown();
|
||||||
|
status = 100f;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void BuildClusterTable(int[] clusteringAssignments, int[] counters) throws Exception {
|
||||||
|
|
||||||
|
|
||||||
|
String columnsNames = FeaturesColumnNames + "," + clusterColumn + "," + outliersColumn;
|
||||||
|
int minpoints = Integer.parseInt(minPoints);
|
||||||
|
AnalysisLogger.getLogger().debug("Analyzing Cluster ->" + " minpoints " + minpoints);
|
||||||
|
|
||||||
|
StringBuffer bufferRows = new StringBuffer();
|
||||||
|
int nrows = samplesVector.length;
|
||||||
|
int ncols = samplesVector[0].length;
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("Analyzing Cluster ->" + "Building Rows to Insert");
|
||||||
|
|
||||||
|
for (int k = 0; k < nrows; k++) {
|
||||||
|
bufferRows.append("(");
|
||||||
|
int cindex = clusteringAssignments[k];
|
||||||
|
boolean isoutlier = (counters[cindex]<minpoints);
|
||||||
|
|
||||||
|
for (int j = 0; j < ncols; j++) {
|
||||||
|
bufferRows.append(samplesVector[k][j]);
|
||||||
|
bufferRows.append(",");
|
||||||
|
}
|
||||||
|
|
||||||
|
bufferRows.append(cindex + "," + isoutlier + ")");
|
||||||
|
|
||||||
|
if (k < nrows - 1) {
|
||||||
|
bufferRows.append(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//TO-DO: insert row at chunks
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("Analyzing Cluster ->" + "Inserting rows");
|
||||||
|
|
||||||
|
if (bufferRows.length() > 0) {
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Writing into DB");
|
||||||
|
AnalysisLogger.getLogger().debug(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
|
||||||
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows), dbHibConnection);
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Finished with writing into DB");
|
||||||
|
} else
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Nothing to write in the buffer");
|
||||||
|
|
||||||
|
status = 95f;
|
||||||
|
AnalysisLogger.getLogger().debug("XMeans: Status: " + status);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -3,6 +3,8 @@ package org.gcube.dataanalysis.ecoengine.test.regression;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
|
||||||
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
|
||||||
import org.gcube.dataanalysis.ecoengine.interfaces.Clusterer;
|
import org.gcube.dataanalysis.ecoengine.interfaces.Clusterer;
|
||||||
import org.gcube.dataanalysis.ecoengine.processing.factories.ClusterersFactory;
|
import org.gcube.dataanalysis.ecoengine.processing.factories.ClusterersFactory;
|
||||||
|
|
||||||
|
@ -15,12 +17,22 @@ public class RegressionTestClusterers {
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
System.out.println("TEST 1");
|
System.out.println("TEST 1");
|
||||||
|
List<Clusterer> clus;
|
||||||
|
|
||||||
List<Clusterer> clus = ClusterersFactory.getClusterers(testConfigLocal());
|
clus = ClusterersFactory.getClusterers(testConfigLocal());
|
||||||
clus.get(0).init();
|
clus.get(0).init();
|
||||||
Regressor.process(clus.get(0));
|
Regressor.process(clus.get(0));
|
||||||
clus = null;
|
clus = null;
|
||||||
|
|
||||||
|
clus = ClusterersFactory.getClusterers(testConfigLocal2());
|
||||||
|
clus.get(0).init();
|
||||||
|
Regressor.process(clus.get(0));
|
||||||
|
clus = null;
|
||||||
|
|
||||||
|
clus = ClusterersFactory.getClusterers(testConfigLocal3());
|
||||||
|
clus.get(0).init();
|
||||||
|
Regressor.process(clus.get(0));
|
||||||
|
clus = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,11 +42,45 @@ public static void main(String[] args) throws Exception {
|
||||||
config.setNumberOfResources(1);
|
config.setNumberOfResources(1);
|
||||||
config.setAgent("DBSCAN");
|
config.setAgent("DBSCAN");
|
||||||
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||||
config.setParam("FeaturesColumnNames","centerlat,centerlong");
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
config.setParam("OccurrencePointsClusterTable","occCluster_2");
|
config.setParam("OccurrencePointsClusterTable","occcluster_dbscan");
|
||||||
config.setParam("epsilon","10");
|
config.setParam("epsilon","10");
|
||||||
config.setParam("minPoints","1");
|
config.setParam("minPoints","1");
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static AlgorithmConfiguration testConfigLocal2() {
|
||||||
|
|
||||||
|
AlgorithmConfiguration config = Regressor.getConfig();
|
||||||
|
config.setNumberOfResources(1);
|
||||||
|
config.setAgent("KMEANS");
|
||||||
|
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||||
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
|
config.setParam("OccurrencePointsClusterTable","occcluster_kmeans");
|
||||||
|
config.setParam("k","50");
|
||||||
|
config.setParam("max_runs","10");
|
||||||
|
config.setParam("max_optimization_steps","10");
|
||||||
|
config.setParam("min_points","2");
|
||||||
|
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static AlgorithmConfiguration testConfigLocal3() {
|
||||||
|
|
||||||
|
AlgorithmConfiguration config = Regressor.getConfig();
|
||||||
|
config.setNumberOfResources(1);
|
||||||
|
config.setAgent("XMEANS");
|
||||||
|
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||||
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
|
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
||||||
|
config.setParam("maxIterations","1000");
|
||||||
|
config.setParam("minClusters","10");
|
||||||
|
config.setParam("maxClusters","50");
|
||||||
|
config.setParam("min_points","10");
|
||||||
|
|
||||||
|
return config;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue