git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@57480 82a268e6-3cf1-43bd-a215-b396298e98cf
parent b7b23ad5f9
commit 41ff336881
DBScan.java
@@ -36,16 +36,16 @@ import com.rapidminer.tools.OperatorService;
 
 public class DBScan implements Clusterer{
 
-AlgorithmConfiguration config;
-String epsilon;
-String minPoints;
-ExampleSet points;
-ArrayList<ArrayList<String>> rows;
-String OccurrencePointsTable;
-String OccurrencePointsClusterTable;
-String FeaturesColumnNames;
-float status;
-private SessionFactory dbHibConnection;
+protected AlgorithmConfiguration config;
+protected String epsilon;
+protected String minPoints;
+protected ExampleSet points;
+protected ArrayList<ArrayList<String>> rows;
+protected String OccurrencePointsTable;
+protected String OccurrencePointsClusterTable;
+protected String FeaturesColumnNames;
+protected float status;
+protected SessionFactory dbHibConnection;
 
 public static String clusterColumn = "clusterid";
 public static String clusterColumnType = "character varying";
@@ -165,20 +165,20 @@ public class DBScan implements Clusterer{
 
 if (config!=null)
 config.initRapidMiner();
-AnalysisLogger.getLogger().debug("DBScan: Initialized Rapid Miner ");
-AnalysisLogger.getLogger().debug("DBScan: Initializing Database Connection");
+AnalysisLogger.getLogger().debug("Initialized Rapid Miner ");
+AnalysisLogger.getLogger().debug("Initializing Database Connection");
 dbHibConnection=DatabaseUtils.initDBSession(config);
 //create the final table
 try{
-AnalysisLogger.getLogger().debug("DBScan: dropping table "+OccurrencePointsClusterTable);
+AnalysisLogger.getLogger().debug("dropping table "+OccurrencePointsClusterTable);
 String dropStatement = DatabaseUtils.dropTableStatement(OccurrencePointsClusterTable);
-AnalysisLogger.getLogger().debug("DBScan: dropping table "+dropStatement);
+AnalysisLogger.getLogger().debug("dropping table "+dropStatement);
 DatabaseFactory.executeSQLUpdate(dropStatement, dbHibConnection);
 }catch(Exception e){
-AnalysisLogger.getLogger().debug("DBScan: Could not drop table "+OccurrencePointsClusterTable);
+AnalysisLogger.getLogger().debug("Could not drop table "+OccurrencePointsClusterTable);
 }
 //create Table
-AnalysisLogger.getLogger().debug("DBScan: Creating table "+OccurrencePointsClusterTable);
+AnalysisLogger.getLogger().debug("Creating table "+OccurrencePointsClusterTable);
 String [] features = FeaturesColumnNames.split(AlgorithmConfiguration.getListSeparator());
 String columns = "";
 
@@ -190,13 +190,13 @@ public class DBScan implements Clusterer{
 
 String createStatement = "create table "+OccurrencePointsClusterTable+" ( "+columns+")";
 // String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
-AnalysisLogger.getLogger().debug("DBScan: "+createStatement);
+AnalysisLogger.getLogger().debug("Statement: "+createStatement);
 DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
 //add two columns one for cluster and another for outliers
-AnalysisLogger.getLogger().debug("DBScan: Adding Columns");
+AnalysisLogger.getLogger().debug("Adding Columns");
 DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
 DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
-AnalysisLogger.getLogger().debug("DBScan: Getting Samples");
+AnalysisLogger.getLogger().debug("Getting Samples");
 //build samples
 getSamples();
 status = 10f;
@@ -218,7 +218,9 @@ public class DBScan implements Clusterer{
 
 
 
-private void getSamples() throws Exception{
+protected void getSamples() throws Exception{
+System.out.println("->"+DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""));
+FeaturesColumnNames=FeaturesColumnNames.replace(AlgorithmConfiguration.listSeparator, ",");
 List<Object> samples = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""), dbHibConnection);
 String [] elements = FeaturesColumnNames.split(",");
 int dimensions = elements.length;
@@ -281,10 +283,21 @@ public class DBScan implements Clusterer{
 
 IOObject[] outputvector = output.getIOObjects();
 
+BuildClusterTable(outputvector);
+
+
+shutdown();
+status = 100f;
+}
+
+
+protected void BuildClusterTable(IOObject[] outputvector) throws Exception{
+
 ClusterModel innermodel = (ClusterModel) outputvector[0];
 ExampleSet es = (ExampleSet) outputvector[1];
 String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
 int minpoints = Integer.parseInt(minPoints);
+AnalysisLogger.getLogger().debug("Analyzing Cluster ->"+" minpoints"+minpoints);
 int nClusters = innermodel.getClusters().size();
 float statusstep = ((100f-status)/ (float)(nClusters+1));
 
@@ -296,7 +309,7 @@ public class DBScan implements Clusterer{
 boolean outlier = false;
 //take cluster element indexes
 int npoints = c.getExampleIds().size();
-AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints+" "+minpoints);
+AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints);
 if (npoints==minpoints)
 outlier=true;
 
@@ -341,12 +354,8 @@ public class DBScan implements Clusterer{
 float instatus = status + statusstep;
 status = Math.min(95f, instatus);
 AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
-}
-
-shutdown();
-
-status = 100f;
 }
+}
 
 
 
@@ -384,8 +393,8 @@ public class DBScan implements Clusterer{
 PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false);
 
 ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
-PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "epsilon","DBScan epsilon parameter","10");
-PrimitiveType p5 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "minPoints","DBScan minimum points parameter (identifies outliers)","1");
+PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10");
+PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minPoints","DBScan minimum points parameter (identifies outliers)","1");
 
 
 DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
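The DBScan changes above widen the class state to protected and split the result-writing step out of compute() into a protected BuildClusterTable(IOObject[]) method, so a subclass can run a different RapidMiner operator while reusing the shared table setup and persistence code. A minimal, self-contained sketch of that template layout (hypothetical class and method names, no RapidMiner or database dependencies; not the project's actual API):

    // Base class keeps shared state visible to subclasses and owns the persistence hook.
    abstract class ClustererTemplate {
        protected String occurrencePointsClusterTable = "occCluster_example"; // hypothetical table name
        protected float status;

        public void compute() throws Exception {
            Object[] output = runClusterer();   // subclass-specific clustering step
            buildClusterTable(output);          // shared step, analogous to BuildClusterTable
            status = 100f;
        }

        protected abstract Object[] runClusterer() throws Exception;

        protected void buildClusterTable(Object[] output) {
            // shared logic: write cluster ids and outlier flags into the output table
            System.out.println("writing " + output.length + " results to " + occurrencePointsClusterTable);
        }
    }

    // A subclass only swaps the clustering step, mirroring how KMeans extends DBScan below.
    class KMeansLike extends ClustererTemplate {
        @Override
        protected Object[] runClusterer() {
            return new Object[0]; // stand-in for the RapidMiner operator call
        }
    }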
KMeans.java (new file)
@@ -0,0 +1,173 @@
+package org.gcube.dataanalysis.ecoengine.clustering;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
+import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
+import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
+import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
+import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
+import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList;
+import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
+import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
+import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters;
+import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
+import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
+import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
+import org.gcube.dataanalysis.ecoengine.utils.ResourceFactory;
+
+import com.rapidminer.operator.IOContainer;
+import com.rapidminer.operator.IOObject;
+import com.rapidminer.tools.OperatorService;
+
+public class KMeans extends DBScan{
+
+public static String clusterColumn = "clusterid";
+public static String clusterColumnType = "character varying";
+public static String outliersColumn = "outlier";
+public static String outliersColumnType = "boolean";
+private String kk;
+private String maxRuns;
+private String maxOptimizations;
+
+public static void main(String[] args) throws Exception{
+long t0 = System.currentTimeMillis();
+
+AlgorithmConfiguration config = new AlgorithmConfiguration();
+config.setConfigPath("./cfg/");
+config.setPersistencePath("./");
+config.setParam("OccurrencePointsTable","presence_basking_cluster");
+config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
+config.setParam("OccurrencePointsClusterTable","occCluster_kmeans");
+config.setParam("k","50");
+config.setParam("max_runs","10");
+config.setParam("max_optimization_steps","10");
+config.setParam("min_points","2");
+
+config.setParam("DatabaseUserName","gcube");
+config.setParam("DatabasePassword","d4science2");
+config.setParam("DatabaseURL","jdbc:postgresql://146.48.87.169/testdb");
+config.setParam("DatabaseDriver","org.postgresql.Driver");
+
+KMeans cluster = new KMeans();
+cluster.setConfiguration(config);
+cluster.init();
+cluster.compute();
+
+System.out.println("ELAPSED "+(System.currentTimeMillis()-t0));
+
+}
+
+
+@Override
+public void setConfiguration(AlgorithmConfiguration config) {
+if (config!=null){
+kk=config.getParam("k");
+maxRuns= config.getParam("max_runs");
+maxOptimizations = config.getParam("max_optimization_steps");
+OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
+OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
+FeaturesColumnNames=config.getParam("FeaturesColumnNames");
+minPoints=config.getParam("min_points");
+this.config=config;
+}
+
+}
+
+@Override
+public void compute() throws Exception {
+
+if ((config==null)||kk==null||maxRuns==null||maxOptimizations==null){
+throw new Exception("KMeans: Error incomplete parameters");
+}
+
+AnalysisLogger.getLogger().debug("KMeans: Settin up the cluster");
+//take elements and produce example set
+com.rapidminer.operator.clustering.clusterer.KMeans kmeans = (com.rapidminer.operator.clustering.clusterer.KMeans) OperatorService.createOperator("KMeans");
+
+kmeans.setParameter("k", kk);
+kmeans.setParameter("max_runs",maxRuns);
+kmeans.setParameter("max_optimization_steps", maxOptimizations);
+
+kmeans.setParameter("keep_example_set", "true");
+kmeans.setParameter("add_cluster_attribute", "true");
+
+
+IOContainer innerInput = new IOContainer(points);
+
+AnalysisLogger.getLogger().debug("KMeans: Clustering...");
+IOContainer output = kmeans.apply(innerInput);
+AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished");
+status = 70f;
+
+IOObject[] outputvector = output.getIOObjects();
+
+BuildClusterTable(outputvector);
+
+shutdown();
+status = 100f;
+}
+
+
+@Override
+public List<StatisticalType> getInputParameters() {
+List<StatisticalType> parameters = new ArrayList<StatisticalType>();
+List<TableTemplates> templateOccs = new ArrayList<TableTemplates>();
+templateOccs.add(TableTemplates.GENERIC);
+InputTable p1 = new InputTable(templateOccs,"OccurrencePointsTable","Occurrence Points Table","occurrences");
+PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false);
+
+ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
+
+DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
+DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password");
+DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver");
+DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url");
+DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect");
+DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
+
+PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","Expected Number of Clusters","3");
+PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","Max runs of the clustering procedure","10");
+PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","Max number of internal optimization steps","5");
+PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","Minimum number of points to define an outlier set","2");
+
+parameters.add(p1);
+parameters.add(p2);
+parameters.add(p3);
+parameters.add(p4);
+parameters.add(p5);
+parameters.add(p6);
+parameters.add(p7);
+parameters.add(p8);
+parameters.add(p9);
+parameters.add(p10);
+parameters.add(p11);
+parameters.add(p12);
+
+return parameters;
+}
+
+@Override
+public String getDescription() {
+return "Clustering with KMeans";
+}
+
+
+ResourceFactory resourceManager;
+public String getResourceLoad() {
+if (resourceManager==null)
+resourceManager = new ResourceFactory();
+return resourceManager.getResourceLoad(1);
+}
+
+
+@Override
+public String getResources() {
+return ResourceFactory.getResources(100f);
+}
+
+
+
+}
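With epsilon and minPoints now declared as NUMBER parameters in DBScan's getInputParameters() (last DBScan hunk above), driving the parent algorithm directly looks much like the KMeans main(). The following is only a sketch, under the assumption that DBScan.setConfiguration reads the "epsilon" and "minPoints" keys; that method is not part of this diff:

    import org.gcube.dataanalysis.ecoengine.clustering.DBScan;
    import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;

    public class DBScanExample {
        public static void main(String[] args) throws Exception {
            AlgorithmConfiguration config = new AlgorithmConfiguration();
            config.setConfigPath("./cfg/");
            config.setPersistencePath("./");
            config.setParam("OccurrencePointsTable", "presence_basking_cluster");
            config.setParam("FeaturesColumnNames", "centerlat" + AlgorithmConfiguration.getListSeparator() + "centerlong");
            config.setParam("OccurrencePointsClusterTable", "occCluster_dbscan");
            config.setParam("epsilon", "10");   // neighborhood radius, as declared in getInputParameters()
            config.setParam("minPoints", "1");  // clusters of exactly this size are flagged as outliers
            // database parameters (user, password, URL, driver) as in the KMeans main() above

            DBScan cluster = new DBScan();
            cluster.setConfiguration(config);   // assumes these parameter keys are read here (not shown in this diff)
            cluster.init();
            cluster.compute();
        }
    }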