git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@57480 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
b7b23ad5f9
commit
41ff336881
|
@ -36,16 +36,16 @@ import com.rapidminer.tools.OperatorService;
|
|||
|
||||
public class DBScan implements Clusterer{
|
||||
|
||||
AlgorithmConfiguration config;
|
||||
String epsilon;
|
||||
String minPoints;
|
||||
ExampleSet points;
|
||||
ArrayList<ArrayList<String>> rows;
|
||||
String OccurrencePointsTable;
|
||||
String OccurrencePointsClusterTable;
|
||||
String FeaturesColumnNames;
|
||||
float status;
|
||||
private SessionFactory dbHibConnection;
|
||||
protected AlgorithmConfiguration config;
|
||||
protected String epsilon;
|
||||
protected String minPoints;
|
||||
protected ExampleSet points;
|
||||
protected ArrayList<ArrayList<String>> rows;
|
||||
protected String OccurrencePointsTable;
|
||||
protected String OccurrencePointsClusterTable;
|
||||
protected String FeaturesColumnNames;
|
||||
protected float status;
|
||||
protected SessionFactory dbHibConnection;
|
||||
|
||||
public static String clusterColumn = "clusterid";
|
||||
public static String clusterColumnType = "character varying";
|
||||
|
@ -165,20 +165,20 @@ public class DBScan implements Clusterer{
|
|||
|
||||
if (config!=null)
|
||||
config.initRapidMiner();
|
||||
AnalysisLogger.getLogger().debug("DBScan: Initialized Rapid Miner ");
|
||||
AnalysisLogger.getLogger().debug("DBScan: Initializing Database Connection");
|
||||
AnalysisLogger.getLogger().debug("Initialized Rapid Miner ");
|
||||
AnalysisLogger.getLogger().debug("Initializing Database Connection");
|
||||
dbHibConnection=DatabaseUtils.initDBSession(config);
|
||||
//create the final table
|
||||
try{
|
||||
AnalysisLogger.getLogger().debug("DBScan: dropping table "+OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("dropping table "+OccurrencePointsClusterTable);
|
||||
String dropStatement = DatabaseUtils.dropTableStatement(OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("DBScan: dropping table "+dropStatement);
|
||||
AnalysisLogger.getLogger().debug("dropping table "+dropStatement);
|
||||
DatabaseFactory.executeSQLUpdate(dropStatement, dbHibConnection);
|
||||
}catch(Exception e){
|
||||
AnalysisLogger.getLogger().debug("DBScan: Could not drop table "+OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("Could not drop table "+OccurrencePointsClusterTable);
|
||||
}
|
||||
//create Table
|
||||
AnalysisLogger.getLogger().debug("DBScan: Creating table "+OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("Creating table "+OccurrencePointsClusterTable);
|
||||
String [] features = FeaturesColumnNames.split(AlgorithmConfiguration.getListSeparator());
|
||||
String columns = "";
|
||||
|
||||
|
@ -190,13 +190,13 @@ public class DBScan implements Clusterer{
|
|||
|
||||
String createStatement = "create table "+OccurrencePointsClusterTable+" ( "+columns+")";
|
||||
// String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("DBScan: "+createStatement);
|
||||
AnalysisLogger.getLogger().debug("Statement: "+createStatement);
|
||||
DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
|
||||
//add two columns one for cluster and another for outliers
|
||||
AnalysisLogger.getLogger().debug("DBScan: Adding Columns");
|
||||
AnalysisLogger.getLogger().debug("Adding Columns");
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
|
||||
AnalysisLogger.getLogger().debug("DBScan: Getting Samples");
|
||||
AnalysisLogger.getLogger().debug("Getting Samples");
|
||||
//build samples
|
||||
getSamples();
|
||||
status = 10f;
|
||||
|
@ -218,7 +218,9 @@ public class DBScan implements Clusterer{
|
|||
|
||||
|
||||
|
||||
private void getSamples() throws Exception{
|
||||
protected void getSamples() throws Exception{
|
||||
System.out.println("->"+DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""));
|
||||
FeaturesColumnNames=FeaturesColumnNames.replace(AlgorithmConfiguration.listSeparator, ",");
|
||||
List<Object> samples = DatabaseFactory.executeSQLQuery(DatabaseUtils.getColumnsElementsStatement(OccurrencePointsTable, FeaturesColumnNames, ""), dbHibConnection);
|
||||
String [] elements = FeaturesColumnNames.split(",");
|
||||
int dimensions = elements.length;
|
||||
|
@ -281,10 +283,21 @@ public class DBScan implements Clusterer{
|
|||
|
||||
IOObject[] outputvector = output.getIOObjects();
|
||||
|
||||
BuildClusterTable(outputvector);
|
||||
|
||||
|
||||
shutdown();
|
||||
status = 100f;
|
||||
}
|
||||
|
||||
|
||||
protected void BuildClusterTable(IOObject[] outputvector) throws Exception{
|
||||
|
||||
ClusterModel innermodel = (ClusterModel) outputvector[0];
|
||||
ExampleSet es = (ExampleSet) outputvector[1];
|
||||
String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
|
||||
int minpoints = Integer.parseInt(minPoints);
|
||||
AnalysisLogger.getLogger().debug("Analyzing Cluster ->"+" minpoints"+minpoints);
|
||||
int nClusters = innermodel.getClusters().size();
|
||||
float statusstep = ((100f-status)/ (float)(nClusters+1));
|
||||
|
||||
|
@ -296,7 +309,7 @@ public class DBScan implements Clusterer{
|
|||
boolean outlier = false;
|
||||
//take cluster element indexes
|
||||
int npoints = c.getExampleIds().size();
|
||||
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints+" "+minpoints);
|
||||
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints);
|
||||
if (npoints==minpoints)
|
||||
outlier=true;
|
||||
|
||||
|
@ -341,12 +354,8 @@ public class DBScan implements Clusterer{
|
|||
float instatus = status + statusstep;
|
||||
status = Math.min(95f, instatus);
|
||||
AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
|
||||
}
|
||||
|
||||
shutdown();
|
||||
status = 100f;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -384,8 +393,8 @@ public class DBScan implements Clusterer{
|
|||
PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false);
|
||||
|
||||
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
|
||||
PrimitiveType p4 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "epsilon","DBScan epsilon parameter","10");
|
||||
PrimitiveType p5 = new PrimitiveType(String.class.getName(), null, PrimitiveTypes.STRING, "minPoints","DBScan minimum points parameter (identifies outliers)","1");
|
||||
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10");
|
||||
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minPoints","DBScan minimum points parameter (identifies outliers)","1");
|
||||
|
||||
|
||||
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
package org.gcube.dataanalysis.ecoengine.clustering;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
|
||||
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveTypesList;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.DatabaseParameters;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
|
||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
||||
import org.gcube.dataanalysis.ecoengine.utils.ResourceFactory;
|
||||
|
||||
import com.rapidminer.operator.IOContainer;
|
||||
import com.rapidminer.operator.IOObject;
|
||||
import com.rapidminer.tools.OperatorService;
|
||||
|
||||
public class KMeans extends DBScan{
|
||||
|
||||
public static String clusterColumn = "clusterid";
|
||||
public static String clusterColumnType = "character varying";
|
||||
public static String outliersColumn = "outlier";
|
||||
public static String outliersColumnType = "boolean";
|
||||
private String kk;
|
||||
private String maxRuns;
|
||||
private String maxOptimizations;
|
||||
|
||||
public static void main(String[] args) throws Exception{
|
||||
long t0 = System.currentTimeMillis();
|
||||
|
||||
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
||||
config.setConfigPath("./cfg/");
|
||||
config.setPersistencePath("./");
|
||||
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||
config.setParam("OccurrencePointsClusterTable","occCluster_kmeans");
|
||||
config.setParam("k","50");
|
||||
config.setParam("max_runs","10");
|
||||
config.setParam("max_optimization_steps","10");
|
||||
config.setParam("min_points","2");
|
||||
|
||||
config.setParam("DatabaseUserName","gcube");
|
||||
config.setParam("DatabasePassword","d4science2");
|
||||
config.setParam("DatabaseURL","jdbc:postgresql://146.48.87.169/testdb");
|
||||
config.setParam("DatabaseDriver","org.postgresql.Driver");
|
||||
|
||||
KMeans cluster = new KMeans();
|
||||
cluster.setConfiguration(config);
|
||||
cluster.init();
|
||||
cluster.compute();
|
||||
|
||||
System.out.println("ELAPSED "+(System.currentTimeMillis()-t0));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void setConfiguration(AlgorithmConfiguration config) {
|
||||
if (config!=null){
|
||||
kk=config.getParam("k");
|
||||
maxRuns= config.getParam("max_runs");
|
||||
maxOptimizations = config.getParam("max_optimization_steps");
|
||||
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
||||
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
||||
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
||||
minPoints=config.getParam("min_points");
|
||||
this.config=config;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void compute() throws Exception {
|
||||
|
||||
if ((config==null)||kk==null||maxRuns==null||maxOptimizations==null){
|
||||
throw new Exception("KMeans: Error incomplete parameters");
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().debug("KMeans: Settin up the cluster");
|
||||
//take elements and produce example set
|
||||
com.rapidminer.operator.clustering.clusterer.KMeans kmeans = (com.rapidminer.operator.clustering.clusterer.KMeans) OperatorService.createOperator("KMeans");
|
||||
|
||||
kmeans.setParameter("k", kk);
|
||||
kmeans.setParameter("max_runs",maxRuns);
|
||||
kmeans.setParameter("max_optimization_steps", maxOptimizations);
|
||||
|
||||
kmeans.setParameter("keep_example_set", "true");
|
||||
kmeans.setParameter("add_cluster_attribute", "true");
|
||||
|
||||
|
||||
IOContainer innerInput = new IOContainer(points);
|
||||
|
||||
AnalysisLogger.getLogger().debug("KMeans: Clustering...");
|
||||
IOContainer output = kmeans.apply(innerInput);
|
||||
AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished");
|
||||
status = 70f;
|
||||
|
||||
IOObject[] outputvector = output.getIOObjects();
|
||||
|
||||
BuildClusterTable(outputvector);
|
||||
|
||||
shutdown();
|
||||
status = 100f;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<StatisticalType> getInputParameters() {
|
||||
List<StatisticalType> parameters = new ArrayList<StatisticalType>();
|
||||
List<TableTemplates> templateOccs = new ArrayList<TableTemplates>();
|
||||
templateOccs.add(TableTemplates.GENERIC);
|
||||
InputTable p1 = new InputTable(templateOccs,"OccurrencePointsTable","Occurrence Points Table","occurrences");
|
||||
PrimitiveTypesList p2 = new PrimitiveTypesList(PrimitiveTypes.STRING, "FeaturesColumnNames","Column Names for the features",false);
|
||||
|
||||
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
|
||||
|
||||
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
|
||||
DatabaseType p7 = new DatabaseType(DatabaseParameters.DATABASEPASSWORD, "DatabasePassword", "db password");
|
||||
DatabaseType p8 = new DatabaseType(DatabaseParameters.DATABASEDRIVER, "DatabaseDriver", "db driver");
|
||||
DatabaseType p9 = new DatabaseType(DatabaseParameters.DATABASEURL, "DatabaseURL", "db url");
|
||||
DatabaseType p10 = new DatabaseType(DatabaseParameters.DATABASEDIALECT, "DatabaseDialect", "db dialect");
|
||||
DatabaseType p11 = new DatabaseType(DatabaseParameters.DATABASETABLESPACE, "DatabaseTableSpace", "db dialect");
|
||||
|
||||
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "k","Expected Number of Clusters","3");
|
||||
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_runs","Max runs of the clustering procedure","10");
|
||||
PrimitiveType p12 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "max_optimization_steps","Max number of internal optimization steps","5");
|
||||
PrimitiveType p13 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","Minimum number of points to define an outlier set","2");
|
||||
|
||||
parameters.add(p1);
|
||||
parameters.add(p2);
|
||||
parameters.add(p3);
|
||||
parameters.add(p4);
|
||||
parameters.add(p5);
|
||||
parameters.add(p6);
|
||||
parameters.add(p7);
|
||||
parameters.add(p8);
|
||||
parameters.add(p9);
|
||||
parameters.add(p10);
|
||||
parameters.add(p11);
|
||||
parameters.add(p12);
|
||||
|
||||
return parameters;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Clustering with KMeans";
|
||||
}
|
||||
|
||||
|
||||
ResourceFactory resourceManager;
|
||||
public String getResourceLoad() {
|
||||
if (resourceManager==null)
|
||||
resourceManager = new ResourceFactory();
|
||||
return resourceManager.getResourceLoad(1);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getResources() {
|
||||
return ResourceFactory.getResources(100f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue