diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java index 00c0465..de67f66 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/DBScan.java @@ -89,7 +89,7 @@ public class DBScan implements Clusterer{ AlgorithmConfiguration config = new AlgorithmConfiguration(); config.setParam("epsilon", "10"); - config.setParam("minPoints", "1"); + config.setParam("min_points", "1"); config.setConfigPath("./cfg/"); config.initRapidMiner(); dbscanner.setConfiguration(config); @@ -122,7 +122,7 @@ public class DBScan implements Clusterer{ AlgorithmConfiguration config = new AlgorithmConfiguration(); config.setParam("epsilon", "10"); - config.setParam("minPoints", "1"); + config.setParam("min_points", "1"); config.setConfigPath("./cfg/"); config.initRapidMiner(); long t0 = System.currentTimeMillis(); @@ -142,7 +142,7 @@ public class DBScan implements Clusterer{ config.setParam("FeaturesColumnNames","centerlat,centerlong"); config.setParam("OccurrencePointsClusterTable","occCluster_1"); config.setParam("epsilon","10"); - config.setParam("minPoints","1"); + config.setParam("min_points","1"); config.setParam("DatabaseUserName","gcube"); config.setParam("DatabasePassword","d4science2"); @@ -209,7 +209,7 @@ public class DBScan implements Clusterer{ public void setConfiguration(AlgorithmConfiguration config) { if (config!=null){ epsilon=config.getParam("epsilon"); - minPoints = config.getParam("minPoints"); + minPoints = config.getParam("min_points"); OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase(); OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase(); FeaturesColumnNames=config.getParam("FeaturesColumnNames"); @@ -244,9 +244,9 @@ public class DBScan implements Clusterer{ ir++; } - AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner"); + AnalysisLogger.getLogger().debug("Building Sample Set For Miner"); produceSamples(samplesVector); - AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks"); + AnalysisLogger.getLogger().debug("Obtained "+samplesVector.length+" chunks"); } public void produceSamples(double[][] sampleVectors) throws Exception{ @@ -276,7 +276,9 @@ public class DBScan implements Clusterer{ IOContainer innerInput = new IOContainer(points); AnalysisLogger.getLogger().debug("DBScan: Clustering..."); + long ti= System.currentTimeMillis(); IOContainer output = clusterer.apply(innerInput); + AnalysisLogger.getLogger().debug("DBScan: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti)); AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished"); status = 70f; @@ -300,7 +302,7 @@ public class DBScan implements Clusterer{ int nClusters = innermodel.getClusters().size(); float statusstep = ((100f-status)/ (float)(nClusters+1)); - AnalysisLogger.getLogger().debug("DBScan: Start Write On DB"); + AnalysisLogger.getLogger().debug("Start Write On DB"); for (Cluster c : innermodel.getClusters()){ StringBuffer bufferRows = new StringBuffer(); //take cluster id @@ -308,7 +310,7 @@ public class DBScan implements Clusterer{ boolean outlier = false; //take cluster element indexes int npoints = c.getExampleIds().size(); - AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints); + AnalysisLogger.getLogger().debug("Analyzing Cluster ->"+id+" with "+npoints); if (npoints"+id+" is outlier?"+outlier); +// AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier); } if (bufferRows.length()>0){ // AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows)); - AnalysisLogger.getLogger().debug("DBScan: Writing into DB"); + AnalysisLogger.getLogger().debug("Writing into DB"); DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection); - AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB"); + AnalysisLogger.getLogger().debug("Finished with writing into DB"); }else - AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer"); + AnalysisLogger.getLogger().debug("Nothing to write in the buffer"); float instatus = status + statusstep; status = Math.min(95f, instatus); - AnalysisLogger.getLogger().debug("DBScan: Status: "+status); + AnalysisLogger.getLogger().debug("Status: "+status); } } @@ -361,10 +363,10 @@ public class DBScan implements Clusterer{ @Override public void shutdown() { try{ - AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection "); + AnalysisLogger.getLogger().debug("Closing DB Connection "); dbHibConnection.close(); }catch(Exception e){ - AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection"); + AnalysisLogger.getLogger().debug("Could not shut down connection"); } } @@ -393,7 +395,7 @@ public class DBScan implements Clusterer{ ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_"); PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10"); - PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minPoints","DBScan minimum points parameter (identifies outliers)","1"); + PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","DBScan minimum points parameter (identifies outliers)","1"); DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java index 1d0eb8c..34411e9 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/KMeans.java @@ -93,7 +93,9 @@ public class KMeans extends DBScan{ IOContainer innerInput = new IOContainer(points); AnalysisLogger.getLogger().debug("KMeans: Clustering..."); + long ti= System.currentTimeMillis(); IOContainer output = kmeans.apply(innerInput); + AnalysisLogger.getLogger().debug("KMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti)); AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished"); status = 70f; diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java index de0fc6c..d1f7d52 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/clustering/XMeansWrapper.java @@ -20,17 +20,6 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory; import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; -import org.gcube.dataanalysis.ecoengine.utils.Transformations; - -import com.rapidminer.example.Attribute; -import com.rapidminer.example.Attributes; -import com.rapidminer.example.Example; -import com.rapidminer.example.ExampleSet; -import com.rapidminer.operator.IOContainer; -import com.rapidminer.operator.IOObject; -import com.rapidminer.operator.clustering.Cluster; -import com.rapidminer.operator.clustering.ClusterModel; -import com.rapidminer.tools.OperatorService; import weka.clusterers.ClusterEvaluation; import weka.core.DenseInstance; @@ -213,11 +202,13 @@ public class XMeansWrapper extends DBScan { InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); loader.setSource(tis); Instances id = loader.getDataSet(); + long ti= System.currentTimeMillis(); XMeans xmeans = new XMeans(); xmeans.setMaxIterations(Integer.parseInt(maxIterations)); xmeans.setMinNumClusters(Integer.parseInt(minClusters)); xmeans.setMaxNumClusters(Integer.parseInt(maxClusters)); xmeans.buildClusterer(id); + AnalysisLogger.getLogger().debug("XMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti)); status = 50f; // do clustering diff --git a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestClusterers.java b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestClusterers.java index add77b9..d83664b 100644 --- a/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestClusterers.java +++ b/src/main/java/org/gcube/dataanalysis/ecoengine/test/regression/RegressionTestClusterers.java @@ -45,7 +45,7 @@ public static void main(String[] args) throws Exception { config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("OccurrencePointsClusterTable","occcluster_dbscan"); config.setParam("epsilon","10"); - config.setParam("minPoints","1"); + config.setParam("min_points","3"); return config; } @@ -59,10 +59,10 @@ public static void main(String[] args) throws Exception { config.setParam("OccurrencePointsTable","presence_basking_cluster"); config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("OccurrencePointsClusterTable","occcluster_kmeans"); - config.setParam("k","50"); - config.setParam("max_runs","10"); - config.setParam("max_optimization_steps","10"); - config.setParam("min_points","2"); + config.setParam("k","30"); + config.setParam("max_runs","1000"); + config.setParam("max_optimization_steps","1000"); + config.setParam("min_points","3"); return config; } @@ -77,9 +77,9 @@ public static void main(String[] args) throws Exception { config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("OccurrencePointsClusterTable","occcluster_xmeans"); config.setParam("maxIterations","1000"); - config.setParam("minClusters","10"); + config.setParam("minClusters","30"); config.setParam("maxClusters","50"); - config.setParam("min_points","10"); + config.setParam("min_points","3"); return config; }