This commit is contained in:
Gianpaolo Coro 2012-09-04 13:52:08 +00:00
parent 8941ec3652
commit 01b4ace8e0
4 changed files with 29 additions and 34 deletions

View File

@ -89,7 +89,7 @@ public class DBScan implements Clusterer{
AlgorithmConfiguration config = new AlgorithmConfiguration(); AlgorithmConfiguration config = new AlgorithmConfiguration();
config.setParam("epsilon", "10"); config.setParam("epsilon", "10");
config.setParam("minPoints", "1"); config.setParam("min_points", "1");
config.setConfigPath("./cfg/"); config.setConfigPath("./cfg/");
config.initRapidMiner(); config.initRapidMiner();
dbscanner.setConfiguration(config); dbscanner.setConfiguration(config);
@ -122,7 +122,7 @@ public class DBScan implements Clusterer{
AlgorithmConfiguration config = new AlgorithmConfiguration(); AlgorithmConfiguration config = new AlgorithmConfiguration();
config.setParam("epsilon", "10"); config.setParam("epsilon", "10");
config.setParam("minPoints", "1"); config.setParam("min_points", "1");
config.setConfigPath("./cfg/"); config.setConfigPath("./cfg/");
config.initRapidMiner(); config.initRapidMiner();
long t0 = System.currentTimeMillis(); long t0 = System.currentTimeMillis();
@ -142,7 +142,7 @@ public class DBScan implements Clusterer{
config.setParam("FeaturesColumnNames","centerlat,centerlong"); config.setParam("FeaturesColumnNames","centerlat,centerlong");
config.setParam("OccurrencePointsClusterTable","occCluster_1"); config.setParam("OccurrencePointsClusterTable","occCluster_1");
config.setParam("epsilon","10"); config.setParam("epsilon","10");
config.setParam("minPoints","1"); config.setParam("min_points","1");
config.setParam("DatabaseUserName","gcube"); config.setParam("DatabaseUserName","gcube");
config.setParam("DatabasePassword","d4science2"); config.setParam("DatabasePassword","d4science2");
@ -209,7 +209,7 @@ public class DBScan implements Clusterer{
public void setConfiguration(AlgorithmConfiguration config) { public void setConfiguration(AlgorithmConfiguration config) {
if (config!=null){ if (config!=null){
epsilon=config.getParam("epsilon"); epsilon=config.getParam("epsilon");
minPoints = config.getParam("minPoints"); minPoints = config.getParam("min_points");
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase(); OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase(); OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
FeaturesColumnNames=config.getParam("FeaturesColumnNames"); FeaturesColumnNames=config.getParam("FeaturesColumnNames");
@ -244,9 +244,9 @@ public class DBScan implements Clusterer{
ir++; ir++;
} }
AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner"); AnalysisLogger.getLogger().debug("Building Sample Set For Miner");
produceSamples(samplesVector); produceSamples(samplesVector);
AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks"); AnalysisLogger.getLogger().debug("Obtained "+samplesVector.length+" chunks");
} }
public void produceSamples(double[][] sampleVectors) throws Exception{ public void produceSamples(double[][] sampleVectors) throws Exception{
@ -276,7 +276,9 @@ public class DBScan implements Clusterer{
IOContainer innerInput = new IOContainer(points); IOContainer innerInput = new IOContainer(points);
AnalysisLogger.getLogger().debug("DBScan: Clustering..."); AnalysisLogger.getLogger().debug("DBScan: Clustering...");
long ti= System.currentTimeMillis();
IOContainer output = clusterer.apply(innerInput); IOContainer output = clusterer.apply(innerInput);
AnalysisLogger.getLogger().debug("DBScan: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished"); AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
status = 70f; status = 70f;
@ -300,7 +302,7 @@ public class DBScan implements Clusterer{
int nClusters = innermodel.getClusters().size(); int nClusters = innermodel.getClusters().size();
float statusstep = ((100f-status)/ (float)(nClusters+1)); float statusstep = ((100f-status)/ (float)(nClusters+1));
AnalysisLogger.getLogger().debug("DBScan: Start Write On DB"); AnalysisLogger.getLogger().debug("Start Write On DB");
for (Cluster c : innermodel.getClusters()){ for (Cluster c : innermodel.getClusters()){
StringBuffer bufferRows = new StringBuffer(); StringBuffer bufferRows = new StringBuffer();
//take cluster id //take cluster id
@ -308,7 +310,7 @@ public class DBScan implements Clusterer{
boolean outlier = false; boolean outlier = false;
//take cluster element indexes //take cluster element indexes
int npoints = c.getExampleIds().size(); int npoints = c.getExampleIds().size();
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints); AnalysisLogger.getLogger().debug("Analyzing Cluster ->"+id+" with "+npoints);
if (npoints<minpoints) if (npoints<minpoints)
outlier=true; outlier=true;
@ -339,20 +341,20 @@ public class DBScan implements Clusterer{
} }
k++; k++;
AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier); // AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
} }
if (bufferRows.length()>0){ if (bufferRows.length()>0){
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows)); // AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
AnalysisLogger.getLogger().debug("DBScan: Writing into DB"); AnalysisLogger.getLogger().debug("Writing into DB");
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection); DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB"); AnalysisLogger.getLogger().debug("Finished with writing into DB");
}else }else
AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer"); AnalysisLogger.getLogger().debug("Nothing to write in the buffer");
float instatus = status + statusstep; float instatus = status + statusstep;
status = Math.min(95f, instatus); status = Math.min(95f, instatus);
AnalysisLogger.getLogger().debug("DBScan: Status: "+status); AnalysisLogger.getLogger().debug("Status: "+status);
} }
} }
@ -361,10 +363,10 @@ public class DBScan implements Clusterer{
@Override @Override
public void shutdown() { public void shutdown() {
try{ try{
AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection "); AnalysisLogger.getLogger().debug("Closing DB Connection ");
dbHibConnection.close(); dbHibConnection.close();
}catch(Exception e){ }catch(Exception e){
AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection"); AnalysisLogger.getLogger().debug("Could not shut down connection");
} }
} }
@ -393,7 +395,7 @@ public class DBScan implements Clusterer{
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_"); ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10"); PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10");
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minPoints","DBScan minimum points parameter (identifies outliers)","1"); PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","DBScan minimum points parameter (identifies outliers)","1");
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name"); DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");

View File

@ -93,7 +93,9 @@ public class KMeans extends DBScan{
IOContainer innerInput = new IOContainer(points); IOContainer innerInput = new IOContainer(points);
AnalysisLogger.getLogger().debug("KMeans: Clustering..."); AnalysisLogger.getLogger().debug("KMeans: Clustering...");
long ti= System.currentTimeMillis();
IOContainer output = kmeans.apply(innerInput); IOContainer output = kmeans.apply(innerInput);
AnalysisLogger.getLogger().debug("KMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished"); AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished");
status = 70f; status = 70f;

View File

@ -20,17 +20,6 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates; import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory; import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils; import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.clustering.Cluster;
import com.rapidminer.operator.clustering.ClusterModel;
import com.rapidminer.tools.OperatorService;
import weka.clusterers.ClusterEvaluation; import weka.clusterers.ClusterEvaluation;
import weka.core.DenseInstance; import weka.core.DenseInstance;
@ -213,11 +202,13 @@ public class XMeansWrapper extends DBScan {
InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8"));
loader.setSource(tis); loader.setSource(tis);
Instances id = loader.getDataSet(); Instances id = loader.getDataSet();
long ti= System.currentTimeMillis();
XMeans xmeans = new XMeans(); XMeans xmeans = new XMeans();
xmeans.setMaxIterations(Integer.parseInt(maxIterations)); xmeans.setMaxIterations(Integer.parseInt(maxIterations));
xmeans.setMinNumClusters(Integer.parseInt(minClusters)); xmeans.setMinNumClusters(Integer.parseInt(minClusters));
xmeans.setMaxNumClusters(Integer.parseInt(maxClusters)); xmeans.setMaxNumClusters(Integer.parseInt(maxClusters));
xmeans.buildClusterer(id); xmeans.buildClusterer(id);
AnalysisLogger.getLogger().debug("XMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
status = 50f; status = 50f;
// do clustering // do clustering

View File

@ -45,7 +45,7 @@ public static void main(String[] args) throws Exception {
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
config.setParam("OccurrencePointsClusterTable","occcluster_dbscan"); config.setParam("OccurrencePointsClusterTable","occcluster_dbscan");
config.setParam("epsilon","10"); config.setParam("epsilon","10");
config.setParam("minPoints","1"); config.setParam("min_points","3");
return config; return config;
} }
@ -59,10 +59,10 @@ public static void main(String[] args) throws Exception {
config.setParam("OccurrencePointsTable","presence_basking_cluster"); config.setParam("OccurrencePointsTable","presence_basking_cluster");
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
config.setParam("OccurrencePointsClusterTable","occcluster_kmeans"); config.setParam("OccurrencePointsClusterTable","occcluster_kmeans");
config.setParam("k","50"); config.setParam("k","30");
config.setParam("max_runs","10"); config.setParam("max_runs","1000");
config.setParam("max_optimization_steps","10"); config.setParam("max_optimization_steps","1000");
config.setParam("min_points","2"); config.setParam("min_points","3");
return config; return config;
} }
@ -77,9 +77,9 @@ public static void main(String[] args) throws Exception {
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong"); config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans"); config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
config.setParam("maxIterations","1000"); config.setParam("maxIterations","1000");
config.setParam("minClusters","10"); config.setParam("minClusters","30");
config.setParam("maxClusters","50"); config.setParam("maxClusters","50");
config.setParam("min_points","10"); config.setParam("min_points","3");
return config; return config;
} }