git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@57563 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
8941ec3652
commit
01b4ace8e0
|
@ -89,7 +89,7 @@ public class DBScan implements Clusterer{
|
||||||
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
||||||
|
|
||||||
config.setParam("epsilon", "10");
|
config.setParam("epsilon", "10");
|
||||||
config.setParam("minPoints", "1");
|
config.setParam("min_points", "1");
|
||||||
config.setConfigPath("./cfg/");
|
config.setConfigPath("./cfg/");
|
||||||
config.initRapidMiner();
|
config.initRapidMiner();
|
||||||
dbscanner.setConfiguration(config);
|
dbscanner.setConfiguration(config);
|
||||||
|
@ -122,7 +122,7 @@ public class DBScan implements Clusterer{
|
||||||
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
||||||
|
|
||||||
config.setParam("epsilon", "10");
|
config.setParam("epsilon", "10");
|
||||||
config.setParam("minPoints", "1");
|
config.setParam("min_points", "1");
|
||||||
config.setConfigPath("./cfg/");
|
config.setConfigPath("./cfg/");
|
||||||
config.initRapidMiner();
|
config.initRapidMiner();
|
||||||
long t0 = System.currentTimeMillis();
|
long t0 = System.currentTimeMillis();
|
||||||
|
@ -142,7 +142,7 @@ public class DBScan implements Clusterer{
|
||||||
config.setParam("FeaturesColumnNames","centerlat,centerlong");
|
config.setParam("FeaturesColumnNames","centerlat,centerlong");
|
||||||
config.setParam("OccurrencePointsClusterTable","occCluster_1");
|
config.setParam("OccurrencePointsClusterTable","occCluster_1");
|
||||||
config.setParam("epsilon","10");
|
config.setParam("epsilon","10");
|
||||||
config.setParam("minPoints","1");
|
config.setParam("min_points","1");
|
||||||
|
|
||||||
config.setParam("DatabaseUserName","gcube");
|
config.setParam("DatabaseUserName","gcube");
|
||||||
config.setParam("DatabasePassword","d4science2");
|
config.setParam("DatabasePassword","d4science2");
|
||||||
|
@ -209,7 +209,7 @@ public class DBScan implements Clusterer{
|
||||||
public void setConfiguration(AlgorithmConfiguration config) {
|
public void setConfiguration(AlgorithmConfiguration config) {
|
||||||
if (config!=null){
|
if (config!=null){
|
||||||
epsilon=config.getParam("epsilon");
|
epsilon=config.getParam("epsilon");
|
||||||
minPoints = config.getParam("minPoints");
|
minPoints = config.getParam("min_points");
|
||||||
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
||||||
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
||||||
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
||||||
|
@ -244,9 +244,9 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
ir++;
|
ir++;
|
||||||
}
|
}
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner");
|
AnalysisLogger.getLogger().debug("Building Sample Set For Miner");
|
||||||
produceSamples(samplesVector);
|
produceSamples(samplesVector);
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks");
|
AnalysisLogger.getLogger().debug("Obtained "+samplesVector.length+" chunks");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void produceSamples(double[][] sampleVectors) throws Exception{
|
public void produceSamples(double[][] sampleVectors) throws Exception{
|
||||||
|
@ -276,7 +276,9 @@ public class DBScan implements Clusterer{
|
||||||
IOContainer innerInput = new IOContainer(points);
|
IOContainer innerInput = new IOContainer(points);
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Clustering...");
|
AnalysisLogger.getLogger().debug("DBScan: Clustering...");
|
||||||
|
long ti= System.currentTimeMillis();
|
||||||
IOContainer output = clusterer.apply(innerInput);
|
IOContainer output = clusterer.apply(innerInput);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
|
||||||
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
|
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
|
||||||
status = 70f;
|
status = 70f;
|
||||||
|
|
||||||
|
@ -300,7 +302,7 @@ public class DBScan implements Clusterer{
|
||||||
int nClusters = innermodel.getClusters().size();
|
int nClusters = innermodel.getClusters().size();
|
||||||
float statusstep = ((100f-status)/ (float)(nClusters+1));
|
float statusstep = ((100f-status)/ (float)(nClusters+1));
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Start Write On DB");
|
AnalysisLogger.getLogger().debug("Start Write On DB");
|
||||||
for (Cluster c : innermodel.getClusters()){
|
for (Cluster c : innermodel.getClusters()){
|
||||||
StringBuffer bufferRows = new StringBuffer();
|
StringBuffer bufferRows = new StringBuffer();
|
||||||
//take cluster id
|
//take cluster id
|
||||||
|
@ -308,7 +310,7 @@ public class DBScan implements Clusterer{
|
||||||
boolean outlier = false;
|
boolean outlier = false;
|
||||||
//take cluster element indexes
|
//take cluster element indexes
|
||||||
int npoints = c.getExampleIds().size();
|
int npoints = c.getExampleIds().size();
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints);
|
AnalysisLogger.getLogger().debug("Analyzing Cluster ->"+id+" with "+npoints);
|
||||||
if (npoints<minpoints)
|
if (npoints<minpoints)
|
||||||
outlier=true;
|
outlier=true;
|
||||||
|
|
||||||
|
@ -339,20 +341,20 @@ public class DBScan implements Clusterer{
|
||||||
}
|
}
|
||||||
|
|
||||||
k++;
|
k++;
|
||||||
AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
|
// AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bufferRows.length()>0){
|
if (bufferRows.length()>0){
|
||||||
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
|
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Writing into DB");
|
AnalysisLogger.getLogger().debug("Writing into DB");
|
||||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB");
|
AnalysisLogger.getLogger().debug("Finished with writing into DB");
|
||||||
}else
|
}else
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer");
|
AnalysisLogger.getLogger().debug("Nothing to write in the buffer");
|
||||||
|
|
||||||
float instatus = status + statusstep;
|
float instatus = status + statusstep;
|
||||||
status = Math.min(95f, instatus);
|
status = Math.min(95f, instatus);
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
|
AnalysisLogger.getLogger().debug("Status: "+status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -361,10 +363,10 @@ public class DBScan implements Clusterer{
|
||||||
@Override
|
@Override
|
||||||
public void shutdown() {
|
public void shutdown() {
|
||||||
try{
|
try{
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection ");
|
AnalysisLogger.getLogger().debug("Closing DB Connection ");
|
||||||
dbHibConnection.close();
|
dbHibConnection.close();
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection");
|
AnalysisLogger.getLogger().debug("Could not shut down connection");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,7 +395,7 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
|
ServiceType p3 = new ServiceType(ServiceParameters.RANDOMSTRING, "OccurrencePointsClusterTable","Table name of the distribution","occCluster_");
|
||||||
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10");
|
PrimitiveType p4 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "epsilon","DBScan epsilon parameter","10");
|
||||||
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "minPoints","DBScan minimum points parameter (identifies outliers)","1");
|
PrimitiveType p5 = new PrimitiveType(Integer.class.getName(), null, PrimitiveTypes.NUMBER, "min_points","DBScan minimum points parameter (identifies outliers)","1");
|
||||||
|
|
||||||
|
|
||||||
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
|
DatabaseType p6 = new DatabaseType(DatabaseParameters.DATABASEUSERNAME, "DatabaseUserName", "db user name");
|
||||||
|
|
|
@ -93,7 +93,9 @@ public class KMeans extends DBScan{
|
||||||
IOContainer innerInput = new IOContainer(points);
|
IOContainer innerInput = new IOContainer(points);
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug("KMeans: Clustering...");
|
AnalysisLogger.getLogger().debug("KMeans: Clustering...");
|
||||||
|
long ti= System.currentTimeMillis();
|
||||||
IOContainer output = kmeans.apply(innerInput);
|
IOContainer output = kmeans.apply(innerInput);
|
||||||
|
AnalysisLogger.getLogger().debug("KMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
|
||||||
AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished");
|
AnalysisLogger.getLogger().debug("KMeans: ...Clustering Finished");
|
||||||
status = 70f;
|
status = 70f;
|
||||||
|
|
||||||
|
|
|
@ -20,17 +20,6 @@ import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
|
||||||
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
|
||||||
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
|
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
|
||||||
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
|
||||||
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
|
|
||||||
|
|
||||||
import com.rapidminer.example.Attribute;
|
|
||||||
import com.rapidminer.example.Attributes;
|
|
||||||
import com.rapidminer.example.Example;
|
|
||||||
import com.rapidminer.example.ExampleSet;
|
|
||||||
import com.rapidminer.operator.IOContainer;
|
|
||||||
import com.rapidminer.operator.IOObject;
|
|
||||||
import com.rapidminer.operator.clustering.Cluster;
|
|
||||||
import com.rapidminer.operator.clustering.ClusterModel;
|
|
||||||
import com.rapidminer.tools.OperatorService;
|
|
||||||
|
|
||||||
import weka.clusterers.ClusterEvaluation;
|
import weka.clusterers.ClusterEvaluation;
|
||||||
import weka.core.DenseInstance;
|
import weka.core.DenseInstance;
|
||||||
|
@ -213,11 +202,13 @@ public class XMeansWrapper extends DBScan {
|
||||||
InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8"));
|
InputStream tis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8"));
|
||||||
loader.setSource(tis);
|
loader.setSource(tis);
|
||||||
Instances id = loader.getDataSet();
|
Instances id = loader.getDataSet();
|
||||||
|
long ti= System.currentTimeMillis();
|
||||||
XMeans xmeans = new XMeans();
|
XMeans xmeans = new XMeans();
|
||||||
xmeans.setMaxIterations(Integer.parseInt(maxIterations));
|
xmeans.setMaxIterations(Integer.parseInt(maxIterations));
|
||||||
xmeans.setMinNumClusters(Integer.parseInt(minClusters));
|
xmeans.setMinNumClusters(Integer.parseInt(minClusters));
|
||||||
xmeans.setMaxNumClusters(Integer.parseInt(maxClusters));
|
xmeans.setMaxNumClusters(Integer.parseInt(maxClusters));
|
||||||
xmeans.buildClusterer(id);
|
xmeans.buildClusterer(id);
|
||||||
|
AnalysisLogger.getLogger().debug("XMEANS: ...ELAPSED CLUSTERING TIME: "+(System.currentTimeMillis()-ti));
|
||||||
status = 50f;
|
status = 50f;
|
||||||
|
|
||||||
// do clustering
|
// do clustering
|
||||||
|
|
|
@ -45,7 +45,7 @@ public static void main(String[] args) throws Exception {
|
||||||
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
config.setParam("OccurrencePointsClusterTable","occcluster_dbscan");
|
config.setParam("OccurrencePointsClusterTable","occcluster_dbscan");
|
||||||
config.setParam("epsilon","10");
|
config.setParam("epsilon","10");
|
||||||
config.setParam("minPoints","1");
|
config.setParam("min_points","3");
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
@ -59,10 +59,10 @@ public static void main(String[] args) throws Exception {
|
||||||
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||||
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
config.setParam("OccurrencePointsClusterTable","occcluster_kmeans");
|
config.setParam("OccurrencePointsClusterTable","occcluster_kmeans");
|
||||||
config.setParam("k","50");
|
config.setParam("k","30");
|
||||||
config.setParam("max_runs","10");
|
config.setParam("max_runs","1000");
|
||||||
config.setParam("max_optimization_steps","10");
|
config.setParam("max_optimization_steps","1000");
|
||||||
config.setParam("min_points","2");
|
config.setParam("min_points","3");
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
@ -77,9 +77,9 @@ public static void main(String[] args) throws Exception {
|
||||||
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
config.setParam("FeaturesColumnNames","centerlat"+AlgorithmConfiguration.getListSeparator()+"centerlong");
|
||||||
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
config.setParam("OccurrencePointsClusterTable","occcluster_xmeans");
|
||||||
config.setParam("maxIterations","1000");
|
config.setParam("maxIterations","1000");
|
||||||
config.setParam("minClusters","10");
|
config.setParam("minClusters","30");
|
||||||
config.setParam("maxClusters","50");
|
config.setParam("maxClusters","50");
|
||||||
config.setParam("min_points","10");
|
config.setParam("min_points","3");
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue