git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@56882 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
6ad72aab53
commit
d34b63d317
|
@ -52,7 +52,7 @@ public class DBScan implements Clusterer{
|
|||
public static String outliersColumn = "outlier";
|
||||
public static String outliersColumnType = "boolean";
|
||||
|
||||
public static void mainOLD(String[] args) throws Exception{
|
||||
public static void mainCluster(String[] args) throws Exception{
|
||||
|
||||
|
||||
String coordinates [] = {
|
||||
|
@ -95,7 +95,7 @@ public class DBScan implements Clusterer{
|
|||
}
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception{
|
||||
public static void mainRandom(String[] args) throws Exception{
|
||||
int max = 100000;
|
||||
|
||||
|
||||
|
@ -129,6 +129,32 @@ public class DBScan implements Clusterer{
|
|||
}
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception{
|
||||
long t0 = System.currentTimeMillis();
|
||||
|
||||
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
||||
config.setConfigPath("./cfg/");
|
||||
config.setPersistencePath("./");
|
||||
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||
config.setParam("FeaturesColumnNames","centerlat,centerlong");
|
||||
config.setParam("OccurrencePointsClusterTable","occCluster_1");
|
||||
config.setParam("epsilon","10");
|
||||
config.setParam("minPoints","1");
|
||||
|
||||
config.setParam("DatabaseUserName","gcube");
|
||||
config.setParam("DatabasePassword","d4science2");
|
||||
config.setParam("DatabaseURL","jdbc:postgresql://146.48.87.169/testdb");
|
||||
config.setParam("DatabaseDriver","org.postgresql.Driver");
|
||||
|
||||
DBScan dbscanner = new DBScan();
|
||||
dbscanner.setConfiguration(config);
|
||||
dbscanner.init();
|
||||
dbscanner.cluster();
|
||||
|
||||
System.out.println("ELAPSED "+(System.currentTimeMillis()-t0));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public INFRASTRUCTURE getInfrastructure() {
|
||||
return INFRASTRUCTURE.LOCAL;
|
||||
|
@ -136,17 +162,41 @@ public class DBScan implements Clusterer{
|
|||
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
|
||||
if (config!=null)
|
||||
config.initRapidMiner();
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Initialized Rapid Miner ");
|
||||
AnalysisLogger.getLogger().debug("DBScan: Initializing Database Connection");
|
||||
dbHibConnection=DatabaseUtils.initDBSession(config);
|
||||
//create the final table
|
||||
String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
|
||||
try{
|
||||
AnalysisLogger.getLogger().debug("DBScan: dropping table "+OccurrencePointsClusterTable);
|
||||
String dropStatement = DatabaseUtils.dropTableStatement(OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("DBScan: dropping table "+dropStatement);
|
||||
DatabaseFactory.executeSQLUpdate(dropStatement, dbHibConnection);
|
||||
}catch(Exception e){
|
||||
AnalysisLogger.getLogger().debug("DBScan: Could not drop table "+OccurrencePointsClusterTable);
|
||||
}
|
||||
//create Table
|
||||
AnalysisLogger.getLogger().debug("DBScan: Creating table "+OccurrencePointsClusterTable);
|
||||
String [] features = FeaturesColumnNames.split(",");
|
||||
String columns = "";
|
||||
|
||||
for (int i=0;i<features.length;i++){
|
||||
columns +=features[i]+" real";
|
||||
if (i<features.length-1)
|
||||
columns+=",";
|
||||
}
|
||||
|
||||
String createStatement = "create table "+OccurrencePointsClusterTable+" ( "+columns+")";
|
||||
// String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
|
||||
AnalysisLogger.getLogger().debug("DBScan: "+createStatement);
|
||||
DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
|
||||
//add two columns one for cluster and another for outliers
|
||||
//add two columns one for cluster and another for outliers
|
||||
AnalysisLogger.getLogger().debug("DBScan: Adding Columns");
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
|
||||
AnalysisLogger.getLogger().debug("DBScan: Getting Samples");
|
||||
//build samples
|
||||
getSamples();
|
||||
status = 10f;
|
||||
|
@ -154,13 +204,14 @@ public class DBScan implements Clusterer{
|
|||
|
||||
@Override
|
||||
public void setConfiguration(AlgorithmConfiguration config) {
|
||||
|
||||
if (config!=null){
|
||||
epsilon=config.getParam("epsilon");
|
||||
minPoints = config.getParam("minPoints");
|
||||
OccurrencePointsTable = config.getParam("OccurrencePointsTable");
|
||||
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable");
|
||||
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
||||
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
||||
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
||||
this.config=config;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -185,9 +236,9 @@ public class DBScan implements Clusterer{
|
|||
|
||||
ir++;
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner");
|
||||
produceSamples(samplesVector);
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks");
|
||||
}
|
||||
|
||||
public void produceSamples(double[][] sampleVectors) throws Exception{
|
||||
|
@ -201,8 +252,11 @@ public class DBScan implements Clusterer{
|
|||
@Override
|
||||
public void cluster() throws Exception {
|
||||
|
||||
StringBuffer bufferRows = new StringBuffer();
|
||||
if ((config==null)||epsilon==null||minPoints==null||points==null){
|
||||
throw new Exception("DBScan: Error incomplete parameters");
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Settin up the cluster");
|
||||
//take elements and produce example set
|
||||
com.rapidminer.operator.clustering.clusterer.DBScan clusterer = (com.rapidminer.operator.clustering.clusterer.DBScan) OperatorService.createOperator("DBScanClustering");
|
||||
clusterer.setParameter("local_random_seed", "-1");
|
||||
|
@ -213,8 +267,9 @@ public class DBScan implements Clusterer{
|
|||
|
||||
IOContainer innerInput = new IOContainer(points);
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Clustering...");
|
||||
IOContainer output = clusterer.apply(innerInput);
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
|
||||
status = 70f;
|
||||
|
||||
IOObject[] outputvector = output.getIOObjects();
|
||||
|
@ -224,26 +279,31 @@ public class DBScan implements Clusterer{
|
|||
String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
|
||||
int minpoints = Integer.parseInt(minPoints);
|
||||
int nClusters = innermodel.getClusters().size();
|
||||
float statusstep = ((100f-status)/ (float)nClusters);
|
||||
float statusstep = ((100f-status)/ (float)(nClusters+1));
|
||||
|
||||
AnalysisLogger.getLogger().debug("DBScan: Start Write On DB");
|
||||
for (Cluster c : innermodel.getClusters()){
|
||||
StringBuffer bufferRows = new StringBuffer();
|
||||
//take cluster id
|
||||
int id = c.getClusterId();
|
||||
boolean outlier = false;
|
||||
// System.out.print("ID: '"+id+"'\n");
|
||||
//take cluster element indexes
|
||||
if (c.getExampleIds().size()==minpoints);
|
||||
int npoints = c.getExampleIds().size();
|
||||
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints+" "+minpoints);
|
||||
if (npoints==minpoints)
|
||||
outlier=true;
|
||||
|
||||
|
||||
int k=0;
|
||||
|
||||
for (Object o:c.getExampleIds()){
|
||||
//transform into a numerical index
|
||||
int idd = (int) Double.parseDouble(""+o);
|
||||
|
||||
//take the corresponding sample
|
||||
Example e = es.getExample(idd-1);
|
||||
//take the attributes of the sample
|
||||
Attributes attributes = e.getAttributes();
|
||||
|
||||
// System.out.print(""+(idd-1)+":cluster: ");
|
||||
//for each attribute (yet filtered on numeric ones) add to the writing row
|
||||
bufferRows.append("(");
|
||||
StringBuffer valueStrings = new StringBuffer();
|
||||
|
@ -254,17 +314,29 @@ public class DBScan implements Clusterer{
|
|||
towrite = towrite.substring(0,towrite.length()-1);
|
||||
|
||||
//append the clusterid and outlier
|
||||
bufferRows.append(towrite+","+id+","+outlier);
|
||||
// System.out.print("\n");
|
||||
bufferRows.append(towrite+","+id+","+outlier+")");
|
||||
if (k<npoints-1){
|
||||
bufferRows.append(",");
|
||||
}
|
||||
|
||||
k++;
|
||||
AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
|
||||
}
|
||||
|
||||
if (bufferRows.length()>0){
|
||||
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
|
||||
AnalysisLogger.getLogger().debug("DBScan: Writing into DB");
|
||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
|
||||
AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB");
|
||||
}else
|
||||
AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer");
|
||||
|
||||
float instatus = status + statusstep;
|
||||
status = Math.min(95f, instatus);
|
||||
// System.out.print("\n");
|
||||
AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
|
||||
}
|
||||
|
||||
|
||||
shutdown();
|
||||
status = 100f;
|
||||
}
|
||||
|
||||
|
@ -273,6 +345,12 @@ public class DBScan implements Clusterer{
|
|||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
try{
|
||||
AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection ");
|
||||
dbHibConnection.close();
|
||||
}catch(Exception e){
|
||||
AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -31,8 +31,9 @@ public class DatabaseUtils {
|
|||
private String primaryKColName;
|
||||
|
||||
private void getPrimaryKeys(List<Object> keys, String table) {
|
||||
|
||||
int keynum = keys.size();
|
||||
int keynum =0;
|
||||
if (keys!=null)
|
||||
keynum= keys.size();
|
||||
StringBuffer pkeybuffer = new StringBuffer();
|
||||
primaryKColName = "";
|
||||
for (int i = 0; i < keynum; i++) {
|
||||
|
|
Loading…
Reference in New Issue