This commit is contained in:
Gianpaolo Coro 2012-07-27 16:16:33 +00:00
parent 6ad72aab53
commit d34b63d317
2 changed files with 102 additions and 23 deletions

View File

@ -52,7 +52,7 @@ public class DBScan implements Clusterer{
public static String outliersColumn = "outlier";
public static String outliersColumnType = "boolean";
public static void mainOLD(String[] args) throws Exception{
public static void mainCluster(String[] args) throws Exception{
String coordinates [] = {
@ -95,7 +95,7 @@ public class DBScan implements Clusterer{
}
public static void main(String[] args) throws Exception{
public static void mainRandom(String[] args) throws Exception{
int max = 100000;
@ -129,6 +129,32 @@ public class DBScan implements Clusterer{
}
public static void main(String[] args) throws Exception{
    // Smoke-test entry point: runs a complete DBScan clustering job against a
    // test database using a hard-coded configuration, then prints the elapsed
    // wall-clock time.
    // NOTE(review): database credentials and the host URL are hard-coded below —
    // acceptable only for a local test harness, never for production use.
    long startMillis = System.currentTimeMillis();

    AlgorithmConfiguration config = new AlgorithmConfiguration();
    config.setConfigPath("./cfg/");
    config.setPersistencePath("./");

    // Clustering input/output tables and algorithm parameters.
    config.setParam("OccurrencePointsTable","presence_basking_cluster");
    config.setParam("FeaturesColumnNames","centerlat,centerlong");
    config.setParam("OccurrencePointsClusterTable","occCluster_1");
    config.setParam("epsilon","10");
    config.setParam("minPoints","1");

    // Database connection settings.
    config.setParam("DatabaseUserName","gcube");
    config.setParam("DatabasePassword","d4science2");
    config.setParam("DatabaseURL","jdbc:postgresql://146.48.87.169/testdb");
    config.setParam("DatabaseDriver","org.postgresql.Driver");

    // Configure, initialize (DB session + staging tables), and run the clusterer.
    DBScan dbscanner = new DBScan();
    dbscanner.setConfiguration(config);
    dbscanner.init();
    dbscanner.cluster();

    System.out.println("ELAPSED "+(System.currentTimeMillis()-startMillis));
}
@Override
public INFRASTRUCTURE getInfrastructure() {
return INFRASTRUCTURE.LOCAL;
@ -136,17 +162,41 @@ public class DBScan implements Clusterer{
@Override
public void init() throws Exception {
if (config!=null)
config.initRapidMiner();
AnalysisLogger.getLogger().debug("DBScan: Initialized Rapid Miner ");
AnalysisLogger.getLogger().debug("DBScan: Initializing Database Connection");
dbHibConnection=DatabaseUtils.initDBSession(config);
//create the final table
String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
try{
AnalysisLogger.getLogger().debug("DBScan: dropping table "+OccurrencePointsClusterTable);
String dropStatement = DatabaseUtils.dropTableStatement(OccurrencePointsClusterTable);
AnalysisLogger.getLogger().debug("DBScan: dropping table "+dropStatement);
DatabaseFactory.executeSQLUpdate(dropStatement, dbHibConnection);
}catch(Exception e){
AnalysisLogger.getLogger().debug("DBScan: Could not drop table "+OccurrencePointsClusterTable);
}
//create Table
AnalysisLogger.getLogger().debug("DBScan: Creating table "+OccurrencePointsClusterTable);
String [] features = FeaturesColumnNames.split(",");
String columns = "";
for (int i=0;i<features.length;i++){
columns +=features[i]+" real";
if (i<features.length-1)
columns+=",";
}
String createStatement = "create table "+OccurrencePointsClusterTable+" ( "+columns+")";
// String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
AnalysisLogger.getLogger().debug("DBScan: "+createStatement);
DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
//add two columns one for cluster and another for outliers
//add two columns one for cluster and another for outliers
AnalysisLogger.getLogger().debug("DBScan: Adding Columns");
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
AnalysisLogger.getLogger().debug("DBScan: Getting Samples");
//build samples
getSamples();
status = 10f;
@ -154,13 +204,14 @@ public class DBScan implements Clusterer{
@Override
public void setConfiguration(AlgorithmConfiguration config) {
    // Copies algorithm parameters and I/O table names out of the supplied
    // configuration into this instance's fields. A null configuration is
    // silently ignored, leaving the current state untouched.
    if (config!=null){
        epsilon=config.getParam("epsilon");
        minPoints = config.getParam("minPoints");
        // Fixed: removed the redundant non-lowercased assignments that were
        // immediately overwritten on the next lines (dead stores).
        // Table names are lower-cased — presumably because the backing
        // PostgreSQL database folds unquoted identifiers to lower case;
        // TODO confirm against the table-creation code in init().
        OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
        OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
        FeaturesColumnNames=config.getParam("FeaturesColumnNames");
        this.config=config;
    }
}
@ -185,9 +236,9 @@ public class DBScan implements Clusterer{
ir++;
}
AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner");
produceSamples(samplesVector);
AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks");
}
public void produceSamples(double[][] sampleVectors) throws Exception{
@ -201,8 +252,11 @@ public class DBScan implements Clusterer{
@Override
public void cluster() throws Exception {
StringBuffer bufferRows = new StringBuffer();
if ((config==null)||epsilon==null||minPoints==null||points==null){
throw new Exception("DBScan: Error incomplete parameters");
}
AnalysisLogger.getLogger().debug("DBScan: Settin up the cluster");
//take elements and produce example set
com.rapidminer.operator.clustering.clusterer.DBScan clusterer = (com.rapidminer.operator.clustering.clusterer.DBScan) OperatorService.createOperator("DBScanClustering");
clusterer.setParameter("local_random_seed", "-1");
@ -213,8 +267,9 @@ public class DBScan implements Clusterer{
IOContainer innerInput = new IOContainer(points);
AnalysisLogger.getLogger().debug("DBScan: Clustering...");
IOContainer output = clusterer.apply(innerInput);
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
status = 70f;
IOObject[] outputvector = output.getIOObjects();
@ -224,26 +279,31 @@ public class DBScan implements Clusterer{
String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
int minpoints = Integer.parseInt(minPoints);
int nClusters = innermodel.getClusters().size();
float statusstep = ((100f-status)/ (float)nClusters);
float statusstep = ((100f-status)/ (float)(nClusters+1));
AnalysisLogger.getLogger().debug("DBScan: Start Write On DB");
for (Cluster c : innermodel.getClusters()){
StringBuffer bufferRows = new StringBuffer();
//take cluster id
int id = c.getClusterId();
boolean outlier = false;
// System.out.print("ID: '"+id+"'\n");
//take cluster element indexes
if (c.getExampleIds().size()==minpoints);
int npoints = c.getExampleIds().size();
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints+" "+minpoints);
if (npoints==minpoints)
outlier=true;
int k=0;
for (Object o:c.getExampleIds()){
//transform into a numerical index
int idd = (int) Double.parseDouble(""+o);
//take the corresponding sample
Example e = es.getExample(idd-1);
//take the attributes of the sample
Attributes attributes = e.getAttributes();
// System.out.print(""+(idd-1)+":cluster: ");
//for each attribute (yet filtered on numeric ones) add to the writing row
bufferRows.append("(");
StringBuffer valueStrings = new StringBuffer();
@ -254,17 +314,29 @@ public class DBScan implements Clusterer{
towrite = towrite.substring(0,towrite.length()-1);
//append the clusterid and outlier
bufferRows.append(towrite+","+id+","+outlier);
// System.out.print("\n");
bufferRows.append(towrite+","+id+","+outlier+")");
if (k<npoints-1){
bufferRows.append(",");
}
k++;
AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
}
if (bufferRows.length()>0){
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
AnalysisLogger.getLogger().debug("DBScan: Writing into DB");
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB");
}else
AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer");
float instatus = status + statusstep;
status = Math.min(95f, instatus);
// System.out.print("\n");
AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
}
shutdown();
status = 100f;
}
@ -273,6 +345,12 @@ public class DBScan implements Clusterer{
@Override
public void shutdown() {
    // Best-effort release of the Hibernate database session. Any failure is
    // logged and deliberately swallowed so that shutdown never propagates an
    // exception to the caller.
    try{
        AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection ");
        dbHibConnection.close();
    }catch(Exception closingFailure){
        AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection");
    }
}
@Override

View File

@ -31,8 +31,9 @@ public class DatabaseUtils {
private String primaryKColName;
private void getPrimaryKeys(List<Object> keys, String table) {
int keynum = keys.size();
int keynum =0;
if (keys!=null)
keynum= keys.size();
StringBuffer pkeybuffer = new StringBuffer();
primaryKColName = "";
for (int i = 0; i < keynum; i++) {