git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngine@56882 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
6ad72aab53
commit
d34b63d317
|
@ -52,7 +52,7 @@ public class DBScan implements Clusterer{
|
||||||
public static String outliersColumn = "outlier";
|
public static String outliersColumn = "outlier";
|
||||||
public static String outliersColumnType = "boolean";
|
public static String outliersColumnType = "boolean";
|
||||||
|
|
||||||
public static void mainOLD(String[] args) throws Exception{
|
public static void mainCluster(String[] args) throws Exception{
|
||||||
|
|
||||||
|
|
||||||
String coordinates [] = {
|
String coordinates [] = {
|
||||||
|
@ -95,7 +95,7 @@ public class DBScan implements Clusterer{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception{
|
public static void mainRandom(String[] args) throws Exception{
|
||||||
int max = 100000;
|
int max = 100000;
|
||||||
|
|
||||||
|
|
||||||
|
@ -129,6 +129,32 @@ public class DBScan implements Clusterer{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception{
|
||||||
|
long t0 = System.currentTimeMillis();
|
||||||
|
|
||||||
|
AlgorithmConfiguration config = new AlgorithmConfiguration();
|
||||||
|
config.setConfigPath("./cfg/");
|
||||||
|
config.setPersistencePath("./");
|
||||||
|
config.setParam("OccurrencePointsTable","presence_basking_cluster");
|
||||||
|
config.setParam("FeaturesColumnNames","centerlat,centerlong");
|
||||||
|
config.setParam("OccurrencePointsClusterTable","occCluster_1");
|
||||||
|
config.setParam("epsilon","10");
|
||||||
|
config.setParam("minPoints","1");
|
||||||
|
|
||||||
|
config.setParam("DatabaseUserName","gcube");
|
||||||
|
config.setParam("DatabasePassword","d4science2");
|
||||||
|
config.setParam("DatabaseURL","jdbc:postgresql://146.48.87.169/testdb");
|
||||||
|
config.setParam("DatabaseDriver","org.postgresql.Driver");
|
||||||
|
|
||||||
|
DBScan dbscanner = new DBScan();
|
||||||
|
dbscanner.setConfiguration(config);
|
||||||
|
dbscanner.init();
|
||||||
|
dbscanner.cluster();
|
||||||
|
|
||||||
|
System.out.println("ELAPSED "+(System.currentTimeMillis()-t0));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public INFRASTRUCTURE getInfrastructure() {
|
public INFRASTRUCTURE getInfrastructure() {
|
||||||
return INFRASTRUCTURE.LOCAL;
|
return INFRASTRUCTURE.LOCAL;
|
||||||
|
@ -136,17 +162,41 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init() throws Exception {
|
public void init() throws Exception {
|
||||||
|
|
||||||
if (config!=null)
|
if (config!=null)
|
||||||
config.initRapidMiner();
|
config.initRapidMiner();
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Initialized Rapid Miner ");
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Initializing Database Connection");
|
||||||
dbHibConnection=DatabaseUtils.initDBSession(config);
|
dbHibConnection=DatabaseUtils.initDBSession(config);
|
||||||
//create the final table
|
//create the final table
|
||||||
String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
|
try{
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: dropping table "+OccurrencePointsClusterTable);
|
||||||
|
String dropStatement = DatabaseUtils.dropTableStatement(OccurrencePointsClusterTable);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: dropping table "+dropStatement);
|
||||||
|
DatabaseFactory.executeSQLUpdate(dropStatement, dbHibConnection);
|
||||||
|
}catch(Exception e){
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Could not drop table "+OccurrencePointsClusterTable);
|
||||||
|
}
|
||||||
//create Table
|
//create Table
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Creating table "+OccurrencePointsClusterTable);
|
||||||
|
String [] features = FeaturesColumnNames.split(",");
|
||||||
|
String columns = "";
|
||||||
|
|
||||||
|
for (int i=0;i<features.length;i++){
|
||||||
|
columns +=features[i]+" real";
|
||||||
|
if (i<features.length-1)
|
||||||
|
columns+=",";
|
||||||
|
}
|
||||||
|
|
||||||
|
String createStatement = "create table "+OccurrencePointsClusterTable+" ( "+columns+")";
|
||||||
|
// String createStatement = new DatabaseUtils(dbHibConnection).buildCreateStatement(OccurrencePointsTable,OccurrencePointsClusterTable);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: "+createStatement);
|
||||||
DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
|
DatabaseFactory.executeSQLUpdate(createStatement, dbHibConnection);
|
||||||
//add two columns one for cluster and another for outliers
|
//add two columns one for cluster and another for outliers
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Adding Columns");
|
||||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, clusterColumn, clusterColumnType), dbHibConnection);
|
||||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.addColumnStatement(OccurrencePointsClusterTable, outliersColumn, outliersColumnType), dbHibConnection);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Getting Samples");
|
||||||
//build samples
|
//build samples
|
||||||
getSamples();
|
getSamples();
|
||||||
status = 10f;
|
status = 10f;
|
||||||
|
@ -154,13 +204,14 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setConfiguration(AlgorithmConfiguration config) {
|
public void setConfiguration(AlgorithmConfiguration config) {
|
||||||
|
if (config!=null){
|
||||||
epsilon=config.getParam("epsilon");
|
epsilon=config.getParam("epsilon");
|
||||||
minPoints = config.getParam("minPoints");
|
minPoints = config.getParam("minPoints");
|
||||||
OccurrencePointsTable = config.getParam("OccurrencePointsTable");
|
OccurrencePointsTable = config.getParam("OccurrencePointsTable").toLowerCase();
|
||||||
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable");
|
OccurrencePointsClusterTable=config.getParam("OccurrencePointsClusterTable").toLowerCase();
|
||||||
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
FeaturesColumnNames=config.getParam("FeaturesColumnNames");
|
||||||
this.config=config;
|
this.config=config;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,9 +236,9 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
ir++;
|
ir++;
|
||||||
}
|
}
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Building Sample Set For Miner");
|
||||||
produceSamples(samplesVector);
|
produceSamples(samplesVector);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Obtained "+samplesVector.length+" chunks");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void produceSamples(double[][] sampleVectors) throws Exception{
|
public void produceSamples(double[][] sampleVectors) throws Exception{
|
||||||
|
@ -201,8 +252,11 @@ public class DBScan implements Clusterer{
|
||||||
@Override
|
@Override
|
||||||
public void cluster() throws Exception {
|
public void cluster() throws Exception {
|
||||||
|
|
||||||
StringBuffer bufferRows = new StringBuffer();
|
if ((config==null)||epsilon==null||minPoints==null||points==null){
|
||||||
|
throw new Exception("DBScan: Error incomplete parameters");
|
||||||
|
}
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Settin up the cluster");
|
||||||
//take elements and produce example set
|
//take elements and produce example set
|
||||||
com.rapidminer.operator.clustering.clusterer.DBScan clusterer = (com.rapidminer.operator.clustering.clusterer.DBScan) OperatorService.createOperator("DBScanClustering");
|
com.rapidminer.operator.clustering.clusterer.DBScan clusterer = (com.rapidminer.operator.clustering.clusterer.DBScan) OperatorService.createOperator("DBScanClustering");
|
||||||
clusterer.setParameter("local_random_seed", "-1");
|
clusterer.setParameter("local_random_seed", "-1");
|
||||||
|
@ -213,8 +267,9 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
IOContainer innerInput = new IOContainer(points);
|
IOContainer innerInput = new IOContainer(points);
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Clustering...");
|
||||||
IOContainer output = clusterer.apply(innerInput);
|
IOContainer output = clusterer.apply(innerInput);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: ...Clustering Finished");
|
||||||
status = 70f;
|
status = 70f;
|
||||||
|
|
||||||
IOObject[] outputvector = output.getIOObjects();
|
IOObject[] outputvector = output.getIOObjects();
|
||||||
|
@ -224,26 +279,31 @@ public class DBScan implements Clusterer{
|
||||||
String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
|
String columnsNames =FeaturesColumnNames+","+clusterColumn+","+outliersColumn;
|
||||||
int minpoints = Integer.parseInt(minPoints);
|
int minpoints = Integer.parseInt(minPoints);
|
||||||
int nClusters = innermodel.getClusters().size();
|
int nClusters = innermodel.getClusters().size();
|
||||||
float statusstep = ((100f-status)/ (float)nClusters);
|
float statusstep = ((100f-status)/ (float)(nClusters+1));
|
||||||
|
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Start Write On DB");
|
||||||
for (Cluster c : innermodel.getClusters()){
|
for (Cluster c : innermodel.getClusters()){
|
||||||
|
StringBuffer bufferRows = new StringBuffer();
|
||||||
//take cluster id
|
//take cluster id
|
||||||
int id = c.getClusterId();
|
int id = c.getClusterId();
|
||||||
boolean outlier = false;
|
boolean outlier = false;
|
||||||
// System.out.print("ID: '"+id+"'\n");
|
|
||||||
//take cluster element indexes
|
//take cluster element indexes
|
||||||
if (c.getExampleIds().size()==minpoints);
|
int npoints = c.getExampleIds().size();
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Analyzing Cluster ->"+id+" with "+npoints+" "+minpoints);
|
||||||
|
if (npoints==minpoints)
|
||||||
outlier=true;
|
outlier=true;
|
||||||
|
|
||||||
|
int k=0;
|
||||||
|
|
||||||
for (Object o:c.getExampleIds()){
|
for (Object o:c.getExampleIds()){
|
||||||
//transform into a numerical index
|
//transform into a numerical index
|
||||||
int idd = (int) Double.parseDouble(""+o);
|
int idd = (int) Double.parseDouble(""+o);
|
||||||
|
|
||||||
//take the corresponding sample
|
//take the corresponding sample
|
||||||
Example e = es.getExample(idd-1);
|
Example e = es.getExample(idd-1);
|
||||||
//take the attributes of the sample
|
//take the attributes of the sample
|
||||||
Attributes attributes = e.getAttributes();
|
Attributes attributes = e.getAttributes();
|
||||||
|
|
||||||
// System.out.print(""+(idd-1)+":cluster: ");
|
|
||||||
//for each attribute (yet filtered on numeric ones) add to the writing row
|
//for each attribute (yet filtered on numeric ones) add to the writing row
|
||||||
bufferRows.append("(");
|
bufferRows.append("(");
|
||||||
StringBuffer valueStrings = new StringBuffer();
|
StringBuffer valueStrings = new StringBuffer();
|
||||||
|
@ -254,17 +314,29 @@ public class DBScan implements Clusterer{
|
||||||
towrite = towrite.substring(0,towrite.length()-1);
|
towrite = towrite.substring(0,towrite.length()-1);
|
||||||
|
|
||||||
//append the clusterid and outlier
|
//append the clusterid and outlier
|
||||||
bufferRows.append(towrite+","+id+","+outlier);
|
bufferRows.append(towrite+","+id+","+outlier+")");
|
||||||
// System.out.print("\n");
|
if (k<npoints-1){
|
||||||
|
bufferRows.append(",");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
k++;
|
||||||
|
AnalysisLogger.getLogger().trace("DBScan: Classification : "+towrite+"->"+id+" is outlier?"+outlier);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bufferRows.length()>0){
|
||||||
|
// AnalysisLogger.getLogger().trace("DBScan: Inserting Buffer "+DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows));
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Writing into DB");
|
||||||
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
|
DatabaseFactory.executeSQLUpdate(DatabaseUtils.insertFromBuffer(OccurrencePointsClusterTable, columnsNames, bufferRows),dbHibConnection);
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Finished with writing into DB");
|
||||||
|
}else
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Nothing to write in the buffer");
|
||||||
|
|
||||||
float instatus = status + statusstep;
|
float instatus = status + statusstep;
|
||||||
status = Math.min(95f, instatus);
|
status = Math.min(95f, instatus);
|
||||||
// System.out.print("\n");
|
AnalysisLogger.getLogger().debug("DBScan: Status: "+status);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shutdown();
|
||||||
status = 100f;
|
status = 100f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,6 +345,12 @@ public class DBScan implements Clusterer{
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void shutdown() {
|
public void shutdown() {
|
||||||
|
try{
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Closing DB Connection ");
|
||||||
|
dbHibConnection.close();
|
||||||
|
}catch(Exception e){
|
||||||
|
AnalysisLogger.getLogger().debug("DBScan: Could not shut down connection");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -31,8 +31,9 @@ public class DatabaseUtils {
|
||||||
private String primaryKColName;
|
private String primaryKColName;
|
||||||
|
|
||||||
private void getPrimaryKeys(List<Object> keys, String table) {
|
private void getPrimaryKeys(List<Object> keys, String table) {
|
||||||
|
int keynum =0;
|
||||||
int keynum = keys.size();
|
if (keys!=null)
|
||||||
|
keynum= keys.size();
|
||||||
StringBuffer pkeybuffer = new StringBuffer();
|
StringBuffer pkeybuffer = new StringBuffer();
|
||||||
primaryKColName = "";
|
primaryKColName = "";
|
||||||
for (int i = 0; i < keynum; i++) {
|
for (int i = 0; i < keynum; i++) {
|
||||||
|
|
Loading…
Reference in New Issue