diff --git a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java index e25d041..b8669d4 100644 --- a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java +++ b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java @@ -171,13 +171,16 @@ public class Sampler { if (DBType.equals(POSTGRES)) { -// attribute = "CAST(" + listColumns.get(i) + " as text), "; - attribute = "CAST(" + listColumns.get(i) + " as character varying(255)), "; + // attribute = "CAST(" + listColumns.get(i) + " as text), "; + attribute = "CAST(" + listColumns.get(i) + + " as character varying(255)), "; if (i == (listColumns.size() - 1)) { -// attribute = "CAST(" + listColumns.get(i) + " as text)"; - attribute = "CAST(" + listColumns.get(i) + " as character varying(255))"; + // attribute = "CAST(" + listColumns.get(i) + + // " as text)"; + attribute = "CAST(" + listColumns.get(i) + + " as character varying(255))"; } @@ -197,7 +200,7 @@ public class Sampler { // " as CHAR CHARACTER SET utf8), "; // attribute = "CONVERT(" + listColumns.get(i) + // ", CHAR), "; - attribute = "CAST(" + "`" +listColumns.get(i)+ "`" + attribute = "CAST(" + "`" + listColumns.get(i) + "`" + " as CHAR(255) CHARACTER SET utf8), "; if (i == (listColumns.size() - 1)) { @@ -208,14 +211,17 @@ public class Sampler { // attribute = "CONVERT(" + listColumns.get(i) + // ", BINARY)"; - attribute = "CAST(" + "`" +listColumns.get(i)+ "`" - + " as CHAR(255) CHARACTER SET utf8)"; + attribute = "CAST(" + "`" + listColumns.get(i) + + "`" + " as CHAR(255) CHARACTER SET utf8)"; } } else { - attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`" + attribute = "CAST(CAST(" + + "`" + + listColumns.get(i) + + "`" + " as BINARY) as CHAR(255) CHARACTER SET utf8), "; if (i == (listColumns.size() - 1)) { @@ -226,7 +232,10 @@ public class Sampler { // attribute = "CONVERT(" + listColumns.get(i) + // ", BINARY)"; - attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`" + attribute = "CAST(CAST(" + + "`" + + listColumns.get(i) + + "`" + " as BINARY) as CHAR(255) CHARACTER SET utf8)"; } @@ -560,7 +569,7 @@ public class Sampler { // extract 200 rows randomly for each iteration extractionRows: for (int i = 0; i < NIterations; i++) { - System.out.println("index iteration: " + i); + // System.out.println("index iteration: " + i); AnalysisLogger.getLogger().debug( "Sampler->executing the query: " + query); @@ -706,13 +715,13 @@ public class Sampler { if ((listRows.size() > 100) && (removal == false)) { - //print check -// for (int k = 0; k < listRows.size(); k++) { -// -// AnalysisLogger.getLogger().debug( -// "Sampler->row with index: " + k + " score " -// + listRows.get(k).getScore()); -// } + // print check + // for (int k = 0; k < listRows.size(); k++) { + // + // AnalysisLogger.getLogger().debug( + // "Sampler->row with index: " + k + " score " + // + listRows.get(k).getScore()); + // } AnalysisLogger.getLogger().debug( "Sampler->starting the removal operation"); @@ -727,8 +736,8 @@ public class Sampler { RowScore row = listRows.remove(100); -// AnalysisLogger.getLogger().debug( -// "Sampler->removing row with score: " + row.getScore()); + // AnalysisLogger.getLogger().debug( + // "Sampler->removing row with score: " + row.getScore()); numElemToDelete = numElemToDelete - 1; @@ -744,10 +753,11 @@ public class Sampler { for (int i = 0; i < listRows.size(); i++) { -// //check rows added in the final result -// AnalysisLogger.getLogger().debug( -// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow()); - + // //check rows added in the final result + // AnalysisLogger.getLogger().debug( + // "Sampler->adding row with index: " + i + " " + + // listRows.get(i).getRow()); + rows.add(listRows.get(i).getRow()); } @@ -915,7 +925,8 @@ public class Sampler { if (DBType.equals(POSTGRES)) { // the full name equal to "schemaname.tablename" - tablename = schemaName + "." + "\"" + tablename + "\""; + String tableName = ""; + tableName = schemaName + "." + "\"" + tablename + "\""; query = String.format( queryForSmartSampleWithThresholdOnTablePostgres, @@ -924,9 +935,9 @@ public class Sampler { // build the query for database mysql if (DBType.equals(MYSQL)) { - // the full name equal to "dbname.tablename" - tablename = schemaName + "." + tablename; + String tableName = ""; + tableName = schemaName + "." + tablename; query = String.format( queryForSmartSampleWithThresholdOnTableMysql, @@ -999,18 +1010,20 @@ public class Sampler { // if (value == columnArray.length) { - AnalysisLogger.getLogger().debug( - "Sampler-> column array dimension: " - + columnArray.length); + // print check + // AnalysisLogger.getLogger().debug( + // "Sampler-> column array dimension: " + // + columnArray.length); double thresholdRank = ((columnArray.length) * 80); thresholdRank = thresholdRank / 100; double valCeil = Math.round(thresholdRank); - AnalysisLogger.getLogger().debug( - "Sampler-> threshold: " + thresholdRank - + " rounded value: " + valCeil); + // print check + // AnalysisLogger.getLogger().debug( + // "Sampler-> threshold: " + thresholdRank + // + " rounded value: " + valCeil); if (value >= (int) valCeil) { @@ -1096,8 +1109,8 @@ public class Sampler { RowScore row = listRows.remove(100); -// AnalysisLogger.getLogger().debug( -// "Sampler->removing row with score: " + row.getScore()); + // AnalysisLogger.getLogger().debug( + // "Sampler->removing row with score: " + row.getScore()); numElemToDelete = numElemToDelete - 1; @@ -1113,7 +1126,7 @@ public class Sampler { for (int i = 0; i < listRows.size(); i++) { - //check rows added in the final result + // check rows added in the final result // AnalysisLogger.getLogger().debug( // "Sampler->adding row with index: " + i + " " + // listRows.get(i).getRow()); @@ -1266,10 +1279,10 @@ public class Sampler { queryForRandomSampleOnTablePostgres, listAttributes, tableName); } - + if (DBType.equals(MYSQL)) { - //the full name equal to "dbname.tablename" - tableName=schemaName+"."+tableName; + // the full name equal to "dbname.tablename" + tableName = schemaName + "." + tableName; querySampleOnTable = String.format( queryForRandomSampleOnTableMysql, listAttributes, tableName); @@ -1277,10 +1290,10 @@ public class Sampler { } -// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres - + // if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres + if (NumRows > 700000) { - + // generate an index randomly to execute the query // Define threshold @@ -1315,28 +1328,26 @@ public class Sampler { AnalysisLogger.getLogger().debug("Sampler->X index: " + X); } - - if (DBType.equals(POSTGRES)){ + + if (DBType.equals(POSTGRES)) { // the full name equal to "schemaname.tablename" tableName = schemaName + "." + "\"" + tableName + "\""; - + querySampleOnTable = String.format( queryForRandomSampleWithThresholdOnTablePostgres, listAttributes, tableName, X); - + } - - if (DBType.equals(MYSQL)) { // MySQL // the full name equal to "dbname.tablename" tableName = schemaName + "." + tableName; -// querySampleOnTable = String -// .format(queryForRandomSampleOnTableMysql, listAttributes, -// tableName); - + // querySampleOnTable = String + // .format(queryForRandomSampleOnTableMysql, listAttributes, + // tableName); + querySampleOnTable = String.format( queryForRandomSampleWithThresholdOnTableMysql, listAttributes, tableName, X); @@ -1345,16 +1356,16 @@ public class Sampler { } -// if (DBType.equals(MYSQL)) { // MySQL -// -// // the full name equal to "dbname.tablename" -// tableName = schemaName + "." + tableName; -// -// querySampleOnTable = String -// .format(queryForRandomSampleOnTableMysql, listAttributes, -// tableName); -// -// } + // if (DBType.equals(MYSQL)) { // MySQL + // + // // the full name equal to "dbname.tablename" + // tableName = schemaName + "." + tableName; + // + // querySampleOnTable = String + // .format(queryForRandomSampleOnTableMysql, listAttributes, + // tableName); + // + // } AnalysisLogger.getLogger() .debug("Sampler->preparing to submit the query: "