Bug fixed for the smart sampling (with table col2oct2010 -> scientific_names). Corrections in the computeSmartSampleWithThreshold method.

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99565 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Loredana Liccardo 2014-09-05 15:36:52 +00:00
parent 5af78b2871
commit b72e647ea8
1 changed file with 72 additions and 61 deletions

View File

@ -171,13 +171,16 @@ public class Sampler {
if (DBType.equals(POSTGRES)) {
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
attribute = "CAST(" + listColumns.get(i) + " as character varying(255)), ";
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
attribute = "CAST(" + listColumns.get(i)
+ " as character varying(255)), ";
if (i == (listColumns.size() - 1)) {
// attribute = "CAST(" + listColumns.get(i) + " as text)";
attribute = "CAST(" + listColumns.get(i) + " as character varying(255))";
// attribute = "CAST(" + listColumns.get(i) +
// " as text)";
attribute = "CAST(" + listColumns.get(i)
+ " as character varying(255))";
}
@ -197,7 +200,7 @@ public class Sampler {
// " as CHAR CHARACTER SET utf8), ";
// attribute = "CONVERT(" + listColumns.get(i) +
// ", CHAR), ";
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
attribute = "CAST(" + "`" + listColumns.get(i) + "`"
+ " as CHAR(255) CHARACTER SET utf8), ";
if (i == (listColumns.size() - 1)) {
@ -208,14 +211,17 @@ public class Sampler {
// attribute = "CONVERT(" + listColumns.get(i) +
// ", BINARY)";
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
+ " as CHAR(255) CHARACTER SET utf8)";
attribute = "CAST(" + "`" + listColumns.get(i)
+ "`" + " as CHAR(255) CHARACTER SET utf8)";
}
} else {
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
attribute = "CAST(CAST("
+ "`"
+ listColumns.get(i)
+ "`"
+ " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
if (i == (listColumns.size() - 1)) {
@ -226,7 +232,10 @@ public class Sampler {
// attribute = "CONVERT(" + listColumns.get(i) +
// ", BINARY)";
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
attribute = "CAST(CAST("
+ "`"
+ listColumns.get(i)
+ "`"
+ " as BINARY) as CHAR(255) CHARACTER SET utf8)";
}
@ -560,7 +569,7 @@ public class Sampler {
// extract 200 rows randomly for each iteration
extractionRows: for (int i = 0; i < NIterations; i++) {
System.out.println("index iteration: " + i);
// System.out.println("index iteration: " + i);
AnalysisLogger.getLogger().debug(
"Sampler->executing the query: " + query);
@ -706,13 +715,13 @@ public class Sampler {
if ((listRows.size() > 100) && (removal == false)) {
//print check
// for (int k = 0; k < listRows.size(); k++) {
//
// AnalysisLogger.getLogger().debug(
// "Sampler->row with index: " + k + " score "
// + listRows.get(k).getScore());
// }
// print check
// for (int k = 0; k < listRows.size(); k++) {
//
// AnalysisLogger.getLogger().debug(
// "Sampler->row with index: " + k + " score "
// + listRows.get(k).getScore());
// }
AnalysisLogger.getLogger().debug(
"Sampler->starting the removal operation");
@ -727,8 +736,8 @@ public class Sampler {
RowScore row = listRows.remove(100);
// AnalysisLogger.getLogger().debug(
// "Sampler->removing row with score: " + row.getScore());
// AnalysisLogger.getLogger().debug(
// "Sampler->removing row with score: " + row.getScore());
numElemToDelete = numElemToDelete - 1;
@ -744,10 +753,11 @@ public class Sampler {
for (int i = 0; i < listRows.size(); i++) {
// //check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
// //check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i + " " +
// listRows.get(i).getRow());
rows.add(listRows.get(i).getRow());
}
@ -915,7 +925,8 @@ public class Sampler {
if (DBType.equals(POSTGRES)) {
// the full name equal to "schemaname.tablename"
tablename = schemaName + "." + "\"" + tablename + "\"";
String tableName = "";
tableName = schemaName + "." + "\"" + tablename + "\"";
query = String.format(
queryForSmartSampleWithThresholdOnTablePostgres,
@ -924,9 +935,9 @@ public class Sampler {
// build the query for database mysql
if (DBType.equals(MYSQL)) {
// the full name equal to "dbname.tablename"
tablename = schemaName + "." + tablename;
String tableName = "";
tableName = schemaName + "." + tablename;
query = String.format(
queryForSmartSampleWithThresholdOnTableMysql,
@ -999,18 +1010,20 @@ public class Sampler {
// if (value == columnArray.length) {
AnalysisLogger.getLogger().debug(
"Sampler-> column array dimension: "
+ columnArray.length);
// print check
// AnalysisLogger.getLogger().debug(
// "Sampler-> column array dimension: "
// + columnArray.length);
double thresholdRank = ((columnArray.length) * 80);
thresholdRank = thresholdRank / 100;
double valCeil = Math.round(thresholdRank);
AnalysisLogger.getLogger().debug(
"Sampler-> threshold: " + thresholdRank
+ " rounded value: " + valCeil);
// print check
// AnalysisLogger.getLogger().debug(
// "Sampler-> threshold: " + thresholdRank
// + " rounded value: " + valCeil);
if (value >= (int) valCeil) {
@ -1096,8 +1109,8 @@ public class Sampler {
RowScore row = listRows.remove(100);
// AnalysisLogger.getLogger().debug(
// "Sampler->removing row with score: " + row.getScore());
// AnalysisLogger.getLogger().debug(
// "Sampler->removing row with score: " + row.getScore());
numElemToDelete = numElemToDelete - 1;
@ -1113,7 +1126,7 @@ public class Sampler {
for (int i = 0; i < listRows.size(); i++) {
//check rows added in the final result
// check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i + " " +
// listRows.get(i).getRow());
@ -1266,10 +1279,10 @@ public class Sampler {
queryForRandomSampleOnTablePostgres, listAttributes,
tableName);
}
if (DBType.equals(MYSQL)) {
//the full name equal to "dbname.tablename"
tableName=schemaName+"."+tableName;
// the full name equal to "dbname.tablename"
tableName = schemaName + "." + tableName;
querySampleOnTable = String.format(
queryForRandomSampleOnTableMysql, listAttributes,
tableName);
@ -1277,10 +1290,10 @@ public class Sampler {
}
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
if (NumRows > 700000) {
// generate an index randomly to execute the query
// Define threshold
@ -1315,28 +1328,26 @@ public class Sampler {
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
}
if (DBType.equals(POSTGRES)){
if (DBType.equals(POSTGRES)) {
// the full name equal to "schemaname.tablename"
tableName = schemaName + "." + "\"" + tableName + "\"";
querySampleOnTable = String.format(
queryForRandomSampleWithThresholdOnTablePostgres,
listAttributes, tableName, X);
}
if (DBType.equals(MYSQL)) { // MySQL
// the full name equal to "dbname.tablename"
tableName = schemaName + "." + tableName;
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
querySampleOnTable = String.format(
queryForRandomSampleWithThresholdOnTableMysql,
listAttributes, tableName, X);
@ -1345,16 +1356,16 @@ public class Sampler {
}
// if (DBType.equals(MYSQL)) { // MySQL
//
// // the full name equal to "dbname.tablename"
// tableName = schemaName + "." + tableName;
//
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
//
// }
// if (DBType.equals(MYSQL)) { // MySQL
//
// // the full name equal to "dbname.tablename"
// tableName = schemaName + "." + tableName;
//
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
//
// }
AnalysisLogger.getLogger()
.debug("Sampler->preparing to submit the query: "