Bug fixed for the smart sampling (with table col2oct2010 -> scientific_names). Corrections in the computeSmartSampleWithThreshold method.
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99565 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
5af78b2871
commit
b72e647ea8
|
@ -171,13 +171,16 @@ public class Sampler {
|
|||
|
||||
if (DBType.equals(POSTGRES)) {
|
||||
|
||||
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
|
||||
attribute = "CAST(" + listColumns.get(i) + " as character varying(255)), ";
|
||||
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
|
||||
attribute = "CAST(" + listColumns.get(i)
|
||||
+ " as character varying(255)), ";
|
||||
|
||||
if (i == (listColumns.size() - 1)) {
|
||||
|
||||
// attribute = "CAST(" + listColumns.get(i) + " as text)";
|
||||
attribute = "CAST(" + listColumns.get(i) + " as character varying(255))";
|
||||
// attribute = "CAST(" + listColumns.get(i) +
|
||||
// " as text)";
|
||||
attribute = "CAST(" + listColumns.get(i)
|
||||
+ " as character varying(255))";
|
||||
|
||||
}
|
||||
|
||||
|
@ -197,7 +200,7 @@ public class Sampler {
|
|||
// " as CHAR CHARACTER SET utf8), ";
|
||||
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||
// ", CHAR), ";
|
||||
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
|
||||
attribute = "CAST(" + "`" + listColumns.get(i) + "`"
|
||||
+ " as CHAR(255) CHARACTER SET utf8), ";
|
||||
|
||||
if (i == (listColumns.size() - 1)) {
|
||||
|
@ -208,14 +211,17 @@ public class Sampler {
|
|||
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||
// ", BINARY)";
|
||||
|
||||
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
|
||||
+ " as CHAR(255) CHARACTER SET utf8)";
|
||||
attribute = "CAST(" + "`" + listColumns.get(i)
|
||||
+ "`" + " as CHAR(255) CHARACTER SET utf8)";
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
|
||||
attribute = "CAST(CAST("
|
||||
+ "`"
|
||||
+ listColumns.get(i)
|
||||
+ "`"
|
||||
+ " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
|
||||
|
||||
if (i == (listColumns.size() - 1)) {
|
||||
|
@ -226,7 +232,10 @@ public class Sampler {
|
|||
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||
// ", BINARY)";
|
||||
|
||||
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
|
||||
attribute = "CAST(CAST("
|
||||
+ "`"
|
||||
+ listColumns.get(i)
|
||||
+ "`"
|
||||
+ " as BINARY) as CHAR(255) CHARACTER SET utf8)";
|
||||
|
||||
}
|
||||
|
@ -560,7 +569,7 @@ public class Sampler {
|
|||
// extract 200 rows randomly for each iteration
|
||||
extractionRows: for (int i = 0; i < NIterations; i++) {
|
||||
|
||||
System.out.println("index iteration: " + i);
|
||||
// System.out.println("index iteration: " + i);
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler->executing the query: " + query);
|
||||
|
@ -706,13 +715,13 @@ public class Sampler {
|
|||
|
||||
if ((listRows.size() > 100) && (removal == false)) {
|
||||
|
||||
//print check
|
||||
// for (int k = 0; k < listRows.size(); k++) {
|
||||
//
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->row with index: " + k + " score "
|
||||
// + listRows.get(k).getScore());
|
||||
// }
|
||||
// print check
|
||||
// for (int k = 0; k < listRows.size(); k++) {
|
||||
//
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->row with index: " + k + " score "
|
||||
// + listRows.get(k).getScore());
|
||||
// }
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler->starting the removal operation");
|
||||
|
@ -727,8 +736,8 @@ public class Sampler {
|
|||
|
||||
RowScore row = listRows.remove(100);
|
||||
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->removing row with score: " + row.getScore());
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->removing row with score: " + row.getScore());
|
||||
|
||||
numElemToDelete = numElemToDelete - 1;
|
||||
|
||||
|
@ -744,10 +753,11 @@ public class Sampler {
|
|||
|
||||
for (int i = 0; i < listRows.size(); i++) {
|
||||
|
||||
// //check rows added in the final result
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
|
||||
|
||||
// //check rows added in the final result
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->adding row with index: " + i + " " +
|
||||
// listRows.get(i).getRow());
|
||||
|
||||
rows.add(listRows.get(i).getRow());
|
||||
}
|
||||
|
||||
|
@ -915,7 +925,8 @@ public class Sampler {
|
|||
if (DBType.equals(POSTGRES)) {
|
||||
|
||||
// the full name equal to "schemaname.tablename"
|
||||
tablename = schemaName + "." + "\"" + tablename + "\"";
|
||||
String tableName = "";
|
||||
tableName = schemaName + "." + "\"" + tablename + "\"";
|
||||
|
||||
query = String.format(
|
||||
queryForSmartSampleWithThresholdOnTablePostgres,
|
||||
|
@ -924,9 +935,9 @@ public class Sampler {
|
|||
|
||||
// build the query for database mysql
|
||||
if (DBType.equals(MYSQL)) {
|
||||
|
||||
// the full name equal to "dbname.tablename"
|
||||
tablename = schemaName + "." + tablename;
|
||||
String tableName = "";
|
||||
tableName = schemaName + "." + tablename;
|
||||
|
||||
query = String.format(
|
||||
queryForSmartSampleWithThresholdOnTableMysql,
|
||||
|
@ -999,18 +1010,20 @@ public class Sampler {
|
|||
|
||||
// if (value == columnArray.length) {
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler-> column array dimension: "
|
||||
+ columnArray.length);
|
||||
// print check
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler-> column array dimension: "
|
||||
// + columnArray.length);
|
||||
|
||||
double thresholdRank = ((columnArray.length) * 80);
|
||||
thresholdRank = thresholdRank / 100;
|
||||
|
||||
double valCeil = Math.round(thresholdRank);
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler-> threshold: " + thresholdRank
|
||||
+ " rounded value: " + valCeil);
|
||||
// print check
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler-> threshold: " + thresholdRank
|
||||
// + " rounded value: " + valCeil);
|
||||
|
||||
if (value >= (int) valCeil) {
|
||||
|
||||
|
@ -1096,8 +1109,8 @@ public class Sampler {
|
|||
|
||||
RowScore row = listRows.remove(100);
|
||||
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->removing row with score: " + row.getScore());
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->removing row with score: " + row.getScore());
|
||||
|
||||
numElemToDelete = numElemToDelete - 1;
|
||||
|
||||
|
@ -1113,7 +1126,7 @@ public class Sampler {
|
|||
|
||||
for (int i = 0; i < listRows.size(); i++) {
|
||||
|
||||
//check rows added in the final result
|
||||
// check rows added in the final result
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->adding row with index: " + i + " " +
|
||||
// listRows.get(i).getRow());
|
||||
|
@ -1266,10 +1279,10 @@ public class Sampler {
|
|||
queryForRandomSampleOnTablePostgres, listAttributes,
|
||||
tableName);
|
||||
}
|
||||
|
||||
|
||||
if (DBType.equals(MYSQL)) {
|
||||
//the full name equal to "dbname.tablename"
|
||||
tableName=schemaName+"."+tableName;
|
||||
// the full name equal to "dbname.tablename"
|
||||
tableName = schemaName + "." + tableName;
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleOnTableMysql, listAttributes,
|
||||
tableName);
|
||||
|
@ -1277,10 +1290,10 @@ public class Sampler {
|
|||
|
||||
}
|
||||
|
||||
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
|
||||
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
|
||||
if (NumRows > 700000) {
|
||||
|
||||
|
||||
// generate an index randomly to execute the query
|
||||
|
||||
// Define threshold
|
||||
|
@ -1315,28 +1328,26 @@ public class Sampler {
|
|||
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
|
||||
|
||||
}
|
||||
|
||||
if (DBType.equals(POSTGRES)){
|
||||
|
||||
if (DBType.equals(POSTGRES)) {
|
||||
// the full name equal to "schemaname.tablename"
|
||||
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||
|
||||
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleWithThresholdOnTablePostgres,
|
||||
listAttributes, tableName, X);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (DBType.equals(MYSQL)) { // MySQL
|
||||
|
||||
// the full name equal to "dbname.tablename"
|
||||
tableName = schemaName + "." + tableName;
|
||||
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleWithThresholdOnTableMysql,
|
||||
listAttributes, tableName, X);
|
||||
|
@ -1345,16 +1356,16 @@ public class Sampler {
|
|||
|
||||
}
|
||||
|
||||
// if (DBType.equals(MYSQL)) { // MySQL
|
||||
//
|
||||
// // the full name equal to "dbname.tablename"
|
||||
// tableName = schemaName + "." + tableName;
|
||||
//
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
//
|
||||
// }
|
||||
// if (DBType.equals(MYSQL)) { // MySQL
|
||||
//
|
||||
// // the full name equal to "dbname.tablename"
|
||||
// tableName = schemaName + "." + tableName;
|
||||
//
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
//
|
||||
// }
|
||||
|
||||
AnalysisLogger.getLogger()
|
||||
.debug("Sampler->preparing to submit the query: "
|
||||
|
|
Loading…
Reference in New Issue