Bug fixed for the smart sampling (with table col2oct2010 -> scientific_names). Corrections in the computeSmartSampleWithThreshold method.
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99565 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
5af78b2871
commit
b72e647ea8
|
@ -171,13 +171,16 @@ public class Sampler {
|
||||||
|
|
||||||
if (DBType.equals(POSTGRES)) {
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
|
||||||
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
|
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
|
||||||
attribute = "CAST(" + listColumns.get(i) + " as character varying(255)), ";
|
attribute = "CAST(" + listColumns.get(i)
|
||||||
|
+ " as character varying(255)), ";
|
||||||
|
|
||||||
if (i == (listColumns.size() - 1)) {
|
if (i == (listColumns.size() - 1)) {
|
||||||
|
|
||||||
// attribute = "CAST(" + listColumns.get(i) + " as text)";
|
// attribute = "CAST(" + listColumns.get(i) +
|
||||||
attribute = "CAST(" + listColumns.get(i) + " as character varying(255))";
|
// " as text)";
|
||||||
|
attribute = "CAST(" + listColumns.get(i)
|
||||||
|
+ " as character varying(255))";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -197,7 +200,7 @@ public class Sampler {
|
||||||
// " as CHAR CHARACTER SET utf8), ";
|
// " as CHAR CHARACTER SET utf8), ";
|
||||||
// attribute = "CONVERT(" + listColumns.get(i) +
|
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||||
// ", CHAR), ";
|
// ", CHAR), ";
|
||||||
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
|
attribute = "CAST(" + "`" + listColumns.get(i) + "`"
|
||||||
+ " as CHAR(255) CHARACTER SET utf8), ";
|
+ " as CHAR(255) CHARACTER SET utf8), ";
|
||||||
|
|
||||||
if (i == (listColumns.size() - 1)) {
|
if (i == (listColumns.size() - 1)) {
|
||||||
|
@ -208,14 +211,17 @@ public class Sampler {
|
||||||
// attribute = "CONVERT(" + listColumns.get(i) +
|
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||||
// ", BINARY)";
|
// ", BINARY)";
|
||||||
|
|
||||||
attribute = "CAST(" + "`" +listColumns.get(i)+ "`"
|
attribute = "CAST(" + "`" + listColumns.get(i)
|
||||||
+ " as CHAR(255) CHARACTER SET utf8)";
|
+ "`" + " as CHAR(255) CHARACTER SET utf8)";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
|
attribute = "CAST(CAST("
|
||||||
|
+ "`"
|
||||||
|
+ listColumns.get(i)
|
||||||
|
+ "`"
|
||||||
+ " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
|
+ " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
|
||||||
|
|
||||||
if (i == (listColumns.size() - 1)) {
|
if (i == (listColumns.size() - 1)) {
|
||||||
|
@ -226,7 +232,10 @@ public class Sampler {
|
||||||
// attribute = "CONVERT(" + listColumns.get(i) +
|
// attribute = "CONVERT(" + listColumns.get(i) +
|
||||||
// ", BINARY)";
|
// ", BINARY)";
|
||||||
|
|
||||||
attribute = "CAST(CAST(" + "`" +listColumns.get(i)+ "`"
|
attribute = "CAST(CAST("
|
||||||
|
+ "`"
|
||||||
|
+ listColumns.get(i)
|
||||||
|
+ "`"
|
||||||
+ " as BINARY) as CHAR(255) CHARACTER SET utf8)";
|
+ " as BINARY) as CHAR(255) CHARACTER SET utf8)";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -560,7 +569,7 @@ public class Sampler {
|
||||||
// extract 200 rows randomly for each iteration
|
// extract 200 rows randomly for each iteration
|
||||||
extractionRows: for (int i = 0; i < NIterations; i++) {
|
extractionRows: for (int i = 0; i < NIterations; i++) {
|
||||||
|
|
||||||
System.out.println("index iteration: " + i);
|
// System.out.println("index iteration: " + i);
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
AnalysisLogger.getLogger().debug(
|
||||||
"Sampler->executing the query: " + query);
|
"Sampler->executing the query: " + query);
|
||||||
|
@ -706,13 +715,13 @@ public class Sampler {
|
||||||
|
|
||||||
if ((listRows.size() > 100) && (removal == false)) {
|
if ((listRows.size() > 100) && (removal == false)) {
|
||||||
|
|
||||||
//print check
|
// print check
|
||||||
// for (int k = 0; k < listRows.size(); k++) {
|
// for (int k = 0; k < listRows.size(); k++) {
|
||||||
//
|
//
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->row with index: " + k + " score "
|
// "Sampler->row with index: " + k + " score "
|
||||||
// + listRows.get(k).getScore());
|
// + listRows.get(k).getScore());
|
||||||
// }
|
// }
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
AnalysisLogger.getLogger().debug(
|
||||||
"Sampler->starting the removal operation");
|
"Sampler->starting the removal operation");
|
||||||
|
@ -727,8 +736,8 @@ public class Sampler {
|
||||||
|
|
||||||
RowScore row = listRows.remove(100);
|
RowScore row = listRows.remove(100);
|
||||||
|
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->removing row with score: " + row.getScore());
|
// "Sampler->removing row with score: " + row.getScore());
|
||||||
|
|
||||||
numElemToDelete = numElemToDelete - 1;
|
numElemToDelete = numElemToDelete - 1;
|
||||||
|
|
||||||
|
@ -744,10 +753,11 @@ public class Sampler {
|
||||||
|
|
||||||
for (int i = 0; i < listRows.size(); i++) {
|
for (int i = 0; i < listRows.size(); i++) {
|
||||||
|
|
||||||
// //check rows added in the final result
|
// //check rows added in the final result
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
|
// "Sampler->adding row with index: " + i + " " +
|
||||||
|
// listRows.get(i).getRow());
|
||||||
|
|
||||||
rows.add(listRows.get(i).getRow());
|
rows.add(listRows.get(i).getRow());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -915,7 +925,8 @@ public class Sampler {
|
||||||
if (DBType.equals(POSTGRES)) {
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
|
||||||
// the full name equal to "schemaname.tablename"
|
// the full name equal to "schemaname.tablename"
|
||||||
tablename = schemaName + "." + "\"" + tablename + "\"";
|
String tableName = "";
|
||||||
|
tableName = schemaName + "." + "\"" + tablename + "\"";
|
||||||
|
|
||||||
query = String.format(
|
query = String.format(
|
||||||
queryForSmartSampleWithThresholdOnTablePostgres,
|
queryForSmartSampleWithThresholdOnTablePostgres,
|
||||||
|
@ -924,9 +935,9 @@ public class Sampler {
|
||||||
|
|
||||||
// build the query for database mysql
|
// build the query for database mysql
|
||||||
if (DBType.equals(MYSQL)) {
|
if (DBType.equals(MYSQL)) {
|
||||||
|
|
||||||
// the full name equal to "dbname.tablename"
|
// the full name equal to "dbname.tablename"
|
||||||
tablename = schemaName + "." + tablename;
|
String tableName = "";
|
||||||
|
tableName = schemaName + "." + tablename;
|
||||||
|
|
||||||
query = String.format(
|
query = String.format(
|
||||||
queryForSmartSampleWithThresholdOnTableMysql,
|
queryForSmartSampleWithThresholdOnTableMysql,
|
||||||
|
@ -999,18 +1010,20 @@ public class Sampler {
|
||||||
|
|
||||||
// if (value == columnArray.length) {
|
// if (value == columnArray.length) {
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
// print check
|
||||||
"Sampler-> column array dimension: "
|
// AnalysisLogger.getLogger().debug(
|
||||||
+ columnArray.length);
|
// "Sampler-> column array dimension: "
|
||||||
|
// + columnArray.length);
|
||||||
|
|
||||||
double thresholdRank = ((columnArray.length) * 80);
|
double thresholdRank = ((columnArray.length) * 80);
|
||||||
thresholdRank = thresholdRank / 100;
|
thresholdRank = thresholdRank / 100;
|
||||||
|
|
||||||
double valCeil = Math.round(thresholdRank);
|
double valCeil = Math.round(thresholdRank);
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
// print check
|
||||||
"Sampler-> threshold: " + thresholdRank
|
// AnalysisLogger.getLogger().debug(
|
||||||
+ " rounded value: " + valCeil);
|
// "Sampler-> threshold: " + thresholdRank
|
||||||
|
// + " rounded value: " + valCeil);
|
||||||
|
|
||||||
if (value >= (int) valCeil) {
|
if (value >= (int) valCeil) {
|
||||||
|
|
||||||
|
@ -1096,8 +1109,8 @@ public class Sampler {
|
||||||
|
|
||||||
RowScore row = listRows.remove(100);
|
RowScore row = listRows.remove(100);
|
||||||
|
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->removing row with score: " + row.getScore());
|
// "Sampler->removing row with score: " + row.getScore());
|
||||||
|
|
||||||
numElemToDelete = numElemToDelete - 1;
|
numElemToDelete = numElemToDelete - 1;
|
||||||
|
|
||||||
|
@ -1113,7 +1126,7 @@ public class Sampler {
|
||||||
|
|
||||||
for (int i = 0; i < listRows.size(); i++) {
|
for (int i = 0; i < listRows.size(); i++) {
|
||||||
|
|
||||||
//check rows added in the final result
|
// check rows added in the final result
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->adding row with index: " + i + " " +
|
// "Sampler->adding row with index: " + i + " " +
|
||||||
// listRows.get(i).getRow());
|
// listRows.get(i).getRow());
|
||||||
|
@ -1266,10 +1279,10 @@ public class Sampler {
|
||||||
queryForRandomSampleOnTablePostgres, listAttributes,
|
queryForRandomSampleOnTablePostgres, listAttributes,
|
||||||
tableName);
|
tableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DBType.equals(MYSQL)) {
|
if (DBType.equals(MYSQL)) {
|
||||||
//the full name equal to "dbname.tablename"
|
// the full name equal to "dbname.tablename"
|
||||||
tableName=schemaName+"."+tableName;
|
tableName = schemaName + "." + tableName;
|
||||||
querySampleOnTable = String.format(
|
querySampleOnTable = String.format(
|
||||||
queryForRandomSampleOnTableMysql, listAttributes,
|
queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
tableName);
|
tableName);
|
||||||
|
@ -1277,10 +1290,10 @@ public class Sampler {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||||
|
|
||||||
if (NumRows > 700000) {
|
if (NumRows > 700000) {
|
||||||
|
|
||||||
// generate an index randomly to execute the query
|
// generate an index randomly to execute the query
|
||||||
|
|
||||||
// Define threshold
|
// Define threshold
|
||||||
|
@ -1315,28 +1328,26 @@ public class Sampler {
|
||||||
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
|
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DBType.equals(POSTGRES)){
|
if (DBType.equals(POSTGRES)) {
|
||||||
// the full name equal to "schemaname.tablename"
|
// the full name equal to "schemaname.tablename"
|
||||||
tableName = schemaName + "." + "\"" + tableName + "\"";
|
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||||
|
|
||||||
querySampleOnTable = String.format(
|
querySampleOnTable = String.format(
|
||||||
queryForRandomSampleWithThresholdOnTablePostgres,
|
queryForRandomSampleWithThresholdOnTablePostgres,
|
||||||
listAttributes, tableName, X);
|
listAttributes, tableName, X);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (DBType.equals(MYSQL)) { // MySQL
|
if (DBType.equals(MYSQL)) { // MySQL
|
||||||
|
|
||||||
// the full name equal to "dbname.tablename"
|
// the full name equal to "dbname.tablename"
|
||||||
tableName = schemaName + "." + tableName;
|
tableName = schemaName + "." + tableName;
|
||||||
|
|
||||||
// querySampleOnTable = String
|
// querySampleOnTable = String
|
||||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
// tableName);
|
// tableName);
|
||||||
|
|
||||||
querySampleOnTable = String.format(
|
querySampleOnTable = String.format(
|
||||||
queryForRandomSampleWithThresholdOnTableMysql,
|
queryForRandomSampleWithThresholdOnTableMysql,
|
||||||
listAttributes, tableName, X);
|
listAttributes, tableName, X);
|
||||||
|
@ -1345,16 +1356,16 @@ public class Sampler {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (DBType.equals(MYSQL)) { // MySQL
|
// if (DBType.equals(MYSQL)) { // MySQL
|
||||||
//
|
//
|
||||||
// // the full name equal to "dbname.tablename"
|
// // the full name equal to "dbname.tablename"
|
||||||
// tableName = schemaName + "." + tableName;
|
// tableName = schemaName + "." + tableName;
|
||||||
//
|
//
|
||||||
// querySampleOnTable = String
|
// querySampleOnTable = String
|
||||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
// tableName);
|
// tableName);
|
||||||
//
|
//
|
||||||
// }
|
// }
|
||||||
|
|
||||||
AnalysisLogger.getLogger()
|
AnalysisLogger.getLogger()
|
||||||
.debug("Sampler->preparing to submit the query: "
|
.debug("Sampler->preparing to submit the query: "
|
||||||
|
|
Loading…
Reference in New Issue