Fixed a bug in smart and random sampling for MySQL databases

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99333 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Loredana Liccardo 2014-08-28 13:57:57 +00:00
parent a2ea87687d
commit fe5b1c211b
2 changed files with 130 additions and 69 deletions

View File

@ -18,12 +18,14 @@ import org.hibernate.SessionFactory;
public class Sampler {
// query to perform sample operation on the table
// private static final String queryForSampleOnTablePostgres = "select %1$s from \"%2$s\" limit 100";
// private static final String queryForSampleOnTablePostgres =
// "select %1$s from \"%2$s\" limit 100";
private static final String queryForSampleOnTablePostgres = "select %1$s from %2$s limit 100";
private static final String queryForSampleOnTableMysql = "select %1$s from %2$s limit 100";
// query to perform a smart sample operation randomly on the table
// private static final String queryForSmartSampleOnTablePostgres = "select %1$s from \"%2$s\" order by random() limit 200";
// private static final String queryForSmartSampleOnTablePostgres =
// "select %1$s from \"%2$s\" order by random() limit 200";
private static final String queryForSmartSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 200";
private static final String queryForSmartSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 200";
// private static final String queryForSmartSampleOnTablePostgres =
@ -33,18 +35,22 @@ public class Sampler {
// query to perform a smart sample operation on the table considering the
// threshold
// private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 200 offset %3$s";
// private static final String
// queryForSmartSampleWithThresholdOnTablePostgres =
// "select %1$s from \"%2$s\" limit 200 offset %3$s";
private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 200 offset %3$s";
// private static final String queryForSmartSampleWithThresholdOnTableMysql
// = "select %1$s from %2$s limit 200 offset %3$s";
private static final String queryForSmartSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 200 offset %3$s";
// query to perform a sample operation randomly on a table
// private static final String queryForRandomSampleOnTablePostgres =
// "select %1$s from \"%2$s\" order by random() limit 100";
// query to perform a smart sample operation on the table considering the
// threshold
// private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 100 offset %3$s";
// private static final String
// queryForRandomSampleWithThresholdOnTablePostgres =
// "select %1$s from \"%2$s\" limit 100 offset %3$s";
private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 100 offset %3$s";
private static final String queryForRandomSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 100 offset %3$s";
private static final String queryForRandomSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 100";
private static final String queryForRandomSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 100";
@ -95,10 +101,10 @@ public class Sampler {
// preparing the query
if (DBType.equals(POSTGRES)) {
//the full name equal to "schemaname.tablename"
tableName=schemaName+"."+"\""+tableName+"\"";
// the full name equal to "schemaname.tablename"
tableName = schemaName + "." + "\"" + tableName + "\"";
querySampleOnTable = String.format(queryForSampleOnTablePostgres,
listAttributes, tableName);
@ -106,9 +112,9 @@ public class Sampler {
if (DBType.equals(MYSQL)) {
//the full name equal to "dbname.tablename"
tableName=schemaName+"."+tableName;
// the full name equal to "dbname.tablename"
tableName = schemaName + "." + tableName;
querySampleOnTable = String.format(queryForSampleOnTableMysql,
listAttributes, tableName);
@ -280,7 +286,6 @@ public class Sampler {
// //print check
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->size: " + columnsSet.size());
for (int i = 0; i < columnsSet.size(); i++) {
@ -338,11 +343,17 @@ public class Sampler {
// performed otherwise a not pure smart sample procedure is performed in
// order to solve a bug with the random function in postgres
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
// // compute the smart sample on a table
// rows = computeSmartSampleWithThreshold(connection, dbSession,
// DBType, tableName, schemaName, NumRows, DataTypeColumns);
//
// }
if (NumRows > 700000) {
// compute the smart sample on a table
rows = computeSmartSampleWithThreshold(connection, dbSession,
DBType, tableName, schemaName, NumRows, DataTypeColumns);
} else {
// computation of the iterations number
@ -359,7 +370,6 @@ public class Sampler {
rows = computeSmartSample(connection, dbSession, DBType, tableName,
schemaName, NIterations, DataTypeColumns,
DataTypeColumns.size());
}
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
@ -508,20 +518,19 @@ public class Sampler {
// build the query for database postgres
if (DBType.equals(POSTGRES)) {
//the full name equal to "schemaname.tablename"
tablename=schemaName+"."+ "\""+tablename+"\"";
// the full name equal to "schemaname.tablename"
tablename = schemaName + "." + "\"" + tablename + "\"";
query = String.format(queryForSmartSampleOnTablePostgres,
listAttributes, tablename);
}
// build the query for database mysql
if (DBType.equals(MYSQL)) {
//the full name equal to "dbname.tablename"
tablename=schemaName+"."+tablename;
// the full name equal to "dbname.tablename"
tablename = schemaName + "." + tablename;
query = String.format(queryForSmartSampleOnTableMysql,
listAttributes, tablename);
@ -732,12 +741,11 @@ public class Sampler {
for (int i = 0; i < listRows.size(); i++) {
// //check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i);
// //check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
rows.add(listRows.get(i).getRow());
}
return rows;
@ -777,8 +785,8 @@ public class Sampler {
// Define Lower and Upper Index (LI and UI) of a range
int LI = X + 200;
int UI = X - 200;
int LI = X - 200;
int UI = X + 200;
AnalysisLogger.getLogger().debug(
"Sampler->Lower Index of the range: " + LI);
@ -817,8 +825,8 @@ public class Sampler {
// Define Lower and Upper Index (LI and UI) of a range
int LI = X + 200;
int UI = X - 200;
int LI = X - 200;
int UI = X + 200;
AnalysisLogger.getLogger().debug(
"Sampler->Lower Index of the range: " + LI);
@ -903,14 +911,23 @@ public class Sampler {
// build the query for database postgres
if (DBType.equals(POSTGRES)) {
//the full name equal to "schemaname.tablename"
tablename=schemaName+"."+ "\""+tablename+"\"";
// the full name equal to "schemaname.tablename"
tablename = schemaName + "." + "\"" + tablename + "\"";
query = String.format(
queryForSmartSampleWithThresholdOnTablePostgres,
listAttributes, tablename, indexes[i]);
}
// build the query for database mysql
if (DBType.equals(MYSQL)) {
// the full name equal to "dbname.tablename"
tablename = schemaName + "." + tablename;
query = String.format(
queryForSmartSampleWithThresholdOnTableMysql,
listAttributes, tablename, indexes[i]);
}
AnalysisLogger.getLogger().debug(
@ -1093,9 +1110,10 @@ public class Sampler {
for (int i = 0; i < listRows.size(); i++) {
// //check the row list result
//check rows added in the final result
// AnalysisLogger.getLogger().debug(
// "Sampler->adding row with index: " + i);
// "Sampler->adding row with index: " + i + " " +
// listRows.get(i).getRow());
rows.add(listRows.get(i).getRow());
@ -1220,26 +1238,46 @@ public class Sampler {
// preparing the query
// if is rows number <= 700000 then the pure random sample procedure is
// performed otherwise a not pure ranom sample procedure is performed in
// performed otherwise a not pure random sample procedure is performed
// in
// order to solve a bug with the random function in postgres
if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
//the full name equal to "schemaname.tablename"
tableName=schemaName+"."+ "\""+tableName+"\"";
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
//
// //the full name equal to "schemaname.tablename"
// tableName=schemaName+"."+ "\""+tableName+"\"";
//
// querySampleOnTable = String.format(
// queryForRandomSampleOnTablePostgres, listAttributes,
// tableName);
//
// }
querySampleOnTable = String.format(
queryForRandomSampleOnTablePostgres, listAttributes,
tableName);
if (NumRows <= 700000) {
if (DBType.equals(POSTGRES)) {
// the full name equal to "schemaname.tablename"
tableName = schemaName + "." + "\"" + tableName + "\"";
querySampleOnTable = String.format(
queryForRandomSampleOnTablePostgres, listAttributes,
tableName);
}
if (DBType.equals(MYSQL)) {
//the full name equal to "dbname.tablename"
tableName=schemaName+"."+tableName;
querySampleOnTable = String.format(
queryForRandomSampleOnTableMysql, listAttributes,
tableName);
}
}
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
//the full name equal to "schemaname.tablename"
tableName=schemaName+"."+ "\""+tableName+"\"";
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
if (NumRows > 700000) {
// generate an index randomly to execute the query
// Define threshold
@ -1274,24 +1312,47 @@ public class Sampler {
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
}
querySampleOnTable = String.format(
queryForRandomSampleWithThresholdOnTablePostgres,
listAttributes, tableName, X);
}
if (DBType.equals(MYSQL)) { // MySQL
//the full name equal to "dbname.tablename"
tableName=schemaName+"."+tableName;
if (DBType.equals(POSTGRES)){
// the full name equal to "schemaname.tablename"
tableName = schemaName + "." + "\"" + tableName + "\"";
querySampleOnTable = String.format(
queryForRandomSampleWithThresholdOnTablePostgres,
listAttributes, tableName, X);
}
querySampleOnTable = String
.format(queryForRandomSampleOnTableMysql, listAttributes,
tableName);
if (DBType.equals(MYSQL)) { // MySQL
// the full name equal to "dbname.tablename"
tableName = schemaName + "." + tableName;
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
querySampleOnTable = String.format(
queryForRandomSampleWithThresholdOnTableMysql,
listAttributes, tableName, X);
}
}
// if (DBType.equals(MYSQL)) { // MySQL
//
// // the full name equal to "dbname.tablename"
// tableName = schemaName + "." + tableName;
//
// querySampleOnTable = String
// .format(queryForRandomSampleOnTableMysql, listAttributes,
// tableName);
//
// }
AnalysisLogger.getLogger()
.debug("Sampler->preparing to submit the query: "
+ querySampleOnTable);

View File

@ -896,8 +896,8 @@ public class DatabaseManagement {
((LinkedHashMap<String, Object>) element).values());
// // print check
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->values: " + listvalues);
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->values: " + listvalues);
// each row could have several column values
Object[] row = listvalues.toArray();