Bug fixed in smart and random sampling for a MySQL database
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99333 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
a2ea87687d
commit
fe5b1c211b
|
@ -18,12 +18,14 @@ import org.hibernate.SessionFactory;
|
|||
public class Sampler {
|
||||
|
||||
// query to perform sample operation on the table
|
||||
// private static final String queryForSampleOnTablePostgres = "select %1$s from \"%2$s\" limit 100";
|
||||
// private static final String queryForSampleOnTablePostgres =
|
||||
// "select %1$s from \"%2$s\" limit 100";
|
||||
private static final String queryForSampleOnTablePostgres = "select %1$s from %2$s limit 100";
|
||||
private static final String queryForSampleOnTableMysql = "select %1$s from %2$s limit 100";
|
||||
|
||||
// query to perform a smart sample operation randomly on the table
|
||||
// private static final String queryForSmartSampleOnTablePostgres = "select %1$s from \"%2$s\" order by random() limit 200";
|
||||
// private static final String queryForSmartSampleOnTablePostgres =
|
||||
// "select %1$s from \"%2$s\" order by random() limit 200";
|
||||
private static final String queryForSmartSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 200";
|
||||
private static final String queryForSmartSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 200";
|
||||
// private static final String queryForSmartSampleOnTablePostgres =
|
||||
|
@ -33,18 +35,22 @@ public class Sampler {
|
|||
|
||||
// query to perform a smart sample operation on the table considering the
|
||||
// threshold
|
||||
// private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 200 offset %3$s";
|
||||
// private static final String
|
||||
// queryForSmartSampleWithThresholdOnTablePostgres =
|
||||
// "select %1$s from \"%2$s\" limit 200 offset %3$s";
|
||||
private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 200 offset %3$s";
|
||||
// private static final String queryForSmartSampleWithThresholdOnTableMysql
|
||||
// = "select %1$s from %2$s limit 200 offset %3$s";
|
||||
private static final String queryForSmartSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 200 offset %3$s";
|
||||
|
||||
// query to perform a sample operation randomly on a table
|
||||
// private static final String queryForRandomSampleOnTablePostgres =
|
||||
// "select %1$s from \"%2$s\" order by random() limit 100";
|
||||
// query to perform a smart sample operation on the table considering the
|
||||
// threshold
|
||||
// private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 100 offset %3$s";
|
||||
// private static final String
|
||||
// queryForRandomSampleWithThresholdOnTablePostgres =
|
||||
// "select %1$s from \"%2$s\" limit 100 offset %3$s";
|
||||
private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 100 offset %3$s";
|
||||
private static final String queryForRandomSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 100 offset %3$s";
|
||||
private static final String queryForRandomSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 100";
|
||||
private static final String queryForRandomSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 100";
|
||||
|
||||
|
@ -281,7 +287,6 @@ public class Sampler {
|
|||
// AnalysisLogger.getLogger().debug(
|
||||
// "DatabaseManagement->size: " + columnsSet.size());
|
||||
|
||||
|
||||
for (int i = 0; i < columnsSet.size(); i++) {
|
||||
|
||||
Object element = columnsSet.get(i);
|
||||
|
@ -338,11 +343,17 @@ public class Sampler {
|
|||
// performed otherwise a not pure smart sample procedure is performed in
|
||||
// order to solve a bug with the random function in postgres
|
||||
|
||||
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
// // compute the smart sample on a table
|
||||
// rows = computeSmartSampleWithThreshold(connection, dbSession,
|
||||
// DBType, tableName, schemaName, NumRows, DataTypeColumns);
|
||||
//
|
||||
// }
|
||||
|
||||
if (NumRows > 700000) {
|
||||
// compute the smart sample on a table
|
||||
rows = computeSmartSampleWithThreshold(connection, dbSession,
|
||||
DBType, tableName, schemaName, NumRows, DataTypeColumns);
|
||||
|
||||
} else {
|
||||
|
||||
// computation of the iterations number
|
||||
|
@ -359,7 +370,6 @@ public class Sampler {
|
|||
rows = computeSmartSample(connection, dbSession, DBType, tableName,
|
||||
schemaName, NIterations, DataTypeColumns,
|
||||
DataTypeColumns.size());
|
||||
|
||||
}
|
||||
|
||||
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
|
@ -512,7 +522,6 @@ public class Sampler {
|
|||
// the full name equal to "schemaname.tablename"
|
||||
tablename = schemaName + "." + "\"" + tablename + "\"";
|
||||
|
||||
|
||||
query = String.format(queryForSmartSampleOnTablePostgres,
|
||||
listAttributes, tablename);
|
||||
|
||||
|
@ -734,10 +743,9 @@ public class Sampler {
|
|||
|
||||
// //check rows added in the final result
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->adding row with index: " + i);
|
||||
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
|
||||
|
||||
rows.add(listRows.get(i).getRow());
|
||||
|
||||
}
|
||||
|
||||
return rows;
|
||||
|
@ -777,8 +785,8 @@ public class Sampler {
|
|||
|
||||
// Define Lower and Upper Index (LI and UI) of a range
|
||||
|
||||
int LI = X + 200;
|
||||
int UI = X - 200;
|
||||
int LI = X - 200;
|
||||
int UI = X + 200;
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler->Lower Index of the range: " + LI);
|
||||
|
@ -817,8 +825,8 @@ public class Sampler {
|
|||
|
||||
// Define Lower and Upper Index (LI and UI) of a range
|
||||
|
||||
int LI = X + 200;
|
||||
int UI = X - 200;
|
||||
int LI = X - 200;
|
||||
int UI = X + 200;
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
"Sampler->Lower Index of the range: " + LI);
|
||||
|
@ -906,11 +914,20 @@ public class Sampler {
|
|||
// the full name equal to "schemaname.tablename"
|
||||
tablename = schemaName + "." + "\"" + tablename + "\"";
|
||||
|
||||
|
||||
query = String.format(
|
||||
queryForSmartSampleWithThresholdOnTablePostgres,
|
||||
listAttributes, tablename, indexes[i]);
|
||||
}
|
||||
|
||||
// build the query for database mysql
|
||||
if (DBType.equals(MYSQL)) {
|
||||
|
||||
// the full name equal to "dbname.tablename"
|
||||
tablename = schemaName + "." + tablename;
|
||||
|
||||
query = String.format(
|
||||
queryForSmartSampleWithThresholdOnTableMysql,
|
||||
listAttributes, tablename, indexes[i]);
|
||||
}
|
||||
|
||||
AnalysisLogger.getLogger().debug(
|
||||
|
@ -1093,9 +1110,10 @@ public class Sampler {
|
|||
|
||||
for (int i = 0; i < listRows.size(); i++) {
|
||||
|
||||
// //check the row list result
|
||||
//check rows added in the final result
|
||||
// AnalysisLogger.getLogger().debug(
|
||||
// "Sampler->adding row with index: " + i);
|
||||
// "Sampler->adding row with index: " + i + " " +
|
||||
// listRows.get(i).getRow());
|
||||
|
||||
rows.add(listRows.get(i).getRow());
|
||||
|
||||
|
@ -1220,25 +1238,45 @@ public class Sampler {
|
|||
// preparing the query
|
||||
|
||||
// if the number of rows is <= 700000 then the pure random sample procedure is
|
||||
// performed otherwise a not pure ranom sample procedure is performed in
|
||||
// performed otherwise a not pure random sample procedure is performed
|
||||
// in
|
||||
// order to solve a bug with the random function in postgres
|
||||
|
||||
if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
//
|
||||
// //the full name equal to "schemaname.tablename"
|
||||
// tableName=schemaName+"."+ "\""+tableName+"\"";
|
||||
//
|
||||
// querySampleOnTable = String.format(
|
||||
// queryForRandomSampleOnTablePostgres, listAttributes,
|
||||
// tableName);
|
||||
//
|
||||
// }
|
||||
|
||||
if (NumRows <= 700000) {
|
||||
|
||||
if (DBType.equals(POSTGRES)) {
|
||||
// the full name equal to "schemaname.tablename"
|
||||
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleOnTablePostgres, listAttributes,
|
||||
tableName);
|
||||
}
|
||||
|
||||
if (DBType.equals(MYSQL)) {
|
||||
//the full name equal to "dbname.tablename"
|
||||
tableName=schemaName+"."+tableName;
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleOnTableMysql, listAttributes,
|
||||
tableName);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
|
||||
//the full name equal to "schemaname.tablename"
|
||||
tableName=schemaName+"."+ "\""+tableName+"\"";
|
||||
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||
|
||||
if (NumRows > 700000) {
|
||||
|
||||
// generate an index randomly to execute the query
|
||||
|
||||
|
@ -1275,23 +1313,46 @@ public class Sampler {
|
|||
|
||||
}
|
||||
|
||||
if (DBType.equals(POSTGRES)){
|
||||
// the full name equal to "schemaname.tablename"
|
||||
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleWithThresholdOnTablePostgres,
|
||||
listAttributes, tableName, X);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (DBType.equals(MYSQL)) { // MySQL
|
||||
|
||||
// the full name equal to "dbname.tablename"
|
||||
tableName = schemaName + "." + tableName;
|
||||
|
||||
querySampleOnTable = String
|
||||
.format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
tableName);
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
|
||||
querySampleOnTable = String.format(
|
||||
queryForRandomSampleWithThresholdOnTableMysql,
|
||||
listAttributes, tableName, X);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// if (DBType.equals(MYSQL)) { // MySQL
|
||||
//
|
||||
// // the full name equal to "dbname.tablename"
|
||||
// tableName = schemaName + "." + tableName;
|
||||
//
|
||||
// querySampleOnTable = String
|
||||
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||
// tableName);
|
||||
//
|
||||
// }
|
||||
|
||||
AnalysisLogger.getLogger()
|
||||
.debug("Sampler->preparing to submit the query: "
|
||||
+ querySampleOnTable);
|
||||
|
|
Loading…
Reference in New Issue