Bug fixed in smart and random sampling for a MySQL database
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99333 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
a2ea87687d
commit
fe5b1c211b
|
@ -18,12 +18,14 @@ import org.hibernate.SessionFactory;
|
||||||
public class Sampler {
|
public class Sampler {
|
||||||
|
|
||||||
// query to perform sample operation on the table
|
// query to perform sample operation on the table
|
||||||
// private static final String queryForSampleOnTablePostgres = "select %1$s from \"%2$s\" limit 100";
|
// private static final String queryForSampleOnTablePostgres =
|
||||||
|
// "select %1$s from \"%2$s\" limit 100";
|
||||||
private static final String queryForSampleOnTablePostgres = "select %1$s from %2$s limit 100";
|
private static final String queryForSampleOnTablePostgres = "select %1$s from %2$s limit 100";
|
||||||
private static final String queryForSampleOnTableMysql = "select %1$s from %2$s limit 100";
|
private static final String queryForSampleOnTableMysql = "select %1$s from %2$s limit 100";
|
||||||
|
|
||||||
// query to perform a smart sample operation randomly on the table
|
// query to perform a smart sample operation randomly on the table
|
||||||
// private static final String queryForSmartSampleOnTablePostgres = "select %1$s from \"%2$s\" order by random() limit 200";
|
// private static final String queryForSmartSampleOnTablePostgres =
|
||||||
|
// "select %1$s from \"%2$s\" order by random() limit 200";
|
||||||
private static final String queryForSmartSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 200";
|
private static final String queryForSmartSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 200";
|
||||||
private static final String queryForSmartSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 200";
|
private static final String queryForSmartSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 200";
|
||||||
// private static final String queryForSmartSampleOnTablePostgres =
|
// private static final String queryForSmartSampleOnTablePostgres =
|
||||||
|
@ -33,18 +35,22 @@ public class Sampler {
|
||||||
|
|
||||||
// query to perform a smart sample operation on the table considering the
|
// query to perform a smart sample operation on the table considering the
|
||||||
// threshold
|
// threshold
|
||||||
// private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 200 offset %3$s";
|
// private static final String
|
||||||
|
// queryForSmartSampleWithThresholdOnTablePostgres =
|
||||||
|
// "select %1$s from \"%2$s\" limit 200 offset %3$s";
|
||||||
private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 200 offset %3$s";
|
private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 200 offset %3$s";
|
||||||
// private static final String queryForSmartSampleWithThresholdOnTableMysql
|
private static final String queryForSmartSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 200 offset %3$s";
|
||||||
// = "select %1$s from %2$s limit 200 offset %3$s";
|
|
||||||
|
|
||||||
// query to perform a sample operation randomly on a table
|
// query to perform a sample operation randomly on a table
|
||||||
// private static final String queryForRandomSampleOnTablePostgres =
|
// private static final String queryForRandomSampleOnTablePostgres =
|
||||||
// "select %1$s from \"%2$s\" order by random() limit 100";
|
// "select %1$s from \"%2$s\" order by random() limit 100";
|
||||||
// query to perform a smart sample operation on the table considering the
|
// query to perform a smart sample operation on the table considering the
|
||||||
// threshold
|
// threshold
|
||||||
// private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 100 offset %3$s";
|
// private static final String
|
||||||
|
// queryForRandomSampleWithThresholdOnTablePostgres =
|
||||||
|
// "select %1$s from \"%2$s\" limit 100 offset %3$s";
|
||||||
private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 100 offset %3$s";
|
private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 100 offset %3$s";
|
||||||
|
private static final String queryForRandomSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 100 offset %3$s";
|
||||||
private static final String queryForRandomSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 100";
|
private static final String queryForRandomSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 100";
|
||||||
private static final String queryForRandomSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 100";
|
private static final String queryForRandomSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 100";
|
||||||
|
|
||||||
|
@ -95,10 +101,10 @@ public class Sampler {
|
||||||
// preparing the query
|
// preparing the query
|
||||||
|
|
||||||
if (DBType.equals(POSTGRES)) {
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
|
||||||
//the full name equal to "schemaname.tablename"
|
// the full name equal to "schemaname.tablename"
|
||||||
tableName=schemaName+"."+"\""+tableName+"\"";
|
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||||
|
|
||||||
querySampleOnTable = String.format(queryForSampleOnTablePostgres,
|
querySampleOnTable = String.format(queryForSampleOnTablePostgres,
|
||||||
listAttributes, tableName);
|
listAttributes, tableName);
|
||||||
|
|
||||||
|
@ -106,9 +112,9 @@ public class Sampler {
|
||||||
|
|
||||||
if (DBType.equals(MYSQL)) {
|
if (DBType.equals(MYSQL)) {
|
||||||
|
|
||||||
//the full name equal to "dbname.tablename"
|
// the full name equal to "dbname.tablename"
|
||||||
tableName=schemaName+"."+tableName;
|
tableName = schemaName + "." + tableName;
|
||||||
|
|
||||||
querySampleOnTable = String.format(queryForSampleOnTableMysql,
|
querySampleOnTable = String.format(queryForSampleOnTableMysql,
|
||||||
listAttributes, tableName);
|
listAttributes, tableName);
|
||||||
|
|
||||||
|
@ -280,7 +286,6 @@ public class Sampler {
|
||||||
// //print check
|
// //print check
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "DatabaseManagement->size: " + columnsSet.size());
|
// "DatabaseManagement->size: " + columnsSet.size());
|
||||||
|
|
||||||
|
|
||||||
for (int i = 0; i < columnsSet.size(); i++) {
|
for (int i = 0; i < columnsSet.size(); i++) {
|
||||||
|
|
||||||
|
@ -338,11 +343,17 @@ public class Sampler {
|
||||||
// performed otherwise a not pure smart sample procedure is performed in
|
// performed otherwise a not pure smart sample procedure is performed in
|
||||||
// order to solve a bug with the random function in postgres
|
// order to solve a bug with the random function in postgres
|
||||||
|
|
||||||
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||||
|
// // compute the smart sample on a table
|
||||||
|
// rows = computeSmartSampleWithThreshold(connection, dbSession,
|
||||||
|
// DBType, tableName, schemaName, NumRows, DataTypeColumns);
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
|
if (NumRows > 700000) {
|
||||||
// compute the smart sample on a table
|
// compute the smart sample on a table
|
||||||
rows = computeSmartSampleWithThreshold(connection, dbSession,
|
rows = computeSmartSampleWithThreshold(connection, dbSession,
|
||||||
DBType, tableName, schemaName, NumRows, DataTypeColumns);
|
DBType, tableName, schemaName, NumRows, DataTypeColumns);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// computation of the iterations number
|
// computation of the iterations number
|
||||||
|
@ -359,7 +370,6 @@ public class Sampler {
|
||||||
rows = computeSmartSample(connection, dbSession, DBType, tableName,
|
rows = computeSmartSample(connection, dbSession, DBType, tableName,
|
||||||
schemaName, NIterations, DataTypeColumns,
|
schemaName, NIterations, DataTypeColumns,
|
||||||
DataTypeColumns.size());
|
DataTypeColumns.size());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||||
|
@ -508,20 +518,19 @@ public class Sampler {
|
||||||
|
|
||||||
// build the query for database postgres
|
// build the query for database postgres
|
||||||
if (DBType.equals(POSTGRES)) {
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
|
||||||
//the full name equal to "schemaname.tablename"
|
// the full name equal to "schemaname.tablename"
|
||||||
tablename=schemaName+"."+ "\""+tablename+"\"";
|
tablename = schemaName + "." + "\"" + tablename + "\"";
|
||||||
|
|
||||||
|
|
||||||
query = String.format(queryForSmartSampleOnTablePostgres,
|
query = String.format(queryForSmartSampleOnTablePostgres,
|
||||||
listAttributes, tablename);
|
listAttributes, tablename);
|
||||||
|
|
||||||
}
|
}
|
||||||
// build the query for database mysql
|
// build the query for database mysql
|
||||||
if (DBType.equals(MYSQL)) {
|
if (DBType.equals(MYSQL)) {
|
||||||
|
|
||||||
//the full name equal to "dbname.tablename"
|
// the full name equal to "dbname.tablename"
|
||||||
tablename=schemaName+"."+tablename;
|
tablename = schemaName + "." + tablename;
|
||||||
|
|
||||||
query = String.format(queryForSmartSampleOnTableMysql,
|
query = String.format(queryForSmartSampleOnTableMysql,
|
||||||
listAttributes, tablename);
|
listAttributes, tablename);
|
||||||
|
@ -732,12 +741,11 @@ public class Sampler {
|
||||||
|
|
||||||
for (int i = 0; i < listRows.size(); i++) {
|
for (int i = 0; i < listRows.size(); i++) {
|
||||||
|
|
||||||
// //check rows added in the final result
|
// //check rows added in the final result
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->adding row with index: " + i);
|
// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow());
|
||||||
|
|
||||||
rows.add(listRows.get(i).getRow());
|
rows.add(listRows.get(i).getRow());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return rows;
|
return rows;
|
||||||
|
@ -777,8 +785,8 @@ public class Sampler {
|
||||||
|
|
||||||
// Define Lower and Upper Index (LI and UI) of a range
|
// Define Lower and Upper Index (LI and UI) of a range
|
||||||
|
|
||||||
int LI = X + 200;
|
int LI = X - 200;
|
||||||
int UI = X - 200;
|
int UI = X + 200;
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
AnalysisLogger.getLogger().debug(
|
||||||
"Sampler->Lower Index of the range: " + LI);
|
"Sampler->Lower Index of the range: " + LI);
|
||||||
|
@ -817,8 +825,8 @@ public class Sampler {
|
||||||
|
|
||||||
// Define Lower and Upper Index (LI and UI) of a range
|
// Define Lower and Upper Index (LI and UI) of a range
|
||||||
|
|
||||||
int LI = X + 200;
|
int LI = X - 200;
|
||||||
int UI = X - 200;
|
int UI = X + 200;
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
AnalysisLogger.getLogger().debug(
|
||||||
"Sampler->Lower Index of the range: " + LI);
|
"Sampler->Lower Index of the range: " + LI);
|
||||||
|
@ -903,14 +911,23 @@ public class Sampler {
|
||||||
// build the query for database postgres
|
// build the query for database postgres
|
||||||
if (DBType.equals(POSTGRES)) {
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
|
||||||
//the full name equal to "schemaname.tablename"
|
// the full name equal to "schemaname.tablename"
|
||||||
tablename=schemaName+"."+ "\""+tablename+"\"";
|
tablename = schemaName + "." + "\"" + tablename + "\"";
|
||||||
|
|
||||||
|
|
||||||
query = String.format(
|
query = String.format(
|
||||||
queryForSmartSampleWithThresholdOnTablePostgres,
|
queryForSmartSampleWithThresholdOnTablePostgres,
|
||||||
listAttributes, tablename, indexes[i]);
|
listAttributes, tablename, indexes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// build the query for database mysql
|
||||||
|
if (DBType.equals(MYSQL)) {
|
||||||
|
|
||||||
|
// the full name equal to "dbname.tablename"
|
||||||
|
tablename = schemaName + "." + tablename;
|
||||||
|
|
||||||
|
query = String.format(
|
||||||
|
queryForSmartSampleWithThresholdOnTableMysql,
|
||||||
|
listAttributes, tablename, indexes[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnalysisLogger.getLogger().debug(
|
AnalysisLogger.getLogger().debug(
|
||||||
|
@ -1093,9 +1110,10 @@ public class Sampler {
|
||||||
|
|
||||||
for (int i = 0; i < listRows.size(); i++) {
|
for (int i = 0; i < listRows.size(); i++) {
|
||||||
|
|
||||||
// //check the row list result
|
//check rows added in the final result
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "Sampler->adding row with index: " + i);
|
// "Sampler->adding row with index: " + i + " " +
|
||||||
|
// listRows.get(i).getRow());
|
||||||
|
|
||||||
rows.add(listRows.get(i).getRow());
|
rows.add(listRows.get(i).getRow());
|
||||||
|
|
||||||
|
@ -1220,26 +1238,46 @@ public class Sampler {
|
||||||
// preparing the query
|
// preparing the query
|
||||||
|
|
||||||
// if is rows number <= 700000 then the pure random sample procedure is
|
// if is rows number <= 700000 then the pure random sample procedure is
|
||||||
// performed otherwise a not pure ranom sample procedure is performed in
|
// performed otherwise a not pure random sample procedure is performed
|
||||||
|
// in
|
||||||
// order to solve a bug with the random function in postgres
|
// order to solve a bug with the random function in postgres
|
||||||
|
|
||||||
if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
// if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||||
|
//
|
||||||
//the full name equal to "schemaname.tablename"
|
// //the full name equal to "schemaname.tablename"
|
||||||
tableName=schemaName+"."+ "\""+tableName+"\"";
|
// tableName=schemaName+"."+ "\""+tableName+"\"";
|
||||||
|
//
|
||||||
|
// querySampleOnTable = String.format(
|
||||||
|
// queryForRandomSampleOnTablePostgres, listAttributes,
|
||||||
|
// tableName);
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
querySampleOnTable = String.format(
|
if (NumRows <= 700000) {
|
||||||
queryForRandomSampleOnTablePostgres, listAttributes,
|
|
||||||
tableName);
|
if (DBType.equals(POSTGRES)) {
|
||||||
|
// the full name equal to "schemaname.tablename"
|
||||||
|
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||||
|
|
||||||
|
querySampleOnTable = String.format(
|
||||||
|
queryForRandomSampleOnTablePostgres, listAttributes,
|
||||||
|
tableName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DBType.equals(MYSQL)) {
|
||||||
|
//the full name equal to "dbname.tablename"
|
||||||
|
tableName=schemaName+"."+tableName;
|
||||||
|
querySampleOnTable = String.format(
|
||||||
|
queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
|
tableName);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres
|
||||||
|
|
||||||
//the full name equal to "schemaname.tablename"
|
if (NumRows > 700000) {
|
||||||
tableName=schemaName+"."+ "\""+tableName+"\"";
|
|
||||||
|
|
||||||
|
|
||||||
// generate an index randomly to execute the query
|
// generate an index randomly to execute the query
|
||||||
|
|
||||||
// Define threshold
|
// Define threshold
|
||||||
|
@ -1274,24 +1312,47 @@ public class Sampler {
|
||||||
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
|
AnalysisLogger.getLogger().debug("Sampler->X index: " + X);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
querySampleOnTable = String.format(
|
|
||||||
queryForRandomSampleWithThresholdOnTablePostgres,
|
|
||||||
listAttributes, tableName, X);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DBType.equals(MYSQL)) { // MySQL
|
|
||||||
|
|
||||||
//the full name equal to "dbname.tablename"
|
if (DBType.equals(POSTGRES)){
|
||||||
tableName=schemaName+"."+tableName;
|
// the full name equal to "schemaname.tablename"
|
||||||
|
tableName = schemaName + "." + "\"" + tableName + "\"";
|
||||||
|
|
||||||
|
querySampleOnTable = String.format(
|
||||||
|
queryForRandomSampleWithThresholdOnTablePostgres,
|
||||||
|
listAttributes, tableName, X);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
querySampleOnTable = String
|
|
||||||
.format(queryForRandomSampleOnTableMysql, listAttributes,
|
|
||||||
tableName);
|
if (DBType.equals(MYSQL)) { // MySQL
|
||||||
|
|
||||||
|
// the full name equal to "dbname.tablename"
|
||||||
|
tableName = schemaName + "." + tableName;
|
||||||
|
|
||||||
|
// querySampleOnTable = String
|
||||||
|
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
|
// tableName);
|
||||||
|
|
||||||
|
querySampleOnTable = String.format(
|
||||||
|
queryForRandomSampleWithThresholdOnTableMysql,
|
||||||
|
listAttributes, tableName, X);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if (DBType.equals(MYSQL)) { // MySQL
|
||||||
|
//
|
||||||
|
// // the full name equal to "dbname.tablename"
|
||||||
|
// tableName = schemaName + "." + tableName;
|
||||||
|
//
|
||||||
|
// querySampleOnTable = String
|
||||||
|
// .format(queryForRandomSampleOnTableMysql, listAttributes,
|
||||||
|
// tableName);
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
AnalysisLogger.getLogger()
|
AnalysisLogger.getLogger()
|
||||||
.debug("Sampler->preparing to submit the query: "
|
.debug("Sampler->preparing to submit the query: "
|
||||||
+ querySampleOnTable);
|
+ querySampleOnTable);
|
||||||
|
|
|
@ -896,8 +896,8 @@ public class DatabaseManagement {
|
||||||
((LinkedHashMap<String, Object>) element).values());
|
((LinkedHashMap<String, Object>) element).values());
|
||||||
|
|
||||||
// // print check
|
// // print check
|
||||||
// AnalysisLogger.getLogger().debug(
|
// AnalysisLogger.getLogger().debug(
|
||||||
// "DatabaseManagement->values: " + listvalues);
|
// "DatabaseManagement->values: " + listvalues);
|
||||||
|
|
||||||
// each row could have several column values
|
// each row could have several column values
|
||||||
Object[] row = listvalues.toArray();
|
Object[] row = listvalues.toArray();
|
||||||
|
|
Loading…
Reference in New Issue