From fe5b1c211b657c5b30357553c0967ca91137af95 Mon Sep 17 00:00:00 2001 From: Loredana Liccardo Date: Thu, 28 Aug 2014 13:57:57 +0000 Subject: [PATCH] fix bug in smart and random sampling for MySQL databases git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@99333 82a268e6-3cf1-43bd-a215-b396298e98cf --- .../dataaccess/databases/sampler/Sampler.java | 195 ++++++++++++------ .../databases/utils/DatabaseManagement.java | 4 +- 2 files changed, 130 insertions(+), 69 deletions(-) diff --git a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java index 35e642d..1c1a9dc 100644 --- a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java +++ b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java @@ -18,12 +18,14 @@ import org.hibernate.SessionFactory; public class Sampler { // query to perform sample operation on the table -// private static final String queryForSampleOnTablePostgres = "select %1$s from \"%2$s\" limit 100"; + // private static final String queryForSampleOnTablePostgres = + // "select %1$s from \"%2$s\" limit 100"; private static final String queryForSampleOnTablePostgres = "select %1$s from %2$s limit 100"; private static final String queryForSampleOnTableMysql = "select %1$s from %2$s limit 100"; // query to perform a smart sample operation randomly on the table -// private static final String queryForSmartSampleOnTablePostgres = "select %1$s from \"%2$s\" order by random() limit 200"; + // private static final String queryForSmartSampleOnTablePostgres = + // "select %1$s from \"%2$s\" order by random() limit 200"; private static final String queryForSmartSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 200"; private static final String queryForSmartSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 200"; // private static final String
queryForSmartSampleOnTablePostgres = @@ -33,18 +35,22 @@ public class Sampler { // query to perform a smart sample operation on the table considering the // threshold -// private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 200 offset %3$s"; + // private static final String + // queryForSmartSampleWithThresholdOnTablePostgres = + // "select %1$s from \"%2$s\" limit 200 offset %3$s"; private static final String queryForSmartSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 200 offset %3$s"; - // private static final String queryForSmartSampleWithThresholdOnTableMysql - // = "select %1$s from %2$s limit 200 offset %3$s"; + private static final String queryForSmartSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 200 offset %3$s"; // query to perform a sample operation randomly on a table // private static final String queryForRandomSampleOnTablePostgres = // "select %1$s from \"%2$s\" order by random() limit 100"; // query to perform a smart sample operation on the table considering the // threshold -// private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from \"%2$s\" limit 100 offset %3$s"; + // private static final String + // queryForRandomSampleWithThresholdOnTablePostgres = + // "select %1$s from \"%2$s\" limit 100 offset %3$s"; private static final String queryForRandomSampleWithThresholdOnTablePostgres = "select %1$s from %2$s limit 100 offset %3$s"; + private static final String queryForRandomSampleWithThresholdOnTableMysql = "select %1$s from %2$s limit 100 offset %3$s"; private static final String queryForRandomSampleOnTableMysql = "select %1$s from %2$s order by rand() limit 100"; private static final String queryForRandomSampleOnTablePostgres = "select %1$s from %2$s order by random() limit 100"; @@ -95,10 +101,10 @@ public class Sampler { // preparing the query if (DBType.equals(POSTGRES)) { - - //the full name equal to 
"schemaname.tablename" - tableName=schemaName+"."+"\""+tableName+"\""; - + + // the full name equal to "schemaname.tablename" + tableName = schemaName + "." + "\"" + tableName + "\""; + querySampleOnTable = String.format(queryForSampleOnTablePostgres, listAttributes, tableName); @@ -106,9 +112,9 @@ public class Sampler { if (DBType.equals(MYSQL)) { - //the full name equal to "dbname.tablename" - tableName=schemaName+"."+tableName; - + // the full name equal to "dbname.tablename" + tableName = schemaName + "." + tableName; + querySampleOnTable = String.format(queryForSampleOnTableMysql, listAttributes, tableName); @@ -280,7 +286,6 @@ public class Sampler { // //print check // AnalysisLogger.getLogger().debug( // "DatabaseManagement->size: " + columnsSet.size()); - for (int i = 0; i < columnsSet.size(); i++) { @@ -338,11 +343,17 @@ public class Sampler { // performed otherwise a not pure smart sample procedure is performed in // order to solve a bug with the random function in postgres - if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres + // if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres + // // compute the smart sample on a table + // rows = computeSmartSampleWithThreshold(connection, dbSession, + // DBType, tableName, schemaName, NumRows, DataTypeColumns); + // + // } + + if (NumRows > 700000) { // compute the smart sample on a table rows = computeSmartSampleWithThreshold(connection, dbSession, DBType, tableName, schemaName, NumRows, DataTypeColumns); - } else { // computation of the iterations number @@ -359,7 +370,6 @@ public class Sampler { rows = computeSmartSample(connection, dbSession, DBType, tableName, schemaName, NIterations, DataTypeColumns, DataTypeColumns.size()); - } // if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres @@ -508,20 +518,19 @@ public class Sampler { // build the query for database postgres if (DBType.equals(POSTGRES)) { - - //the full name equal to "schemaname.tablename" - 
tablename=schemaName+"."+ "\""+tablename+"\""; - - + + // the full name equal to "schemaname.tablename" + tablename = schemaName + "." + "\"" + tablename + "\""; + query = String.format(queryForSmartSampleOnTablePostgres, listAttributes, tablename); } // build the query for database mysql if (DBType.equals(MYSQL)) { - - //the full name equal to "dbname.tablename" - tablename=schemaName+"."+tablename; + + // the full name equal to "dbname.tablename" + tablename = schemaName + "." + tablename; query = String.format(queryForSmartSampleOnTableMysql, listAttributes, tablename); @@ -732,12 +741,11 @@ public class Sampler { for (int i = 0; i < listRows.size(); i++) { - // //check rows added in the final result - // AnalysisLogger.getLogger().debug( - // "Sampler->adding row with index: " + i); - +// //check rows added in the final result +// AnalysisLogger.getLogger().debug( +// "Sampler->adding row with index: " + i + " " + listRows.get(i).getRow()); + rows.add(listRows.get(i).getRow()); - } return rows; @@ -777,8 +785,8 @@ public class Sampler { // Define Lower and Upper Index (LI and UL) of a range - int LI = X + 200; - int UI = X - 200; + int LI = X - 200; + int UI = X + 200; AnalysisLogger.getLogger().debug( "Sampler->Lower Index of the range: " + LI); @@ -817,8 +825,8 @@ public class Sampler { // Define Lower and Upper Index (LI and UL) of a range - int LI = X + 200; - int UI = X - 200; + int LI = X - 200; + int UI = X + 200; AnalysisLogger.getLogger().debug( "Sampler->Lower Index of the range: " + LI); @@ -903,14 +911,23 @@ public class Sampler { // build the query for database postgres if (DBType.equals(POSTGRES)) { - //the full name equal to "schemaname.tablename" - tablename=schemaName+"."+ "\""+tablename+"\""; - - + // the full name equal to "schemaname.tablename" + tablename = schemaName + "." 
+ "\"" + tablename + "\""; + query = String.format( queryForSmartSampleWithThresholdOnTablePostgres, listAttributes, tablename, indexes[i]); + } + // build the query for database mysql + if (DBType.equals(MYSQL)) { + + // the full name equal to "dbname.tablename" + tablename = schemaName + "." + tablename; + + query = String.format( + queryForSmartSampleWithThresholdOnTableMysql, + listAttributes, tablename, indexes[i]); } AnalysisLogger.getLogger().debug( @@ -1093,9 +1110,10 @@ public class Sampler { for (int i = 0; i < listRows.size(); i++) { - // //check the row list result + //check rows added in the final result // AnalysisLogger.getLogger().debug( - // "Sampler->adding row with index: " + i); + // "Sampler->adding row with index: " + i + " " + + // listRows.get(i).getRow()); rows.add(listRows.get(i).getRow()); @@ -1220,26 +1238,46 @@ public class Sampler { // preparing the query // if is rows number <= 700000 then the pure random sample procedure is - // performed otherwise a not pure ranom sample procedure is performed in + // performed otherwise a not pure random sample procedure is performed + // in // order to solve a bug with the random function in postgres - if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres - - //the full name equal to "schemaname.tablename" - tableName=schemaName+"."+ "\""+tableName+"\""; + // if ((NumRows <= 700000) && (DBType.equals(POSTGRES))) { // Postgres + // + // //the full name equal to "schemaname.tablename" + // tableName=schemaName+"."+ "\""+tableName+"\""; + // + // querySampleOnTable = String.format( + // queryForRandomSampleOnTablePostgres, listAttributes, + // tableName); + // + // } - querySampleOnTable = String.format( - queryForRandomSampleOnTablePostgres, listAttributes, - tableName); + if (NumRows <= 700000) { + + if (DBType.equals(POSTGRES)) { + // the full name equal to "schemaname.tablename" + tableName = schemaName + "." 
+ "\"" + tableName + "\""; + + querySampleOnTable = String.format( + queryForRandomSampleOnTablePostgres, listAttributes, + tableName); + } + + if (DBType.equals(MYSQL)) { + //the full name equal to "dbname.tablename" + tableName=schemaName+"."+tableName; + querySampleOnTable = String.format( + queryForRandomSampleOnTableMysql, listAttributes, + tableName); + } } - if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres - - //the full name equal to "schemaname.tablename" - tableName=schemaName+"."+ "\""+tableName+"\""; - - +// if ((NumRows > 700000) && (DBType.equals(POSTGRES))) { // Postgres + + if (NumRows > 700000) { + // generate an index randomly to execute the query // Define threshold @@ -1274,24 +1312,47 @@ public class Sampler { AnalysisLogger.getLogger().debug("Sampler->X index: " + X); } - - querySampleOnTable = String.format( - queryForRandomSampleWithThresholdOnTablePostgres, - listAttributes, tableName, X); - - } - - if (DBType.equals(MYSQL)) { // MySQL - //the full name equal to "dbname.tablename" - tableName=schemaName+"."+tableName; + if (DBType.equals(POSTGRES)){ + // the full name equal to "schemaname.tablename" + tableName = schemaName + "." + "\"" + tableName + "\""; + + querySampleOnTable = String.format( + queryForRandomSampleWithThresholdOnTablePostgres, + listAttributes, tableName, X); + + } - querySampleOnTable = String - .format(queryForRandomSampleOnTableMysql, listAttributes, - tableName); + + + if (DBType.equals(MYSQL)) { // MySQL + + // the full name equal to "dbname.tablename" + tableName = schemaName + "." + tableName; + +// querySampleOnTable = String +// .format(queryForRandomSampleOnTableMysql, listAttributes, +// tableName); + + querySampleOnTable = String.format( + queryForRandomSampleWithThresholdOnTableMysql, + listAttributes, tableName, X); + + } } +// if (DBType.equals(MYSQL)) { // MySQL +// +// // the full name equal to "dbname.tablename" +// tableName = schemaName + "." 
+ tableName; +// +// querySampleOnTable = String +// .format(queryForRandomSampleOnTableMysql, listAttributes, +// tableName); +// +// } + AnalysisLogger.getLogger() .debug("Sampler->preparing to submit the query: " + querySampleOnTable); diff --git a/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java b/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java index 4ca07a2..bd31ec6 100644 --- a/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java +++ b/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java @@ -896,8 +896,8 @@ public class DatabaseManagement { ((LinkedHashMap) element).values()); // // print check - // AnalysisLogger.getLogger().debug( - // "DatabaseManagement->values: " + listvalues); +// AnalysisLogger.getLogger().debug( +// "DatabaseManagement->values: " + listvalues); // each row could have several column values Object[] row = listvalues.toArray();