From 885a219fe1e579bdc6a5e96b6bcf98afb80b6734 Mon Sep 17 00:00:00 2001 From: Loredana Liccardo Date: Fri, 28 Nov 2014 11:54:54 +0000 Subject: [PATCH] Samplings and SubmitQuery changed to return, as a result, a map with column values truncated to 255 characters and a file with the whole column values, in order to manage big data properly, and to handle the conversion of a geometry data type into a multipolygon representation by applying the function st_astext() for a Postgres database. Sampler and DatabaseManagement classes changed. The file is stored on the statistical manager side and retrieved through a URL. pom file version changed to 1.3.0 git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@101766 82a268e6-3cf1-43bd-a215-b396298e98cf --- pom.xml | 2 +- .../dataaccess/databases/sampler/Sampler.java | 102 +++- .../databases/utils/DatabaseManagement.java | 470 +++++++++++++++--- 3 files changed, 494 insertions(+), 80 deletions(-) diff --git a/pom.xml b/pom.xml index d5df0fb..07082ee 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.gcube.dataaccess database-resource-manager - 1.2.0-SNAPSHOT + 1.3.0-SNAPSHOT DatabaseResourceManager Database resource manager diff --git a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java index 4f61fc3..de30845 100644 --- a/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java +++ b/src/main/java/org/gcube/dataaccess/databases/sampler/Sampler.java @@ -171,18 +171,48 @@ public class Sampler { if (DBType.equals(POSTGRES)) { - // attribute = "CAST(" + listColumns.get(i) + " as text), "; - attribute = "CAST(" + "\"" +listColumns.get(i)+ "\"" - + " as character varying(255)), "; + //to manage postgis data types + if((DataTypeColumns.get(i).equals("geometry")) || (DataTypeColumns.get(i).equals("geography"))){ + attribute = "st_astext("+listColumns.get(i)+") as "+listColumns.get(i)+", 
"; + + if (i == (listColumns.size() - 1)) { + attribute = "st_astext("+listColumns.get(i)+") as "+listColumns.get(i); + } + }else{ + // attribute = "CAST(" + listColumns.get(i) + " as text), "; + attribute = "CAST(" + "\"" +listColumns.get(i)+ "\"" + + " as character varying), "; - if (i == (listColumns.size() - 1)) { + if (i == (listColumns.size() - 1)) { - // attribute = "CAST(" + listColumns.get(i) + - // " as text)"; - attribute = "CAST(" + "\"" + listColumns.get(i)+ "\"" - + " as character varying(255))"; + // attribute = "CAST(" + listColumns.get(i) + + // " as text)"; + attribute = "CAST(" + "\"" + listColumns.get(i)+ "\"" + + " as character varying)"; + } } +// //to manage postgis data types +// if((DataTypeColumns.get(i).equals("geometry")) || (DataTypeColumns.get(i).equals("geography"))){ +// attribute = "SUBSTRING(st_astext("+listColumns.get(i)+") FROM 1 FOR 255), "; +// +// if (i == (listColumns.size() - 1)) { +// attribute = "SUBSTRING(st_astext("+listColumns.get(i)+") FROM 1 FOR 255)"; +// } +// }else{ +// // attribute = "CAST(" + listColumns.get(i) + " as text), "; +// attribute = "CAST(" + "\"" +listColumns.get(i)+ "\"" +// + " as character varying(255)), "; +// +// if (i == (listColumns.size() - 1)) { +// +// // attribute = "CAST(" + listColumns.get(i) + +// // " as text)"; +// attribute = "CAST(" + "\"" + listColumns.get(i)+ "\"" +// + " as character varying(255))"; +// +// } +// } } @@ -201,7 +231,7 @@ public class Sampler { // attribute = "CONVERT(" + listColumns.get(i) + // ", CHAR), "; attribute = "CAST(" + "`" + listColumns.get(i) + "`" - + " as CHAR(255) CHARACTER SET utf8), "; + + " as CHAR CHARACTER SET utf8), "; if (i == (listColumns.size() - 1)) { @@ -212,7 +242,7 @@ public class Sampler { // ", BINARY)"; attribute = "CAST(" + "`" + listColumns.get(i) - + "`" + " as CHAR(255) CHARACTER SET utf8)"; + + "`" + " as CHAR CHARACTER SET utf8)"; } @@ -222,7 +252,7 @@ public class Sampler { + "`" + listColumns.get(i) + "`" - + " as BINARY) as 
CHAR(255) CHARACTER SET utf8), "; + + " as BINARY) as CHAR CHARACTER SET utf8), "; if (i == (listColumns.size() - 1)) { @@ -236,12 +266,60 @@ public class Sampler { + "`" + listColumns.get(i) + "`" - + " as BINARY) as CHAR(255) CHARACTER SET utf8)"; + + " as BINARY) as CHAR CHARACTER SET utf8)"; } } +// if (DataTypeColumns.get(i).contains("char")) { +// +// // attribute = "CAST(" + listColumns.get(i) + +// // " as CHAR CHARACTER SET utf8), "; +// // attribute = "CONVERT(" + listColumns.get(i) + +// // ", CHAR), "; +// attribute = "CAST(" + "`" + listColumns.get(i) + "`" +// + " as CHAR(255) CHARACTER SET utf8), "; +// +// if (i == (listColumns.size() - 1)) { +// +// // attribute = "CAST(" + listColumns.get(i) + +// // " as CHAR CHARACTER SET utf8)"; +// +// // attribute = "CONVERT(" + listColumns.get(i) + +// // ", BINARY)"; +// +// attribute = "CAST(" + "`" + listColumns.get(i) +// + "`" + " as CHAR(255) CHARACTER SET utf8)"; +// +// } +// +// } else { +// +// attribute = "CAST(CAST(" +// + "`" +// + listColumns.get(i) +// + "`" +// + " as BINARY) as CHAR(255) CHARACTER SET utf8), "; +// +// if (i == (listColumns.size() - 1)) { +// +// // attribute = "CAST(" + listColumns.get(i) + +// // " as CHAR CHARACTER SET utf8)"; +// +// // attribute = "CONVERT(" + listColumns.get(i) + +// // ", BINARY)"; +// +// attribute = "CAST(CAST(" +// + "`" +// + listColumns.get(i) +// + "`" +// + " as BINARY) as CHAR(255) CHARACTER SET utf8)"; +// +// } +// +// } + } listAttributes = listAttributes + attribute; diff --git a/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java b/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java index f1675b4..976f7fa 100644 --- a/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java +++ b/src/main/java/org/gcube/dataaccess/databases/utils/DatabaseManagement.java @@ -83,7 +83,7 @@ public class DatabaseManagement { private static final String listSchemaNameQuery = "select nspname from 
pg_namespace where nspname <> 'information_schema' and nspname !~ E'^pg_'"; // query to retrieve datatype columns of a database table - private static final String queryForDataTypeColumnsPostgres = "SELECT data_type FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc"; + private static final String queryForDataTypeColumnsPostgres = "SELECT data_type, udt_name FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc"; private static final String queryForDataTypeColumnsMysql = "SELECT data_type FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc"; // query to get columns' name @@ -193,6 +193,9 @@ public class DatabaseManagement { // + valConverted); } + if((type.contains("geometry")) ||(type.contains("geography"))){ + valConverted = "\"" + valConverted + "\""; + } } @@ -202,18 +205,18 @@ public class DatabaseManagement { } - //to truncate value to 255 characters if it exceeds 255 characters - if(valConverted.length()>255){ - valConverted = valConverted.substring(0, 255); -// System.out.println("val truncated: " + valConverted); -// System.out.println("elem geometry truncated"); - - //add quote if it has been removed with truncation - if((valConverted.startsWith("\""))&&(!valConverted.endsWith("\""))){ - valConverted = valConverted+"\""; -// System.out.println("adding quote: " + valConverted); - } - } +// //to truncate value to 255 characters if it exceeds 255 characters +// if(valConverted.length()>255){ +// valConverted = valConverted.substring(0, 255); +//// System.out.println("val truncated: " + valConverted); +//// System.out.println("elem geometry truncated"); +// +// //add quote if it has been removed with truncation +// if((valConverted.startsWith("\""))&&(!valConverted.endsWith("\""))){ +// valConverted = valConverted+"\""; +//// System.out.println("adding quote: " + valConverted); +// } 
+// } return valConverted; } @@ -367,7 +370,7 @@ public class DatabaseManagement { List DataTypeColumns = null; // store table in a file and build the result Map - String FileName = pathFile + "QueryResult"+UUID.randomUUID()+".csv"; + String FileName = pathFile + "SubmitQueryResult_"+UUID.randomUUID()+".csv"; // write the result in the file and in the map AnalysisLogger.getLogger().debug( "In DatabaseManagement-> writing the result in the file: " @@ -377,7 +380,7 @@ public class DatabaseManagement { file = new File(FileName); out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( file), "UTF-8")); - writeTableIntoFile(results, DataTypeColumns); + writeSubmitResultIntoFile(results, DataTypeColumns); // //truncate the result list to 10000 rows // if ((mapResult!=null)&&(mapResult.size()!=0)){ @@ -727,17 +730,18 @@ public class DatabaseManagement { // writeSampleTableIntoFile(resultSet, tableName, schemaName); - String FileName = pathFile + "SampleResult.csv"; - - // to recover columns names list - + //TODO ** COMMENTED +// String FileName = pathFile + "SampleResult.csv"; +// +// // to recover columns names list +// List listColumns = sampler.getListColumns(); - // String header = ""; - - // //print check - // AnalysisLogger.getLogger().debug( - // "In DatabaseManagement->list columns size: " +listColumns.size()); - +// // String header = ""; +// +// // //print check +// // AnalysisLogger.getLogger().debug( +// // "In DatabaseManagement->list columns size: " +listColumns.size()); +// // to recover columns names list for (int i = 0; i < listColumns.size(); i++) { @@ -766,7 +770,32 @@ public class DatabaseManagement { // // writeTableIntoFile(resultSet, DataTypeColumns); // build the Map of Result - buildMapResult(resultSet, DataTypeColumns); +// buildMapResult(resultSet, DataTypeColumns); + + // store table in a file and build the result Map + String FileName = pathFile + "SmartSampling_"+UUID.randomUUID()+".csv"; + // write the result in the file and in 
the map + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> writing the result in the file: " + + FileName); + + try{ + file = new File(FileName); + out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( + file), "UTF-8")); + writeSamplingResultIntoFile(resultSet, DataTypeColumns); + + }catch (Throwable e) { + throw e; + }finally{ + //close the file + if (out!=null){ + out.close(); + out = null; + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> File closed"); + } + } } /** @@ -809,6 +838,7 @@ public class DatabaseManagement { // AnalysisLogger.getLogger().debug( // "In DatabaseManagement->store table in a file"); + // to recover columns names list List listColumns = sampler.getListColumns(); @@ -819,22 +849,47 @@ public class DatabaseManagement { header = header + listColumns.get(i); } } +// +// // // store table in a file +// // String FileName = pathFile + "SampleResult.csv"; +// // // write the result in the file and in the map +// // AnalysisLogger.getLogger().debug( +// // "In DatabaseManagement->writing the result in the file: " +// // + FileName); +// // file = new File(FileName); +// // out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( +// // file), "UTF-8")); +// // writeTableIntoFile(resultSet, DataTypeColumns); +// +// // build the Map of Result +// buildMapResult(resultSet, DataTypeColumns); + + // store table in a file and build the result Map + String FileName = pathFile + "Sampling_"+UUID.randomUUID()+".csv"; + // write the result in the file and in the map + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> writing the result in the file: " + + FileName); - // // store table in a file - // String FileName = pathFile + "SampleResult.csv"; - // // write the result in the file and in the map - // AnalysisLogger.getLogger().debug( - // "In DatabaseManagement->writing the result in the file: " - // + FileName); - // file = new File(FileName); - // out = new BufferedWriter(new 
OutputStreamWriter(new FileOutputStream( - // file), "UTF-8")); - // writeTableIntoFile(resultSet, DataTypeColumns); + try{ + file = new File(FileName); + out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( + file), "UTF-8")); + writeSamplingResultIntoFile(resultSet, DataTypeColumns); - // build the Map of Result - buildMapResult(resultSet, DataTypeColumns); - } + }catch (Throwable e) { + throw e; + }finally{ + //close the file + if (out!=null){ + out.close(); + out = null; + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> File closed"); + } + } + } /** * Retrieve 100 rows of a table in a random manner. @@ -880,17 +935,18 @@ public class DatabaseManagement { // AnalysisLogger.getLogger().debug( // "In DatabaseManagement->store table in a file"); + //TODO ** COMMENTED // to recover columns names list List listColumns = sampler.getListColumns(); - - // //print check - // for (int i = 0; i < listColumns.size(); i++) { - // AnalysisLogger.getLogger() - // .debug("In DatabaseManagement->listcolumns: " - // + listColumns.get(i)); - // } - // String header = ""; - +// +// // //print check +// // for (int i = 0; i < listColumns.size(); i++) { +// // AnalysisLogger.getLogger() +// // .debug("In DatabaseManagement->listcolumns: " +// // + listColumns.get(i)); +// // } +// // String header = ""; +// for (int i = 0; i < listColumns.size(); i++) { if (i != listColumns.size() - 1) { header = header + listColumns.get(i) + ", "; @@ -911,7 +967,32 @@ public class DatabaseManagement { // writeTableIntoFile(resultSet, DataTypeColumns); // build the Map of Result - buildMapResult(resultSet, DataTypeColumns); +// buildMapResult(resultSet, DataTypeColumns); + + // store table in a file and build the result Map + String FileName = pathFile + "RandomSampling_"+UUID.randomUUID()+".csv"; + // write the result in the file and in the map + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> writing the result in the file: " + + FileName); + + try{ + 
file = new File(FileName); + out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( + file), "UTF-8")); + writeSamplingResultIntoFile(resultSet, DataTypeColumns); + + }catch (Throwable e) { + throw e; + }finally{ + //close the file + if (out!=null){ + out.close(); + out = null; + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> File closed"); + } + } } @@ -1083,7 +1164,7 @@ public class DatabaseManagement { } // write the table result in the file and build the map of results - private void writeTableIntoFile(List result, + private void writeSubmitResultIntoFile(List result, List DataTypeColumns) throws Exception { // // file that will contain result @@ -1173,12 +1254,30 @@ public class DatabaseManagement { parsed = convertToJavaType( DataTypeColumns.get(j), parsed); } - - // // check value - // AnalysisLogger.getLogger().debug( - // "In DatabaseManagement->row: " + (i + 1) - // + " column: " + (j + 1) + " value= " - // + parsed); + +// // check value +// AnalysisLogger.getLogger().debug( +// "In DatabaseManagement->row: " + (i + 1) +// + " column: " + (j + 1) + " value= " +// + parsed); + + //write the whole column value in a file but a truncated value in a map + + //to truncate value to 255 characters if it exceeds 255 characters + String truncVal=parsed; + if(truncVal.length()>255){ + truncVal = truncVal.substring(0, 255); + +// System.out.println("val truncated: " + valConverted); +// System.out.println("elem geometry truncated"); + + //add quote if it has been removed with truncation + if((truncVal.startsWith("\""))&&(!truncVal.endsWith("\""))){ + truncVal = truncVal+"\""; +// System.out.println("adding quote: " + valConverted); + } + + } // write in a file if (j != row.length - 1) { @@ -1190,9 +1289,9 @@ public class DatabaseManagement { // System.out.println("write column : " + j); // RowString = RowString + parsed + " "; if (j == 0) { - RowString = parsed; + RowString = truncVal; } else { - RowString = RowString + "," + parsed; + 
RowString = RowString + "," + truncVal; } } if (j == row.length - 1) { @@ -1203,9 +1302,9 @@ public class DatabaseManagement { // to add a row to the map if (row.length == 1) { - RowString = parsed; + RowString = truncVal; } else { - RowString = RowString + "," + parsed; + RowString = RowString + "," + truncVal; } // to add a row to the map @@ -1213,10 +1312,10 @@ public class DatabaseManagement { // mapSampleTableResult.put(String.valueOf(i), // RowString); - // // check value row - // AnalysisLogger.getLogger().debug( - // "writing the value: " + RowString + " key: " - // + String.valueOf(i)); +// // check value row +// AnalysisLogger.getLogger().debug( +// "writing the value: " + RowString + " key: " +// + String.valueOf(i)); // mapResult.put(Integer.valueOf(i), RowString); @@ -1292,6 +1391,238 @@ public class DatabaseManagement { } } + + // write the table result in the file and build the map of results + private void writeSamplingResultIntoFile(List result, + List DataTypeColumns) throws Exception { + + // // file that will contain result + // BufferedWriter out; + // // String fileName; + // // fileName = "./cfg/" + "table.txt"; + // // fileName = "./files/" + "table.txt"; + // // fileName = + // // + // "/home/loredana/workspace/DatabasesResourcesManagerAlgorithms/cfg/" + // // + "SampleOnTable.txt"; + // // fileName = "./files/" + "SmartSampleOnTable.txt"; + // // File file = new File(fileName); + // file = new File(FileName); + // out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( + // file), "UTF-8")); + + + try{ + // to get columns names and result + // write headers in the file + // to recover columns names + if (header.equals("")) { + ArrayList listKeys = new ArrayList( + ((LinkedHashMap) (result.get(0))).keySet()); + + for (int i = 0; i < listKeys.size(); i++) { + if (i != listKeys.size() - 1) { + header = header + listKeys.get(i) + ", "; + } else { + header = header + listKeys.get(i); + } + } + } + + // // print check + // 
AnalysisLogger.getLogger().debug( + // "DatabaseManagement->HEADERS: " + header); + + out.write(header); + out.newLine(); + mapResult.put("HEADERS", header); + + // //print check values + // AnalysisLogger.getLogger().debug( + // "DatabaseManagement->columns names: " + listKeys); + + if (result != null && result.size() != 0) { + // // write operation in the file + for (int i = 0; i < result.size(); i++) { + String RowString = ""; + Object element = result.get(i); + + // arraylist in which each element is a row result + ArrayList listvalues = new ArrayList( + ((LinkedHashMap) element).values()); + + // // print check + // AnalysisLogger.getLogger().debug( + // "DatabaseManagement->values: " + listvalues); + + // each row could have several column values + Object[] row = listvalues.toArray(); + if (row.length >= 1) { + for (int j = 0; j < row.length; j++) { + if (row[j] == null) { + row[j] = ""; + } + // to parse the obtained results in order to align + // number + // values with those of postgres + String original = row[j].toString(); + + // // check value + // AnalysisLogger.getLogger().debug( + // "In DatabaseManagement->original value: " + // + original); + + String parsed = "" + row[j]; + if (original != "") { + // convert database datatypes to Java datatypes + if (DataTypeColumns == null + || DataTypeColumns.size() == 0) + parsed = convertToJavaType(row[j].getClass() + .getName(), parsed); + else + parsed = convertToJavaType( + DataTypeColumns.get(j), parsed); + } + + // // check value +// AnalysisLogger.getLogger().debug( +// "In DatabaseManagement->row: " + (i + 1) +// + " column: " + (j + 1) + " value= " +// + parsed); + + //write the whole column value in a file but a truncated value in a map + + //to truncate value to 255 characters if it exceeds 255 characters + String truncVal=parsed; + if(truncVal.length()>255){ + truncVal = truncVal.substring(0, 255); + +// System.out.println("val truncated: " + valConverted); +// System.out.println("elem geometry 
truncated"); + + //add quote if it has been removed with truncation + if((truncVal.startsWith("\""))&&(!truncVal.endsWith("\""))){ + truncVal = truncVal+"\""; +// System.out.println("adding quote: " + valConverted); + } + + } + + // write in a file + if (j != row.length - 1) { + // out.write("\"" + parsed + "\""); + // out.write(","); + out.write(parsed); + out.write(","); + + // System.out.println("write column : " + j); + // RowString = RowString + parsed + " "; + if (j == 0) { + RowString = truncVal; + } else { + RowString = RowString + "," + truncVal; + } + } + if (j == row.length - 1) { + // out.write("\"" + parsed + "\""); + // out.newLine(); + out.write(parsed); + out.newLine(); + + // to add a row to the map + if (row.length == 1) { + RowString = truncVal; + } else { + RowString = RowString + "," + truncVal; + } + + // to add a row to the map + // RowString = RowString + "," + parsed; + // mapSampleTableResult.put(String.valueOf(i), + // RowString); + + // // check value row +// AnalysisLogger.getLogger().debug( +// "writing the value: " + RowString + " key: " +// + String.valueOf(i)); + + // mapResult.put(Integer.valueOf(i), RowString); + + //add in the map only the first 1000 rows if the result list size is greater than 1000 +// if (result.size()>1000){ +// if(i<1000){ +// mapResult.put(String.valueOf(i), RowString); +// } +// +// }else{ +// mapResult.put(String.valueOf(i), RowString); +// } + + //add row in a map + mapResult.put(String.valueOf(i), RowString); + } + } + } + // else if (result.size() == 1) { + // + // // Object RowElement = (Object) result.get(0); + // + // if (row[0] == null) { + // row[0] = ""; + // } + // + // // to parse the obtained results in order to align + // // number + // // values with those of postgres + // String original = row[0].toString(); + // + // // // check value + // // AnalysisLogger.getLogger().debug( + // // "In DatabaseManagement->original value: " + // // + original); + // + // String parsed = "" + row[0]; + // + 
// if (original != "") { + // // convert database datatypes to Java datatypes + // if (DataTypeColumns == null + // || DataTypeColumns.size() == 0) + // parsed = convertToJavaType(row[0].getClass() + // .getName(), parsed); + // else + // parsed = convertToJavaType(DataTypeColumns.get(0), + // parsed); + // } + // + // out.write(row[0].toString()); + // out.newLine(); + // + // // to add a row to the map + // mapResult.put(String.valueOf(i), row[0].toString()); + // + // } + } + } + + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> map size without header: " + (mapResult.size()-1)); + + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> Writing File and Result Map creation operations terminated"); + + }catch (Throwable e) { + throw e; + }finally{ + // close the file + if (out!=null){ + out.close(); + out=null; + AnalysisLogger.getLogger().debug( + "In DatabaseManagement-> File closed"); + } + + } + } // to retrieve datatype columns of a table private List getDataTypeColumns(String tableName, String schemaName) @@ -1335,7 +1666,12 @@ public class DatabaseManagement { // "DatabaseManagement->datatype values: " // + listvalues); + //to manage USER-DEFINED types + if(listvalues.get(0).toString().equals("USER-DEFINED")){ + DataTypeColumns.add(i, (String) listvalues.get(1)); + }else{ DataTypeColumns.add(i, (String) listvalues.get(0)); + } } @@ -1475,9 +1811,9 @@ public class DatabaseManagement { } - // to return the file in which the query result (originated from a submit - // query) is stored - public File getFileQueryResult() { + // to return the file in which the result (originated from a submit + // query or sampling operations) is stored + public File getFile() { // return fileQueryResult; return file;