Sampling and SubmitQuery operations changed to return a map with column values truncated to 255 characters, plus a file with the whole column values, so that big data is managed properly; geometry data types are converted to a multipolygon representation by applying the st_astext() function for a Postgres database.

Sampler and DatabaseManagement classes changed. The file is stored on the statistical manager side and retrieved through a URL.
pom version updated to 1.3.0

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-access/DatabasesResourcesManager@101766 82a268e6-3cf1-43bd-a215-b396298e98cf
Loredana Liccardo 2014-11-28 11:54:54 +00:00
parent 72b8e87e81
commit 885a219fe1
3 changed files with 494 additions and 80 deletions
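As a minimal sketch of the new behaviour (table and column names below are invented for illustration): a Postgres geometry column now goes through st_astext(), so its value comes back as WKT text such as a MULTIPOLYGON; the whole value is written to the CSV file, while the result map gets at most the first 255 characters.

    // illustrative only: "occurrences" and "area" are made-up names
    String query = "SELECT st_astext(area) as area FROM occurrences";
    // returns e.g. MULTIPOLYGON(((30 20, 45 40, 10 40, 30 20)))
    // full WKT -> CSV file; first 255 characters -> result map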

pom.xml

@@ -10,7 +10,7 @@
<groupId>org.gcube.dataaccess</groupId>
<artifactId>database-resource-manager</artifactId>
<version>1.2.0-SNAPSHOT</version>
<version>1.3.0-SNAPSHOT</version>
<name>DatabaseResourceManager</name>
<description>Database resource manager</description>

Sampler.java

@@ -171,18 +171,48 @@ public class Sampler {
if (DBType.equals(POSTGRES)) {
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
attribute = "CAST(" + "\"" +listColumns.get(i)+ "\""
+ " as character varying(255)), ";
//to manage postgis data types
if((DataTypeColumns.get(i).equals("geometry")) || (DataTypeColumns.get(i).equals("geography"))){
attribute = "st_astext("+listColumns.get(i)+") as "+listColumns.get(i)+", ";
if (i == (listColumns.size() - 1)) {
attribute = "st_astext("+listColumns.get(i)+") as "+listColumns.get(i);
}
}else{
// attribute = "CAST(" + listColumns.get(i) + " as text), ";
attribute = "CAST(" + "\"" +listColumns.get(i)+ "\""
+ " as character varying), ";
if (i == (listColumns.size() - 1)) {
if (i == (listColumns.size() - 1)) {
// attribute = "CAST(" + listColumns.get(i) +
// " as text)";
attribute = "CAST(" + "\"" + listColumns.get(i)+ "\""
+ " as character varying(255))";
// attribute = "CAST(" + listColumns.get(i) +
// " as text)";
attribute = "CAST(" + "\"" + listColumns.get(i)+ "\""
+ " as character varying)";
}
}
// //to manage postgis data types
// if((DataTypeColumns.get(i).equals("geometry")) || (DataTypeColumns.get(i).equals("geography"))){
// attribute = "SUBSTRING(st_astext("+listColumns.get(i)+") FROM 1 FOR 255), ";
//
// if (i == (listColumns.size() - 1)) {
// attribute = "SUBSTRING(st_astext("+listColumns.get(i)+") FROM 1 FOR 255)";
// }
// }else{
// // attribute = "CAST(" + listColumns.get(i) + " as text), ";
// attribute = "CAST(" + "\"" +listColumns.get(i)+ "\""
// + " as character varying(255)), ";
//
// if (i == (listColumns.size() - 1)) {
//
// // attribute = "CAST(" + listColumns.get(i) +
// // " as text)";
// attribute = "CAST(" + "\"" + listColumns.get(i)+ "\""
// + " as character varying(255))";
//
// }
// }
}
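For reference, a select list assembled by the Postgres branch above might look like this (a sketch with invented names; "species" is a text column, "area" a geometry column):

    // non-geometry columns: quoted and cast without a length limit;
    // geometry columns: exported as WKT via st_astext()
    String query = "SELECT CAST(\"species\" as character varying), "
            + "st_astext(area) as area FROM occurrences";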
@@ -201,7 +231,7 @@ public class Sampler {
// attribute = "CONVERT(" + listColumns.get(i) +
// ", CHAR), ";
attribute = "CAST(" + "`" + listColumns.get(i) + "`"
+ " as CHAR(255) CHARACTER SET utf8), ";
+ " as CHAR CHARACTER SET utf8), ";
if (i == (listColumns.size() - 1)) {
@@ -212,7 +242,7 @@ public class Sampler {
// ", BINARY)";
attribute = "CAST(" + "`" + listColumns.get(i)
+ "`" + " as CHAR(255) CHARACTER SET utf8)";
+ "`" + " as CHAR CHARACTER SET utf8)";
}
@@ -222,7 +252,7 @@ public class Sampler {
+ "`"
+ listColumns.get(i)
+ "`"
+ " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
+ " as BINARY) as CHAR CHARACTER SET utf8), ";
if (i == (listColumns.size() - 1)) {
@@ -236,12 +266,60 @@ public class Sampler {
+ "`"
+ listColumns.get(i)
+ "`"
+ " as BINARY) as CHAR(255) CHARACTER SET utf8)";
+ " as BINARY) as CHAR CHARACTER SET utf8)";
}
}
// if (DataTypeColumns.get(i).contains("char")) {
//
// // attribute = "CAST(" + listColumns.get(i) +
// // " as CHAR CHARACTER SET utf8), ";
// // attribute = "CONVERT(" + listColumns.get(i) +
// // ", CHAR), ";
// attribute = "CAST(" + "`" + listColumns.get(i) + "`"
// + " as CHAR(255) CHARACTER SET utf8), ";
//
// if (i == (listColumns.size() - 1)) {
//
// // attribute = "CAST(" + listColumns.get(i) +
// // " as CHAR CHARACTER SET utf8)";
//
// // attribute = "CONVERT(" + listColumns.get(i) +
// // ", BINARY)";
//
// attribute = "CAST(" + "`" + listColumns.get(i)
// + "`" + " as CHAR(255) CHARACTER SET utf8)";
//
// }
//
// } else {
//
// attribute = "CAST(CAST("
// + "`"
// + listColumns.get(i)
// + "`"
// + " as BINARY) as CHAR(255) CHARACTER SET utf8), ";
//
// if (i == (listColumns.size() - 1)) {
//
// // attribute = "CAST(" + listColumns.get(i) +
// // " as CHAR CHARACTER SET utf8)";
//
// // attribute = "CONVERT(" + listColumns.get(i) +
// // ", BINARY)";
//
// attribute = "CAST(CAST("
// + "`"
// + listColumns.get(i)
// + "`"
// + " as BINARY) as CHAR(255) CHARACTER SET utf8)";
//
// }
//
// }
}
listAttributes = listAttributes + attribute;
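The MySQL branch moves in the same direction: CHAR(255) becomes CHAR, so the database no longer truncates and truncation is deferred to the map-building step. A sketch of a resulting query, with invented names:

    // char-typed column and non-char column, no 255-character limit in SQL
    String query = "SELECT CAST(`species` as CHAR CHARACTER SET utf8), "
            + "CAST(CAST(`payload` as BINARY) as CHAR CHARACTER SET utf8) "
            + "FROM occurrences";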

DatabaseManagement.java

@@ -83,7 +83,7 @@ public class DatabaseManagement {
private static final String listSchemaNameQuery = "select nspname from pg_namespace where nspname <> 'information_schema' and nspname !~ E'^pg_'";
// query to retrieve datatype columns of a database table
private static final String queryForDataTypeColumnsPostgres = "SELECT data_type FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc";
private static final String queryForDataTypeColumnsPostgres = "SELECT data_type, udt_name FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc";
private static final String queryForDataTypeColumnsMysql = "SELECT data_type FROM information_schema.COLUMNS WHERE table_name ='%1$s' and table_schema='%2$s' order by ordinal_position asc";
// query to get columns' name
@@ -193,6 +193,9 @@ public class DatabaseManagement {
// + valConverted);
}
if((type.contains("geometry")) ||(type.contains("geography"))){
valConverted = "\"" + valConverted + "\"";
}
}
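The quoting added above matters because WKT text contains commas: without surrounding quotes a geometry value would be split across several fields of the comma-separated row. A minimal illustration with an invented value:

    String wkt = "MULTIPOLYGON(((30 20, 45 40, 10 40, 30 20)))";
    String csvSafe = "\"" + wkt + "\"";  // stays one field in the CSV row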
@@ -202,18 +205,18 @@ public class DatabaseManagement {
}
//to truncate value to 255 characters if it exceeds 255 characters
if(valConverted.length()>255){
valConverted = valConverted.substring(0, 255);
// System.out.println("val truncated: " + valConverted);
// System.out.println("elem geometry truncated");
//add quote if it has been removed with truncation
if((valConverted.startsWith("\""))&&(!valConverted.endsWith("\""))){
valConverted = valConverted+"\"";
// System.out.println("adding quote: " + valConverted);
}
}
// //to truncate value to 255 characters if it exceeds 255 characters
// if(valConverted.length()>255){
// valConverted = valConverted.substring(0, 255);
//// System.out.println("val truncated: " + valConverted);
//// System.out.println("elem geometry truncated");
//
// //add quote if it has been removed with truncation
// if((valConverted.startsWith("\""))&&(!valConverted.endsWith("\""))){
// valConverted = valConverted+"\"";
//// System.out.println("adding quote: " + valConverted);
// }
// }
return valConverted;
}
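The truncation rule removed here reappears below in the write methods; as a sketch (not code from this commit), it amounts to a small helper: cut the value to 255 characters for the result map only, and restore the closing quote if the cut removed it from a quoted WKT value.

    private static String truncateForMap(String value) {
        if (value.length() <= 255) {
            return value;
        }
        String truncated = value.substring(0, 255);
        // re-balance the quote removed by the cut
        if (truncated.startsWith("\"") && !truncated.endsWith("\"")) {
            truncated = truncated + "\"";
        }
        return truncated;
    }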
@@ -367,7 +370,7 @@ public class DatabaseManagement {
List<String> DataTypeColumns = null;
// store table in a file and build the result Map
String FileName = pathFile + "QueryResult"+UUID.randomUUID()+".csv";
String FileName = pathFile + "SubmitQueryResult_"+UUID.randomUUID()+".csv";
// write the result in the file and in the map
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> writing the result in the file: "
@@ -377,7 +380,7 @@ public class DatabaseManagement {
file = new File(FileName);
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
file), "UTF-8"));
writeTableIntoFile(results, DataTypeColumns);
writeSubmitResultIntoFile(results, DataTypeColumns);
// //truncate the result list to 10000 rows
// if ((mapResult!=null)&&(mapResult.size()!=0)){
@@ -727,17 +730,18 @@ public class DatabaseManagement {
// writeSampleTableIntoFile(resultSet, tableName, schemaName);
String FileName = pathFile + "SampleResult.csv";
// to recover columns names list
//TODO ** COMMENTED
// String FileName = pathFile + "SampleResult.csv";
//
// // to recover columns names list
//
List<String> listColumns = sampler.getListColumns();
// String header = "";
// //print check
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->list columns size: " +listColumns.size());
// // String header = "";
//
// // //print check
// // AnalysisLogger.getLogger().debug(
// // "In DatabaseManagement->list columns size: " +listColumns.size());
//
// to recover columns names list
for (int i = 0; i < listColumns.size(); i++) {
@@ -766,7 +770,32 @@ public class DatabaseManagement {
// // writeTableIntoFile(resultSet, DataTypeColumns);
// build the Map of Result
buildMapResult(resultSet, DataTypeColumns);
// buildMapResult(resultSet, DataTypeColumns);
// store table in a file and build the result Map
String FileName = pathFile + "SmartSampling_"+UUID.randomUUID()+".csv";
// write the result in the file and in the map
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> writing the result in the file: "
+ FileName);
try{
file = new File(FileName);
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
file), "UTF-8"));
writeSamplingResultIntoFile(resultSet, DataTypeColumns);
}catch (Throwable e) {
throw e;
}finally{
//close the file
if (out!=null){
out.close();
out = null;
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> File closed");
}
}
}
/**
@@ -809,6 +838,7 @@ public class DatabaseManagement {
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->store table in a file");
// to recover columns names list
List<String> listColumns = sampler.getListColumns();
@@ -819,22 +849,47 @@ public class DatabaseManagement {
header = header + listColumns.get(i);
}
}
//
// // // store table in a file
// // String FileName = pathFile + "SampleResult.csv";
// // // write the result in the file and in the map
// // AnalysisLogger.getLogger().debug(
// // "In DatabaseManagement->writing the result in the file: "
// // + FileName);
// // file = new File(FileName);
// // out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
// // file), "UTF-8"));
// // writeTableIntoFile(resultSet, DataTypeColumns);
//
// // build the Map of Result
// buildMapResult(resultSet, DataTypeColumns);
// store table in a file and build the result Map
String FileName = pathFile + "Sampling_"+UUID.randomUUID()+".csv";
// write the result in the file and in the map
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> writing the result in the file: "
+ FileName);
// // store table in a file
// String FileName = pathFile + "SampleResult.csv";
// // write the result in the file and in the map
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->writing the result in the file: "
// + FileName);
// file = new File(FileName);
// out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
// file), "UTF-8"));
// writeTableIntoFile(resultSet, DataTypeColumns);
try{
file = new File(FileName);
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
file), "UTF-8"));
writeSamplingResultIntoFile(resultSet, DataTypeColumns);
// build the Map of Result
buildMapResult(resultSet, DataTypeColumns);
}
}catch (Throwable e) {
throw e;
}finally{
//close the file
if (out!=null){
out.close();
out = null;
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> File closed");
}
}
}
/**
* Retrieve 100 rows of a table in a random manner.
@@ -880,17 +935,18 @@ public class DatabaseManagement {
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->store table in a file");
//TODO ** COMMENTED
// to recover columns names list
List<String> listColumns = sampler.getListColumns();
// //print check
// for (int i = 0; i < listColumns.size(); i++) {
// AnalysisLogger.getLogger()
// .debug("In DatabaseManagement->listcolumns: "
// + listColumns.get(i));
// }
// String header = "";
//
// // //print check
// // for (int i = 0; i < listColumns.size(); i++) {
// // AnalysisLogger.getLogger()
// // .debug("In DatabaseManagement->listcolumns: "
// // + listColumns.get(i));
// // }
// // String header = "";
//
for (int i = 0; i < listColumns.size(); i++) {
if (i != listColumns.size() - 1) {
header = header + listColumns.get(i) + ", ";
@@ -911,7 +967,32 @@ public class DatabaseManagement {
// writeTableIntoFile(resultSet, DataTypeColumns);
// build the Map of Result
buildMapResult(resultSet, DataTypeColumns);
// buildMapResult(resultSet, DataTypeColumns);
// store table in a file and build the result Map
String FileName = pathFile + "RandomSampling_"+UUID.randomUUID()+".csv";
// write the result in the file and in the map
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> writing the result in the file: "
+ FileName);
try{
file = new File(FileName);
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
file), "UTF-8"));
writeSamplingResultIntoFile(resultSet, DataTypeColumns);
}catch (Throwable e) {
throw e;
}finally{
//close the file
if (out!=null){
out.close();
out = null;
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> File closed");
}
}
}
@@ -1083,7 +1164,7 @@ public class DatabaseManagement {
}
// write the table result in the file and build the map of results
private void writeTableIntoFile(List<Object> result,
private void writeSubmitResultIntoFile(List<Object> result,
List<String> DataTypeColumns) throws Exception {
// // file that will contain result
@@ -1173,12 +1254,30 @@ public class DatabaseManagement {
parsed = convertToJavaType(
DataTypeColumns.get(j), parsed);
}
// // check value
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->row: " + (i + 1)
// + " column: " + (j + 1) + " value= "
// + parsed);
// // check value
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->row: " + (i + 1)
// + " column: " + (j + 1) + " value= "
// + parsed);
//write the whole column value in a file but a truncated value in a map
//to truncate value to 255 characters if it exceeds 255 characters
String truncVal=parsed;
if(truncVal.length()>255){
truncVal = truncVal.substring(0, 255);
// System.out.println("val truncated: " + valConverted);
// System.out.println("elem geometry truncated");
//add quote if it has been removed with truncation
if((truncVal.startsWith("\""))&&(!truncVal.endsWith("\""))){
truncVal = truncVal+"\"";
// System.out.println("adding quote: " + valConverted);
}
}
// write in a file
if (j != row.length - 1) {
@@ -1190,9 +1289,9 @@ public class DatabaseManagement {
// System.out.println("write column : " + j);
// RowString = RowString + parsed + " ";
if (j == 0) {
RowString = parsed;
RowString = truncVal;
} else {
RowString = RowString + "," + parsed;
RowString = RowString + "," + truncVal;
}
}
if (j == row.length - 1) {
@@ -1203,9 +1302,9 @@ public class DatabaseManagement {
// to add a row to the map
if (row.length == 1) {
RowString = parsed;
RowString = truncVal;
} else {
RowString = RowString + "," + parsed;
RowString = RowString + "," + truncVal;
}
// to add a row to the map
@@ -1213,10 +1312,10 @@ public class DatabaseManagement {
// mapSampleTableResult.put(String.valueOf(i),
// RowString);
// // check value row
// AnalysisLogger.getLogger().debug(
// "writing the value: " + RowString + " key: "
// + String.valueOf(i));
// // check value row
// AnalysisLogger.getLogger().debug(
// "writing the value: " + RowString + " key: "
// + String.valueOf(i));
// mapResult.put(Integer.valueOf(i), RowString);
@@ -1292,6 +1391,238 @@ public class DatabaseManagement {
}
}
// write the table result in the file and build the map of results
private void writeSamplingResultIntoFile(List<Object> result,
List<String> DataTypeColumns) throws Exception {
// // file that will contain result
// BufferedWriter out;
// // String fileName;
// // fileName = "./cfg/" + "table.txt";
// // fileName = "./files/" + "table.txt";
// // fileName =
// //
// "/home/loredana/workspace/DatabasesResourcesManagerAlgorithms/cfg/"
// // + "SampleOnTable.txt";
// // fileName = "./files/" + "SmartSampleOnTable.txt";
// // File file = new File(fileName);
// file = new File(FileName);
// out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
// file), "UTF-8"));
try{
// to get columns names and result
// write headers in the file
// to recover columns names
if (header.equals("")) {
ArrayList<String> listKeys = new ArrayList<String>(
((LinkedHashMap<String, Object>) (result.get(0))).keySet());
for (int i = 0; i < listKeys.size(); i++) {
if (i != listKeys.size() - 1) {
header = header + listKeys.get(i) + ", ";
} else {
header = header + listKeys.get(i);
}
}
}
// // print check
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->HEADERS: " + header);
out.write(header);
out.newLine();
mapResult.put("HEADERS", header);
// //print check values
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->columns names: " + listKeys);
if (result != null && result.size() != 0) {
// // write operation in the file
for (int i = 0; i < result.size(); i++) {
String RowString = "";
Object element = result.get(i);
// arraylist in which each element is a row result
ArrayList<Object> listvalues = new ArrayList<Object>(
((LinkedHashMap<String, Object>) element).values());
// // print check
// AnalysisLogger.getLogger().debug(
// "DatabaseManagement->values: " + listvalues);
// each row could have several column values
Object[] row = listvalues.toArray();
if (row.length >= 1) {
for (int j = 0; j < row.length; j++) {
if (row[j] == null) {
row[j] = "";
}
// to parse the obtained results in order to align
// number
// values with those of postgres
String original = row[j].toString();
// // check value
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->original value: "
// + original);
String parsed = "" + row[j];
if (original != "") {
// convert database datatypes to Java datatypes
if (DataTypeColumns == null
|| DataTypeColumns.size() == 0)
parsed = convertToJavaType(row[j].getClass()
.getName(), parsed);
else
parsed = convertToJavaType(
DataTypeColumns.get(j), parsed);
}
// // check value
// AnalysisLogger.getLogger().debug(
// "In DatabaseManagement->row: " + (i + 1)
// + " column: " + (j + 1) + " value= "
// + parsed);
//write the whole column value in a file but a truncated value in a map
//to truncate value to 255 characters if it exceeds 255 characters
String truncVal=parsed;
if(truncVal.length()>255){
truncVal = truncVal.substring(0, 255);
// System.out.println("val truncated: " + valConverted);
// System.out.println("elem geometry truncated");
//add quote if it has been removed with truncation
if((truncVal.startsWith("\""))&&(!truncVal.endsWith("\""))){
truncVal = truncVal+"\"";
// System.out.println("adding quote: " + valConverted);
}
}
// write in a file
if (j != row.length - 1) {
// out.write("\"" + parsed + "\"");
// out.write(",");
out.write(parsed);
out.write(",");
// System.out.println("write column : " + j);
// RowString = RowString + parsed + " ";
if (j == 0) {
RowString = truncVal;
} else {
RowString = RowString + "," + truncVal;
}
}
if (j == row.length - 1) {
// out.write("\"" + parsed + "\"");
// out.newLine();
out.write(parsed);
out.newLine();
// to add a row to the map
if (row.length == 1) {
RowString = truncVal;
} else {
RowString = RowString + "," + truncVal;
}
// to add a row to the map
// RowString = RowString + "," + parsed;
// mapSampleTableResult.put(String.valueOf(i),
// RowString);
// // check value row
// AnalysisLogger.getLogger().debug(
// "writing the value: " + RowString + " key: "
// + String.valueOf(i));
// mapResult.put(Integer.valueOf(i), RowString);
//add in the map only the first 1000 rows if the result list size is greater than 1000
// if (result.size()>1000){
// if(i<1000){
// mapResult.put(String.valueOf(i), RowString);
// }
//
// }else{
// mapResult.put(String.valueOf(i), RowString);
// }
//add row in a map
mapResult.put(String.valueOf(i), RowString);
}
}
}
// else if (result.size() == 1) {
//
// // Object RowElement = (Object) result.get(0);
//
// if (row[0] == null) {
// row[0] = "";
// }
//
// // to parse the obtained results in order to align
// // number
// // values with those of postgres
// String original = row[0].toString();
//
// // // check value
// // AnalysisLogger.getLogger().debug(
// // "In DatabaseManagement->original value: "
// // + original);
//
// String parsed = "" + row[0];
//
// if (original != "") {
// // convert database datatypes to Java datatypes
// if (DataTypeColumns == null
// || DataTypeColumns.size() == 0)
// parsed = convertToJavaType(row[0].getClass()
// .getName(), parsed);
// else
// parsed = convertToJavaType(DataTypeColumns.get(0),
// parsed);
// }
//
// out.write(row[0].toString());
// out.newLine();
//
// // to add a row to the map
// mapResult.put(String.valueOf(i), row[0].toString());
//
// }
}
}
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> map size without header: " + (mapResult.size()-1));
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> Writing File and Result Map creation operations terminated");
}catch (Throwable e) {
throw e;
}finally{
// close the file
if (out!=null){
out.close();
out=null;
AnalysisLogger.getLogger().debug(
"In DatabaseManagement-> File closed");
}
}
}
// to retrieve datatype columns of a table
private List<String> getDataTypeColumns(String tableName, String schemaName)
@@ -1335,7 +1666,12 @@ public class DatabaseManagement {
// "DatabaseManagement->datatype values: "
// + listvalues);
//to manage USER-DEFINED types
if(listvalues.get(0).toString().equals("USER-DEFINED")){
DataTypeColumns.add(i, (String) listvalues.get(1));
}else{
DataTypeColumns.add(i, (String) listvalues.get(0));
}
}
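The udt_name column added to the Postgres query above is what feeds this branch: for PostGIS columns, information_schema reports data_type = 'USER-DEFINED' and keeps the concrete type name in udt_name. An illustrative constant in the style of the queries above ('occurrences' and 'public' are invented):

    private static final String exampleDataTypeQuery = "SELECT data_type, udt_name FROM information_schema.COLUMNS WHERE table_name ='occurrences' and table_schema='public' order by ordinal_position asc";
    // a geometry column yields: data_type = 'USER-DEFINED', udt_name = 'geometry'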
@@ -1475,9 +1811,9 @@ public class DatabaseManagement {
}
// to return the file in which the query result (originated from a submit
// query) is stored
public File getFileQueryResult() {
// to return the file in which the result (originated from a submit
// query or sampling operations) is stored
public File getFile() {
// return fileQueryResult;
return file;
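A hypothetical caller-side view of the new contract (every name here except getFile() is an assumption for illustration): the result map holds a "HEADERS" entry plus one row per numeric key, each truncated to 255 characters, while the file returned by getFile() keeps the whole column values.

    DatabaseManagement dm = ...;  // existing, configured instance
    // run submitQuery(...) or one of the sampling operations first
    File fullCsv = dm.getFile();  // CSV with the untruncated column values
    // map layout: "HEADERS" -> header line, "0".."n" -> truncated rows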