Management of the I/O names

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/DataMiner@128582 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Gianpaolo Coro 2016-05-13 00:21:37 +00:00
parent e459abb522
commit 306ef9020a
8 changed files with 63 additions and 47 deletions

View File

@ -3,30 +3,22 @@ package org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.MemoryType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
@ -42,10 +34,8 @@ import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.infrastructure.
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.infrastructure.InfrastructureDialoguer;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.DataProvenance;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.StoredData;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.StoredType;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.utils.GML2CSV;
import org.hibernate.SessionFactory;
import org.n52.wps.algorithm.annotation.LiteralDataInput;
import org.n52.wps.io.data.GenericFileData;
public class InputsManager {
@ -143,7 +133,7 @@ public class InputsManager {
inputtables += tableName;
saveInputData(inputName, inputName, tableFile.getAbsolutePath());
saveInputData(tableFile.getName(), inputName, tableFile.getAbsolutePath());
}
// the only possible complex input is a table - check the WPS
// parsers
@ -181,6 +171,27 @@ public class InputsManager {
return null;
}
}
/**
 * Derives a file name for a remote input by inspecting the HTTP response
 * headers of {@code url} (typically the Content-Disposition "filename="
 * parameter).
 *
 * @param url the http(s) link to probe
 * @return the file name advertised by the server, or {@code null} when no
 *         header carries a "filename=" parameter
 * @throws Exception if the URL is malformed or the connection fails
 */
public String inputNameFromHttpHeader(String url) throws Exception{
	URL obj = new URL(url);
	URLConnection conn = obj.openConnection();
	String filename=null;
	// get all headers
	Map<String, List<String>> map = conn.getHeaderFields();
	AnalysisLogger.getLogger().debug("Getting file name from http header");
	for (Map.Entry<String, List<String>> entry : map.entrySet()) {
		// entry.getValue() is a List; toString() renders it as "[...]", hence
		// the "]" cleanup below
		String value = entry.getValue().toString();
		int idx = value.toLowerCase().indexOf("filename=");
		if (idx >= 0){
			AnalysisLogger.getLogger().debug("Searching in http header: found file name in header value "+value);
			// cut right after "filename=" itself rather than after the first '='
			// in the value, which could belong to an earlier parameter
			// (e.g. attachment; name="f"; filename="x.csv")
			filename = value.substring(idx + "filename=".length());
			// drop any following header parameters
			int sep = filename.indexOf(';');
			if (sep >= 0)
				filename = filename.substring(0, sep);
			// strip the quotes of a quoted filename and the List.toString() bracket
			filename = filename.replace("\"", "").replace("]", "").trim();
			AnalysisLogger.getLogger().debug("Searching in http header: retrieved file name "+filename);
			break;
		}
	}
	return filename;
}
public List<File> getLocalFiles(GenericFileData files) throws Exception {
// download input
@ -210,9 +221,14 @@ public class InputsManager {
fileName = subfilelink.substring(subfilelink.lastIndexOf("/") + 1).trim();
if (fileName.contains("."))
fileName = fileName.substring(0, fileName.lastIndexOf(".")) + UUID.randomUUID() + fileName.substring(fileName.lastIndexOf("."));
else
fileName = fileName + UUID.randomUUID();
else{
//take file name from http header
String fileNameTemp = inputNameFromHttpHeader(subfilelink);
if (fileNameTemp==null)
fileName = fileName + UUID.randomUUID();
else
fileName = fileNameTemp+ "_[" + computationId + "]."+FilenameUtils.getExtension(fileNameTemp);
}
AnalysisLogger.getLogger().debug("Retrieving remote input in file: " + fileName);
AnalysisLogger.getLogger().debug("Creating local temp file: " + fileName);
File of = new File(config.getPersistencePath(), fileName);
@ -410,10 +426,13 @@ public class InputsManager {
String creationDate = new java.text.SimpleDateFormat("dd/MM/yyyy HH:mm:ss").format(System.currentTimeMillis());
String operator = config.getAgent();
StoredType type = StoredType.STRING;
String type = "text/plain";
if (payload != null && (new File (payload).exists())) {
type = StoredType.DATA;
if (payload.toLowerCase().endsWith(".csv") || payload.toLowerCase().endsWith(".txt")) {
type = "text/csv";
} else
type = "application/d4science";
}
StoredData data = new StoredData(name, description, id, provenance, creationDate, operator, computationId, type,payload,config.getGcubeScope());

View File

@ -16,9 +16,7 @@ import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.DataProvenance;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.DataspaceManager;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.StoredData;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace.StoredType;
public class OutputsManager {
@ -152,10 +150,11 @@ public class OutputsManager {
String creationDate = new java.text.SimpleDateFormat("dd/MM/yyyy HH:mm:ss").format(System.currentTimeMillis());
String operator = config.getAgent();
String computationId = computationsession;
StoredType type = StoredType.STRING;
if (info.getLocalMachineContent() != null) {
String type = info.getMimetype();
/* if (info.getLocalMachineContent() != null) {
type = StoredType.DATA;
}
*/
String payload = info.getContent();
StoredData data = new StoredData(name, info.getAbstractStr(),id, provenance, creationDate, operator, computationId, type,payload,config.getGcubeScope());

View File

@ -3,12 +3,10 @@ package org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@ -19,7 +17,6 @@ import javax.imageio.ImageIO;
import org.gcube.contentmanagement.graphtools.data.conversions.ImageTools;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.contentmanagement.lexicalmatcher.utils.FileTools;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnTypesList;
@ -34,9 +31,6 @@ import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalTypeList;
import org.gcube.dataanalysis.ecoengine.datatypes.TablesList;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.IOHelper;
import org.gcube.dataanalysis.executor.scripts.OSCommand;
import org.gcube.dataanalysis.wps.statisticalmanager.synchserver.utils.FileManager;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;

View File

@ -114,12 +114,17 @@ public class DataspaceManager implements Runnable {
// String filenameonwsString = WorkspaceUtil.getUniqueName(data.name, wsFolder);
String filenameonwsString = data.name ;
if (changename)
if (changename){
filenameonwsString = data.name + "_[" + data.computationId + "]";// ("_"+UUID.randomUUID()).replace("-", "");
if (data.type.equals("text/csv"))
filenameonwsString+=".csv";
else if (data.type.equals("image/png"))
filenameonwsString+=".png";
}
InputStream in = null;
String url = "";
try {
if (data.type == StoredType.DATA) {
if (data.type.equals("text/csv")||data.type.equals("application/d4science")||data.type.equals("image/png")) {
if (new File(data.payload).exists() || !data.payload.startsWith("http")) {
AnalysisLogger.getLogger().debug("Dataspace->Uploading file " + data.payload);
@ -159,7 +164,7 @@ public class DataspaceManager implements Runnable {
fileItem.getProperties().addProperty(data_id, data.id);
fileItem.getProperties().addProperty(data_description, data.description);
fileItem.getProperties().addProperty(IO, data.provenance.name());
fileItem.getProperties().addProperty(data_type, data.type.name());
fileItem.getProperties().addProperty(data_type, data.type);
url = fileItem.getPublicLink(true);
fileItem.getProperties().addProperty(payload, url);
data.payload = url;

View File

@ -74,9 +74,9 @@ public class ProvOGenerator {
*/
List<StoredData> inputData = new ArrayList<StoredData>();
List<StoredData> outputData = new ArrayList<StoredData>();
StoredData in = new StoredData("inputT1","descrT1", "inputT1", DataProvenance.IMPORTED, "15/03/2016 11:32:22", operator, id, StoredType.STRING, "hello","devsec");
StoredData in = new StoredData("inputT1","descrT1", "inputT1", DataProvenance.IMPORTED, "15/03/2016 11:32:22", operator, id, "text/plain", "hello","devsec");
inputData.add(in);
StoredData out = new StoredData("outputT1","descrT1", "outputT1", DataProvenance.IMPORTED, "16/03/2016 11:32:22", operator, id, StoredType.STRING, "hellooutput","devsec");
StoredData out = new StoredData("outputT1","descrT1", "outputT1", DataProvenance.IMPORTED, "16/03/2016 11:32:22", operator, id, "text/plain", "hellooutput","devsec");
outputData.add(out);
//System.out.println(dataToEntity(in));
System.out.println(toProvO(computation, inputData, outputData));
@ -102,7 +102,7 @@ public class ProvOGenerator {
public static String dataToEntity(StoredData data){
String io = getDataIOAttribute(data.provenance.name());
String type = getDataTypeAttribute(data.type.name());
String type = getDataTypeAttribute(data.type);
String description = getDataDescriptionEntity(data.description);
String operator = getOperatorRefEntity(data.operator);
String computation = getComputationRefEntity(data.computationId);

View File

@ -2,7 +2,7 @@ package org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.datasp
public class StoredData {
public StoredData(String name, String description, String id, DataProvenance provenance, String creationDate, String operator, String computationId, StoredType type, String payload, String vre) {
public StoredData(String name, String description, String id, DataProvenance provenance, String creationDate, String operator, String computationId, String type, String payload, String vre) {
super();
this.name = name;
this.id = id;
@ -23,6 +23,6 @@ public class StoredData {
String operator;
String computationId;
String vre;
StoredType type;
String type;
String payload;
}

View File

@ -1,5 +0,0 @@
package org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mapping.dataspace;
public enum StoredType {
// DATA marks a payload that is a file on disk (set when the payload path exists);
// STRING marks an inline text payload (the default)
DATA,STRING
}

View File

@ -16,16 +16,20 @@ public class MultiThreadingCalls {
//final URL urlToCall = new URL("http://localhost:8080/wps/WebProcessingService?request=Execute&service=WPS&version=1.0.0&lang=en-US&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.generators.FEED_FORWARD_A_N_N_DISTRIBUTION&DataInputs="+
//URLEncoder.encode("scope=/gcube/devsec;user.name=test.user;FinalTableLabel=wps_fann;GroupingFactor= ;FeaturesColumnNames=depthmin|depthmax;FeaturesTable=https://dl.dropboxusercontent.com/u/12809149/hcaf_d_mini.csv@text/csv;ModelName=https://dl.dropboxusercontent.com/u/12809149/1430317177514.1_wpssynch.statistical_wps_model_ann","UTF-8"));
//String host = "localhost";
String host = "statistical-manager-new.d4science.org";
new org.gcube.dataaccess.algorithms.drmalgorithms.SubmitQuery ();
String host = "dataminer1-d-d4s.d4science.org";
// http://statistical-manager-new.d4science.org:8080/wps/WebProcessingService?request=Execute&service=WPS&version=1.0.0&lang=en-US&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.BIONYM_LOCAL&DataInputs=scope=/gcube/devsec/devVRE;user.name=tester;SpeciesAuthorName=Gadus%20morhua;Taxa_Authority_File=ASFIS;Parser_Name=SIMPLE;Activate_Preparsing_Processing=true;Use_Stemmed_Genus_and_Species=false;Accuracy_vs_Speed=MAX_ACCURACY;Matcher_1=GSAy;Threshold_1=0.6;MaxResults_1=10
// final URL urlToCall = new URL("http://"+host+":8080/wps/WebProcessingService?request=Execute&service=WPS&version=1.0.0&lang=en-US&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.modellers.FEED_FORWARD_ANN&DataInputs=" +
// URLEncoder.encode("scope=/gcube/devsec;user.name=test.user;LayersNeurons=10|10;LearningThreshold=0.01;MaxIterations=100;ModelName=wps_ann;Reference=1;TargetColumn=depthmean;TrainingColumns=depthmin|depthmax;TrainingDataSet=http://goo.gl/juNsCK@MimeType=text/csv","UTF-8"));
final URL urlToCall = new URL("http://"+host+":8080/wps/WebProcessingService?request=Execute&service=WPS&version=1.0.0&lang=en-US&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.BIONYM_LOCAL&DataInputs=scope=/gcube/devsec/devVRE;user.name=tester.user;SpeciesAuthorName=Gadus%20morhua;Taxa_Authority_File=ASFIS;Parser_Name=SIMPLE;Activate_Preparsing_Processing=true;Use_Stemmed_Genus_and_Species=false;Accuracy_vs_Speed=MAX_ACCURACY;Matcher_1=GSAy;Threshold_1=0.6;MaxResults_1=10");
int nthreads = 50;
/*
final URL urlToCall = new URL("http://"+host+"/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0&gcube-token=4ccc2c35-60c9-4c9b-9800-616538d5d48b&lang=en-US&Identifier=org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.clusterers.XMEANS&DataInputs=" +
URLEncoder.encode("OccurrencePointsClusterLabel=OccClustersTest;min_points=1;maxIterations=100;minClusters=1;maxClusters=3;OccurrencePointsTable=http://goo.gl/VDzpch;FeaturesColumnNames=depthmean|sstmnmax|salinitymean;","UTF-8"));
*/
final URL urlToCall = new URL("http://"+host+"/wps/WebProcessingService?Request=GetCapabilities&Service=WPS&gcube-token=4ccc2c35-60c9-4c9b-9800-616538d5d48b");
int nthreads = 200;
for (int i = 0; i < nthreads; i++) {
final int index = i+1;