2018-03-15 15:40:50 +01:00
|
|
|
package org.gcube.nlphub.nlp;
|
|
|
|
|
|
|
|
import java.io.BufferedReader;
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
|
|
import java.io.InputStreamReader;
|
|
|
|
import java.net.HttpURLConnection;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.net.URLEncoder;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
|
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
|
|
|
|
|
|
import org.apache.log4j.Logger;
|
|
|
|
import org.gcube.nlphub.legacy.AsyncHttpRequest;
|
|
|
|
import org.gcube.nlphub.legacy.Constants;
|
|
|
|
import org.w3c.dom.Document;
|
|
|
|
import org.w3c.dom.Node;
|
|
|
|
import org.w3c.dom.NodeList;
|
|
|
|
import org.xml.sax.InputSource;
|
|
|
|
|
|
|
|
public class NlpAsyncNerRunner extends AsyncHttpRequest {
|
2018-03-19 16:31:23 +01:00
|
|
|
public final static String WPS_EXECUTE_URL = Constants.DATAMINER_URL + "/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
|
|
|
|
public final static String WPS_DESCRIBE_PROCESS_URL = Constants.DATAMINER_URL + "/wps/WebProcessingService?request=DescribeProcess&service=WPS&Version=1.0.0";
|
2018-03-15 15:40:50 +01:00
|
|
|
private String identifier, token, httpMethod, annotations, publicLink, language;
|
|
|
|
private Logger logger = Logger.getLogger(NlpAsyncNerRunner.class.getSimpleName());
|
2018-03-20 14:45:49 +01:00
|
|
|
private RunnerCommander commander;
|
2018-03-15 15:40:50 +01:00
|
|
|
|
2018-03-20 14:45:49 +01:00
|
|
|
public NlpAsyncNerRunner(String identifier, String token, String publicLink, String annotations, String language, RunnerCommander commander) {
|
2018-03-15 15:40:50 +01:00
|
|
|
super();
|
|
|
|
this.identifier = identifier;
|
|
|
|
this.token = token;
|
|
|
|
this.httpMethod = "GET";
|
|
|
|
this.annotations = annotations;
|
|
|
|
this.publicLink = publicLink;
|
|
|
|
this.language = language; // not used for the moment...
|
2018-03-20 14:45:49 +01:00
|
|
|
this.commander = commander;
|
2018-03-15 15:40:50 +01:00
|
|
|
ArrayList<NlpParameter> params = buildParameterString();
|
|
|
|
String serviceUrl = WPS_EXECUTE_URL + "&gcube-token=" + token + "&lang=en-US&Identifier=" + identifier;
|
|
|
|
serviceUrl += "&" + setUrl(params);
|
|
|
|
super.setBaseUrl(serviceUrl);
|
|
|
|
super.setMethod(httpMethod);
|
|
|
|
}
|
|
|
|
|
|
|
|
public NlpAsyncNerRunner(String baseUrl, String method) {
|
|
|
|
super(baseUrl, method, null);
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getIdentifier() {
|
|
|
|
return identifier;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getToken() {
|
|
|
|
return token;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getHttpMethod() {
|
|
|
|
return httpMethod;
|
|
|
|
}
|
|
|
|
|
|
|
|
private String setUrl(ArrayList<NlpParameter> parameters) {
|
|
|
|
String url = "DataInputs=";
|
|
|
|
for (NlpParameter p : parameters) {
|
|
|
|
try {
|
|
|
|
url += p.getName() + "=" + URLEncoder.encode((String) p.getValue(), "UTF-8") + ";";
|
|
|
|
} catch (Exception ex) {
|
|
|
|
logger.error(ex.getLocalizedMessage());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
private ArrayList<NlpParameter> buildParameterString() {
|
|
|
|
ArrayList<NlpParameter> parameters = new ArrayList<>();
|
|
|
|
HttpURLConnection connection = null;
|
|
|
|
BufferedReader r = null;
|
|
|
|
try {
|
|
|
|
String finalUrl = WPS_DESCRIBE_PROCESS_URL + "&gcube-token=" + token;
|
|
|
|
finalUrl += "&lang=en-US&Identifier=" + identifier;
|
|
|
|
URL url = new URL(finalUrl);
|
|
|
|
connection = (HttpURLConnection) url.openConnection();
|
|
|
|
connection.setDoInput(true);
|
|
|
|
connection.setDoOutput(true);
|
|
|
|
connection.setUseCaches(false);
|
|
|
|
connection.setRequestMethod("GET");
|
|
|
|
r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
|
|
|
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
|
|
|
|
doc.getDocumentElement().normalize();
|
|
|
|
NodeList nListInput = doc.getElementsByTagName("Input");
|
|
|
|
for (int i = 0; i < nListInput.getLength(); i++) {
|
|
|
|
Node nodeInput = nListInput.item(i);
|
|
|
|
NlpParameter nlpParam = new NlpParameter();
|
|
|
|
NodeList inputChildren = nodeInput.getChildNodes();
|
|
|
|
|
|
|
|
// try to find the name and the type of the input parameter
|
|
|
|
for (int j = 0; j < inputChildren.getLength(); j++) {
|
|
|
|
Node node = inputChildren.item(j);
|
|
|
|
// for the moment we limit the type at 'file' and
|
|
|
|
// 'annotations'
|
|
|
|
if (node.getNodeName().equals("ows:Identifier")) {
|
|
|
|
nlpParam.setName(node.getTextContent());
|
|
|
|
} else if (node.getNodeName().equals("ows:Title")) {
|
|
|
|
nlpParam.setDescription(node.getTextContent());
|
|
|
|
} else if (node.getNodeName().equals("ows:Abstract")) {
|
|
|
|
String text = node.getTextContent().toLowerCase();
|
|
|
|
if ((text.indexOf("file") >= 0) || (text.indexOf("text") >= 0)) {
|
|
|
|
nlpParam.setObjectType(NlpParameter.INPUT_FILE);
|
|
|
|
nlpParam.setValue(publicLink);
|
|
|
|
} else if ((text.indexOf("annotation") >= 0) || (text.indexOf("list") >= 0)) {
|
|
|
|
nlpParam.setObjectType(NlpParameter.INPUT_ANNOTATIONS);
|
|
|
|
nlpParam.setValue(annotations.replaceAll(",", "|"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
parameters.add(nlpParam);
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (Exception x) {
|
|
|
|
logger.error(x.getLocalizedMessage());
|
|
|
|
} finally {
|
|
|
|
try {
|
|
|
|
if (r != null)
|
|
|
|
r.close();
|
|
|
|
if (connection != null)
|
|
|
|
connection.disconnect();
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error(e.getLocalizedMessage());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return parameters;
|
|
|
|
}
|
|
|
|
|
|
|
|
public long getElapsedTime() {
|
|
|
|
return elapsedTime;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void asyncHttpRequestCallback() {
|
|
|
|
elapsedTime = System.currentTimeMillis() - elapsedTime;
|
|
|
|
logger.info("ID: " + identifier.substring(identifier.lastIndexOf(".") + 1) + " elapsed time: " + elapsedTime);
|
|
|
|
String result = super.getResult();
|
|
|
|
String theLink = "";
|
|
|
|
try {
|
2018-03-20 14:45:49 +01:00
|
|
|
BufferedReader r = new BufferedReader(
|
|
|
|
new InputStreamReader(new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8))));
|
|
|
|
|
2018-03-15 15:40:50 +01:00
|
|
|
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
|
|
|
|
doc.getDocumentElement().normalize();
|
|
|
|
NodeList nListResult = doc.getElementsByTagName("ogr:Result");
|
|
|
|
|
|
|
|
for (int i=0, found=0; (i<nListResult.getLength()) && (found==0); i++) {
|
|
|
|
Node nodeResult = nListResult.item(i);
|
|
|
|
NodeList list = nodeResult.getChildNodes();
|
|
|
|
String res = "";
|
|
|
|
for(int j=0; j<list.getLength(); j++) {
|
|
|
|
Node node = list.item(j);
|
|
|
|
if(node.getNodeName().equals("d4science:Data")) {
|
|
|
|
res = node.getTextContent();
|
|
|
|
}
|
|
|
|
else if(node.getNodeName().equals("d4science:MimeType")) {
|
|
|
|
if(node.getTextContent().equals("application/d4science")) {
|
|
|
|
found = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(found > 0) {
|
|
|
|
theLink = res;
|
|
|
|
}
|
|
|
|
}
|
2018-03-20 14:45:49 +01:00
|
|
|
commander.updateResultList(identifier.substring(identifier.lastIndexOf(".") + 1) + ":::" + theLink);
|
2018-03-15 15:40:50 +01:00
|
|
|
} catch (Exception x) {
|
2018-03-20 14:45:49 +01:00
|
|
|
commander.updateResultList(identifier.substring(identifier.lastIndexOf(".") + 1) + ":::" + Constants.ERROR_ID);
|
2018-03-15 15:40:50 +01:00
|
|
|
logger.error(x.getLocalizedMessage());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// public static void main(String[] args) {
|
|
|
|
// String id1 = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.ENGLISH_NAMED_ENTITY_RECOGNIZER";
|
|
|
|
// String id2 = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.ENGLISH_NER_CORENLP";
|
|
|
|
// String tokken = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
|
|
|
// String ann = "Organization,Location,Person";
|
|
|
|
// String file = "http://data.d4science.org/TWhNTS9DdVdXaTZLSWsrWUNQdHk3OUdZSU93SXRFbjhHbWJQNStIS0N6Yz0";
|
|
|
|
// file = "http://data.d4science.org/L0txb3o3Tk9GaW1LSWsrWUNQdHk3MG1ZWFdtWkJENU5HbWJQNStIS0N6Yz0";
|
|
|
|
// NlpAsyncNerRunner n1 = new NlpAsyncNerRunner(id1, tokken, file, ann, null);
|
|
|
|
// NlpAsyncNerRunner n2 = new NlpAsyncNerRunner(id2, tokken, file, ann, null);
|
|
|
|
// n2.start();
|
|
|
|
// n1.start();
|
|
|
|
// }
|
|
|
|
}
|