nlphub/src/main/java/org/gcube/data/analysis/nlphub/nlp/NlpAsyncNerRunner.java

212 lines
7.5 KiB
Java
Executable File

package org.gcube.data.analysis.nlphub.nlp;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.gcube.data.analysis.nlphub.is.DMDiscover;
import org.gcube.data.analysis.nlphub.legacy.AsyncHttpRequest;
import org.gcube.data.analysis.nlphub.shared.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * Asynchronous request that runs a single NER algorithm through the WPS
 * interface of a DataMiner service.
 * <p>
 * The constructor issues a blocking WPS {@code DescribeProcess} call to learn
 * the algorithm's input parameters, assembles the WPS {@code Execute} URL and
 * hands it to the {@link AsyncHttpRequest} base class. When
 * {@link #asyncHttpRequestCallback()} is invoked (presumably by the base class
 * once the request has completed), the response is parsed and the outcome is
 * reported to the {@link RunnerCommander} as
 * {@code <algorithm short name>:::<result link or error id>}.
 */
public class NlpAsyncNerRunner extends AsyncHttpRequest {
    private static final Logger logger = LoggerFactory.getLogger(NlpAsyncNerRunner.class);

    /** Query string prefix of a WPS 1.0.0 {@code Execute} request. */
    public static final String WPS_EXECUTE_URL = "?request=Execute&service=WPS&Version=1.0.0";

    /** Query string prefix of a WPS 1.0.0 {@code DescribeProcess} request. */
    public static final String WPS_DESCRIBE_PROCESS_URL = "?request=DescribeProcess&service=WPS&Version=1.0.0";

    /** Separates the algorithm short name from its outcome in the message sent to the commander. */
    private static final String RESULT_SEPARATOR = ":::";

    private final String identifier;
    private final RunnerCommander commander;

    /**
     * Prepares the WPS {@code Execute} request for one algorithm.
     *
     * @param dataMiner   base URL of the DataMiner service; when {@code null} or
     *                    empty the URL is discovered through {@link DMDiscover}
     * @param identifier  WPS identifier of the algorithm to run
     * @param token       gCube token appended to every request
     * @param publicLink  value used for inputs recognised as file/text inputs
     * @param annotations comma separated annotation names, used for inputs
     *                    recognised as annotation/list inputs
     * @param language    currently not used by this class
     * @param commander   receiver of the final result notification
     * @throws Exception if {@code dataMiner} is empty and the service URL cannot
     *                   be discovered; the original failure is attached as cause
     */
    public NlpAsyncNerRunner(String dataMiner, String identifier, String token, String publicLink, String annotations,
            String language, RunnerCommander commander) throws Exception {
        super();
        this.commander = commander;
        this.identifier = identifier;
        String httpMethod = "GET";

        if (dataMiner == null || dataMiner.isEmpty()) {
            try {
                dataMiner = new DMDiscover().retrieveServiceUrl(token);
            } catch (Exception e) {
                logger.error("Error retrieving DataMiner service:" + e.getMessage(), e);
                // Chain the cause so callers do not lose the original stack trace.
                throw new Exception("Error retrieving DataMiner service:" + e.getMessage(), e);
            }
        }

        logger.debug("NlpAsyncNerRunner: Create Excution Request");
        logger.debug("NlpAsyncNerRunner: [identifier={}, httpMethod={}, annotations={}, publicLink={}, dataminer={}]",
                identifier, httpMethod, annotations, publicLink, dataMiner);

        List<NlpParameter> params = buildParameterString(dataMiner, identifier, publicLink, annotations, token);

        StringBuilder serviceUrl = new StringBuilder();
        serviceUrl.append(dataMiner)
                .append(WPS_EXECUTE_URL)
                .append("&lang=en-US&Identifier=")
                .append(identifier)
                .append('&')
                .append(setParams(params));
        // Log BEFORE the token is appended so the credential never reaches the log
        // (same approach buildParameterString uses for the DescribeProcess URL).
        logger.debug("NlpAsyncNerRunner: [serviceUrl={}, method={}]", serviceUrl, httpMethod);
        serviceUrl.append("&gcube-token=").append(token);

        super.setBaseUrl(serviceUrl.toString());
        super.setMethod(httpMethod);
    }

    /**
     * Serialises the parameters into the WPS {@code DataInputs} query argument,
     * i.e. {@code DataInputs=name1=value1;name2=value2;} with URL-encoded values.
     * Parameters without a name or without a String value are skipped.
     *
     * @param parameters parameters discovered through {@code DescribeProcess}
     * @return the {@code DataInputs=...} fragment (never {@code null})
     */
    private String setParams(List<NlpParameter> parameters) {
        logger.debug("Set Params");
        StringBuilder dataInputs = new StringBuilder("DataInputs=");
        for (NlpParameter p : parameters) {
            String name = p.getName();
            Object value = p.getValue();
            if (name == null || !(value instanceof String)) {
                // Happens when the process description had no identifier or no
                // abstract that maps to a known input kind.
                logger.warn("Skipping WPS input without usable name/value: name={}", name);
                continue;
            }
            try {
                // Encode first so a failure cannot leave a half-written entry behind.
                String encoded = URLEncoder.encode((String) value, StandardCharsets.UTF_8.name());
                dataInputs.append(name).append('=').append(encoded).append(';');
            } catch (Exception ex) {
                logger.error(ex.getLocalizedMessage(), ex);
            }
        }
        return dataInputs.toString();
    }

    /**
     * Calls WPS {@code DescribeProcess} for the algorithm and converts every
     * {@code Input} element of the answer into an {@link NlpParameter}.
     * <p>
     * This is a blocking HTTP GET. Any failure is logged and the parameters
     * collected up to that point (possibly none) are returned.
     *
     * @param dataMiner   base URL of the DataMiner service
     * @param identifier  WPS identifier of the algorithm
     * @param publicLink  value assigned to file/text inputs
     * @param annotations comma separated annotations assigned to annotation/list inputs
     * @param token       gCube token appended to the request
     * @return the discovered parameters, never {@code null}
     */
    private List<NlpParameter> buildParameterString(String dataMiner, String identifier, String publicLink,
            String annotations, String token) {
        List<NlpParameter> parameters = new ArrayList<>();
        HttpURLConnection connection = null;
        try {
            StringBuilder serviceUrl = new StringBuilder();
            serviceUrl.append(dataMiner)
                    .append(WPS_DESCRIBE_PROCESS_URL)
                    .append("&lang=en-US&Identifier=")
                    .append(identifier);
            // Logged before the token is appended on purpose.
            logger.debug("DescribeProcess Requested: " + serviceUrl);
            serviceUrl.append("&gcube-token=").append(token);

            connection = (HttpURLConnection) new URL(serviceUrl.toString()).openConnection();
            connection.setRequestMethod("GET");
            connection.setDoInput(true);
            connection.setDoOutput(false);
            connection.setUseCaches(false);

            // Hand the raw byte stream to the parser so the encoding is taken from
            // the XML declaration instead of the platform default charset.
            try (InputStream in = connection.getInputStream()) {
                Document doc = newSecureDocumentBuilder().parse(in);
                doc.getDocumentElement().normalize();
                NodeList inputs = doc.getElementsByTagName("Input");
                for (int i = 0; i < inputs.getLength(); i++) {
                    parameters.add(toParameter(inputs.item(i), publicLink, annotations));
                }
            }
        } catch (Exception e) {
            logger.error("NlpAsyncNerRunner buildParameterString: " + e.getLocalizedMessage(), e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return parameters;
    }

    /**
     * Builds one parameter from a WPS {@code Input} element. The name comes from
     * {@code ows:Identifier}, the description from {@code ows:Title}; the kind and
     * value are guessed from keywords in {@code ows:Abstract} (only file/text and
     * annotation/list inputs are recognised for the moment).
     */
    private static NlpParameter toParameter(Node input, String publicLink, String annotations) {
        NlpParameter parameter = new NlpParameter();
        NodeList children = input.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            String nodeName = child.getNodeName();
            if ("ows:Identifier".equals(nodeName)) {
                parameter.setName(child.getTextContent());
            } else if ("ows:Title".equals(nodeName)) {
                parameter.setDescription(child.getTextContent());
            } else if ("ows:Abstract".equals(nodeName)) {
                // Locale.ROOT keeps the keyword match independent of the JVM locale.
                String text = child.getTextContent().toLowerCase(Locale.ROOT);
                if (text.contains("file") || text.contains("text")) {
                    parameter.setObjectType(NlpParameter.INPUT_FILE);
                    parameter.setValue(publicLink);
                } else if (text.contains("annotation") || text.contains("list")) {
                    parameter.setObjectType(NlpParameter.INPUT_ANNOTATIONS);
                    // A missing annotation list must not abort parsing of the other inputs.
                    parameter.setValue(annotations == null ? null : annotations.replace(',', '|'));
                }
            }
        }
        return parameter;
    }

    /**
     * Creates a DOM parser hardened against XXE: secure processing is enabled and
     * documents carrying a DOCTYPE are rejected. The factory stays
     * non-namespace-aware because lookups in this class use prefixed node names.
     */
    private static DocumentBuilder newSecureDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        return factory.newDocumentBuilder();
    }

    /**
     * @return the inherited {@code elapsedTime} value; after
     *         {@link #asyncHttpRequestCallback()} has run this is the difference
     *         computed there, in milliseconds
     */
    public long getElapsedTime() {
        return elapsedTime;
    }

    /**
     * Parses the WPS {@code Execute} response and notifies the commander with
     * {@code <short name>:::<link>}; on any failure {@code Constants.ERROR_ID}
     * is sent in place of the link.
     */
    @Override
    public void asyncHttpRequestCallback() {
        // Assumes the base class stored the start timestamp in elapsedTime - TODO confirm.
        elapsedTime = System.currentTimeMillis() - elapsedTime;
        String shortName = identifier.substring(identifier.lastIndexOf('.') + 1);
        logger.debug("ID: {} elapsed time: {}", shortName, elapsedTime);

        String result = super.getResult();
        try {
            if (result == null) {
                throw new IllegalStateException("No response available for " + shortName);
            }
            // Parse the characters directly; no bytes round-trip, so no charset mismatch.
            Document doc = newSecureDocumentBuilder().parse(new InputSource(new StringReader(result)));
            doc.getDocumentElement().normalize();
            commander.updateResultList(shortName + RESULT_SEPARATOR + findResultLink(doc));
        } catch (Exception e) {
            commander.updateResultList(shortName + RESULT_SEPARATOR + Constants.ERROR_ID);
            logger.error(e.getLocalizedMessage(), e);
        }
    }

    /**
     * Returns the {@code d4science:Data} text of the first {@code ogr:Result}
     * whose {@code d4science:MimeType} is {@code application/d4science}, or an
     * empty string when no such result exists.
     */
    private static String findResultLink(Document doc) {
        NodeList results = doc.getElementsByTagName("ogr:Result");
        for (int i = 0; i < results.getLength(); i++) {
            NodeList children = results.item(i).getChildNodes();
            String data = "";
            boolean d4scienceMime = false;
            for (int j = 0; j < children.getLength(); j++) {
                Node child = children.item(j);
                String nodeName = child.getNodeName();
                if ("d4science:Data".equals(nodeName)) {
                    data = child.getTextContent();
                } else if ("d4science:MimeType".equals(nodeName)
                        && "application/d4science".equals(child.getTextContent())) {
                    d4scienceMime = true;
                }
            }
            if (d4scienceMime) {
                return data;
            }
        }
        return "";
    }
}