package org.gcube.nlphub.nlp;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.log4j.Logger;
import org.gcube.nlphub.legacy.AsyncHttpRequest;
import org.gcube.nlphub.legacy.Constants;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Asynchronous HTTP request that executes one WPS process and reports the
 * outcome to a {@link RunnerCommander}.
 *
 * <p>
 * The main constructor first issues a blocking WPS {@code DescribeProcess}
 * request to discover the inputs of the process, then builds the
 * {@code Execute} URL from them. When the request completes,
 * {@link #asyncHttpRequestCallback()} parses the response and passes
 * {@code <short id>:::<link>} (or {@code <short id>:::<error id>} on failure)
 * to the commander.
 */
public class NlpAsyncNerRunner extends AsyncHttpRequest {
    public final static String WPS_EXECUTE_URL = Constants.DATAMINER_URL
            + "/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
    public final static String WPS_DESCRIBE_PROCESS_URL = Constants.DATAMINER_URL
            + "/wps/WebProcessingService?request=DescribeProcess&service=WPS&Version=1.0.0";

    /** Parser feature that rejects any DOCTYPE declaration (blocks XXE). */
    private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

    private String identifier, token, httpMethod, annotations, publicLink, language;
    private final Logger logger = Logger.getLogger(NlpAsyncNerRunner.class.getSimpleName());
    private RunnerCommander commander;

    /**
     * Creates a runner for the process named by {@code identifier}.
     *
     * <p>
     * This constructor performs a blocking HTTP {@code DescribeProcess} call.
     * If that call fails the error is logged and the Execute URL is built
     * without input parameters.
     *
     * @param identifier  fully qualified WPS process identifier
     * @param token       value sent as the {@code gcube-token} query parameter
     * @param publicLink  value assigned to inputs classified as file/text
     * @param annotations comma separated annotation names; commas are sent as
     *                    {@code |}
     * @param language    not used for the moment
     * @param commander   receiver of the final result string
     */
    public NlpAsyncNerRunner(String identifier, String token, String publicLink, String annotations, String language,
            RunnerCommander commander) {
        super();
        this.identifier = identifier;
        this.token = token;
        this.httpMethod = "GET";
        this.annotations = annotations;
        this.publicLink = publicLink;
        this.language = language; // not used for the moment...
        this.commander = commander;

        ArrayList<NlpParameter> params = buildParameterString();
        String serviceUrl = WPS_EXECUTE_URL + "&gcube-token=" + token + "&lang=en-US&Identifier=" + identifier;
        serviceUrl += "&" + setUrl(params);
        super.setBaseUrl(serviceUrl);
        super.setMethod(httpMethod);
    }

    /**
     * Creates a runner for an already built URL. No identifier, token or
     * commander is set by this constructor.
     *
     * @param baseUrl complete request URL
     * @param method  HTTP method
     */
    public NlpAsyncNerRunner(String baseUrl, String method) {
        super(baseUrl, method, null);
    }

    /** @return the process identifier, or {@code null} if none was given */
    public String getIdentifier() {
        return identifier;
    }

    /** @return the token, or {@code null} if none was given */
    public String getToken() {
        return token;
    }

    /** @return the HTTP method chosen by the main constructor, else {@code null} */
    public String getHttpMethod() {
        return httpMethod;
    }

    /**
     * Builds the {@code DataInputs=name=value;...} query fragment.
     *
     * <p>
     * Values are URL-encoded as UTF-8. Parameters without a name or without a
     * String value are skipped and logged.
     *
     * @param parameters the inputs discovered by {@link #buildParameterString()}
     * @return the fragment, always starting with {@code DataInputs=}
     */
    private String setUrl(ArrayList<NlpParameter> parameters) {
        StringBuilder url = new StringBuilder("DataInputs=");
        for (NlpParameter p : parameters) {
            Object value = p.getValue();
            if (p.getName() == null || !(value instanceof String)) {
                logger.warn("Skipping input parameter without name or value: " + p.getName());
                continue;
            }
            try {
                // Encode before appending so a failure leaves no partial "name=" behind.
                String encoded = URLEncoder.encode((String) value, "UTF-8");
                url.append(p.getName()).append('=').append(encoded).append(';');
            } catch (UnsupportedEncodingException ex) {
                logger.error("Unable to encode the value of parameter " + p.getName(), ex);
            }
        }
        return url.toString();
    }

    /**
     * Calls WPS {@code DescribeProcess} and turns every {@code Input} element
     * of the response into an {@link NlpParameter}.
     *
     * @return the parameters read so far; empty or partial if the request or
     *         the parsing fails (the failure is logged, not thrown)
     */
    private ArrayList<NlpParameter> buildParameterString() {
        ArrayList<NlpParameter> parameters = new ArrayList<>();
        HttpURLConnection connection = null;
        try {
            String finalUrl = WPS_DESCRIBE_PROCESS_URL + "&gcube-token=" + token;
            finalUrl += "&lang=en-US&Identifier=" + identifier;
            connection = (HttpURLConnection) new URL(finalUrl).openConnection();
            connection.setDoInput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod("GET");

            Document doc;
            // Hand the raw stream to the parser so it applies the encoding
            // declared by the document instead of the platform charset.
            try (InputStream in = connection.getInputStream()) {
                doc = newSecureDocumentBuilder().parse(in);
            }
            doc.getDocumentElement().normalize();

            NodeList nListInput = doc.getElementsByTagName("Input");
            for (int i = 0; i < nListInput.getLength(); i++) {
                parameters.add(readParameter(nListInput.item(i)));
            }
        } catch (Exception x) {
            logger.error("Unable to read the description of process " + identifier, x);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return parameters;
    }

    /**
     * Reads name, description, type and value of one {@code Input} element.
     *
     * <p>
     * For the moment the type is limited to 'file' and 'annotations', and it
     * is chosen by looking for keywords in the {@code ows:Abstract} text.
     */
    private NlpParameter readParameter(Node nodeInput) {
        NlpParameter nlpParam = new NlpParameter();
        NodeList inputChildren = nodeInput.getChildNodes();
        for (int j = 0; j < inputChildren.getLength(); j++) {
            Node node = inputChildren.item(j);
            String nodeName = node.getNodeName();
            if ("ows:Identifier".equals(nodeName)) {
                nlpParam.setName(node.getTextContent());
            } else if ("ows:Title".equals(nodeName)) {
                nlpParam.setDescription(node.getTextContent());
            } else if ("ows:Abstract".equals(nodeName)) {
                // Locale.ROOT keeps the keyword match independent of the default locale.
                String text = node.getTextContent().toLowerCase(Locale.ROOT);
                if (text.contains("file") || text.contains("text")) {
                    nlpParam.setObjectType(NlpParameter.INPUT_FILE);
                    nlpParam.setValue(publicLink);
                } else if (text.contains("annotation") || text.contains("list")) {
                    nlpParam.setObjectType(NlpParameter.INPUT_ANNOTATIONS);
                    if (annotations != null) {
                        nlpParam.setValue(annotations.replace(',', '|'));
                    }
                }
            }
        }
        return nlpParam;
    }

    /** Creates a DOM parser that refuses documents carrying a DOCTYPE. */
    private static DocumentBuilder newSecureDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        return factory.newDocumentBuilder();
    }

    /** @return the part of the identifier after the last dot, or "" without identifier */
    private String shortIdentifier() {
        if (identifier == null) {
            return "";
        }
        return identifier.substring(identifier.lastIndexOf(".") + 1);
    }

    /** Sends {@code <short id>:::<outcome>} to the commander, if there is one. */
    private void report(Object outcome) {
        if (commander == null) {
            logger.warn("No commander set, result not reported: " + outcome);
            return;
        }
        commander.updateResultList(shortIdentifier() + ":::" + outcome);
    }

    /** @return the value of the inherited {@code elapsedTime} field */
    public long getElapsedTime() {
        return elapsedTime;
    }

    /**
     * Parses the response of the Execute request and reports the extracted
     * link, or {@code Constants.ERROR_ID} if anything goes wrong.
     */
    @Override
    public void asyncHttpRequestCallback() {
        // presumably elapsedTime holds the start timestamp at this point - confirm in AsyncHttpRequest
        elapsedTime = System.currentTimeMillis() - elapsedTime;
        logger.info("ID: " + shortIdentifier() + " elapsed time: " + elapsedTime);
        String result = super.getResult();
        String theLink = "";
        // Decode with the same charset used to encode; a null result fails here and is reported as an error.
        try (BufferedReader r = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8))) {
            Document doc = newSecureDocumentBuilder().parse(new InputSource(r));
            doc.getDocumentElement().normalize();
            NodeList nListResult = doc.getElementsByTagName("ogr:Result");
            // NOTE(review): the text between "(i" and "0) { theLink = res; }" was lost in the
            // reviewed copy of this file. The loop below is a reconstruction; the child tag
            // names and the mime type value must be confirmed against version control.
            for (int i = 0, found = 0; (i < nListResult.getLength()) && (found == 0); i++) {
                NodeList children = nListResult.item(i).getChildNodes();
                String res = "";
                for (int j = 0; j < children.getLength(); j++) {
                    Node node = children.item(j);
                    if ("d4science:Data".equals(node.getNodeName())) {
                        res = node.getTextContent();
                    } else if ("d4science:MimeType".equals(node.getNodeName())
                            && "application/d4science".equals(node.getTextContent())) {
                        found = 1;
                    }
                }
                if (found > 0) {
                    theLink = res;
                }
            }
            report(theLink);
        } catch (Exception x) {
            report(Constants.ERROR_ID);
            logger.error("Unable to read the result of process " + identifier, x);
        }
    }
}