nlphub/src/main/java/org/gcube/data/analysis/nlphub/nlp/NlpAsyncNerRunner.java

package org.gcube.data.analysis.nlphub.nlp;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.gcube.data.analysis.nlphub.legacy.AsyncHttpRequest;
import org.gcube.data.analysis.nlphub.legacy.Constants;
import org.gcube.data.analysis.nlphub.legacy.NerOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Asynchronous HTTP request that executes one algorithm through the
 * {@code WebProcessingService} endpoint and reports the outcome to a
 * {@link RunnerCommander}.
 * <p>
 * The main constructor first issues a synchronous {@code DescribeProcess}
 * request to discover the algorithm inputs, then prepares the {@code Execute}
 * URL that the superclass sends. When the response is available,
 * {@link #asyncHttpRequestCallback()} extracts the result link and hands it to
 * the commander as {@code <short identifier>:::<link>}.
 */
public class NlpAsyncNerRunner extends AsyncHttpRequest {
	private static final Logger logger = LoggerFactory.getLogger(NlpAsyncNerRunner.class);
	public static final String WPS_EXECUTE_URL = "/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
	public static final String WPS_DESCRIBE_PROCESS_URL = "/wps/WebProcessingService?request=DescribeProcess&service=WPS&Version=1.0.0";

	/** Parser feature that rejects any DOCTYPE declaration (XXE hardening). */
	private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

	private String identifier, token, httpMethod, annotations, publicLink, dataMiner;
	private RunnerCommander commander;

	/**
	 * Creates a runner for one algorithm and prepares its {@code Execute} URL.
	 * <p>
	 * NOTE: this constructor performs a blocking {@code DescribeProcess} HTTP
	 * request (see {@link #buildParameterString()}); failures of that request
	 * are logged and result in an {@code Execute} URL without data inputs.
	 *
	 * @param dataMiner
	 *            host (without scheme) of the service; {@code http://} is
	 *            prepended. When {@code null},
	 *            {@link Constants#DEFAULT_DATAMINER_URL} is used as is.
	 * @param identifier
	 *            identifier of the algorithm to execute
	 * @param token
	 *            value sent as the {@code gcube-token} query parameter
	 * @param publicLink
	 *            value assigned to inputs recognized as file/text inputs
	 * @param annotations
	 *            comma separated annotations; commas are replaced by {@code |}
	 *            before being sent
	 * @param language
	 *            currently unused: requests always carry {@code lang=en-US}
	 * @param commander
	 *            receiver of the result produced by the callback
	 */
	public NlpAsyncNerRunner(String dataMiner, String identifier, String token, String publicLink, String annotations,
			String language, RunnerCommander commander) {
		super();
		this.identifier = identifier;
		this.token = token;
		this.httpMethod = "GET";
		this.annotations = annotations;
		this.publicLink = publicLink;

		this.commander = commander;
		if (dataMiner == null)
			this.dataMiner = Constants.DEFAULT_DATAMINER_URL;
		else
			this.dataMiner = "http://" + dataMiner;

		ArrayList<NlpParameter> params = buildParameterString();
		String serviceUrl = this.dataMiner + WPS_EXECUTE_URL + "&gcube-token=" + token + "&lang=en-US&Identifier="
				+ identifier;
		serviceUrl += "&" + setUrl(params);
		super.setBaseUrl(serviceUrl);
		super.setMethod(httpMethod);
	}

	/**
	 * Creates a runner for an already built URL. No identifier, token or
	 * commander is associated with the instance, so the callback only records
	 * the elapsed time.
	 *
	 * @param baseUrl
	 *            complete URL to request
	 * @param method
	 *            HTTP method to use
	 */
	public NlpAsyncNerRunner(String baseUrl, String method) {
		super(baseUrl, method, null);
		// keep getHttpMethod() consistent with the method handed to the superclass
		this.httpMethod = method;
	}

	/** @return the algorithm identifier, or {@code null} if none was given */
	public String getIdentifier() {
		return identifier;
	}

	/** @return the token sent with the requests, or {@code null} if none was given */
	public String getToken() {
		return token;
	}

	/** @return the HTTP method used by this runner */
	public String getHttpMethod() {
		return httpMethod;
	}

	/**
	 * Builds the {@code DataInputs=name=value;...} query fragment.
	 * <p>
	 * Parameters without a name or without a string value (inputs whose
	 * abstract was not recognized) are skipped, as are values that cannot be
	 * encoded.
	 *
	 * @param parameters
	 *            inputs discovered by {@link #buildParameterString()}
	 * @return the query fragment, {@code "DataInputs="} when nothing is usable
	 */
	private String setUrl(ArrayList<NlpParameter> parameters) {
		StringBuilder url = new StringBuilder("DataInputs=");
		for (NlpParameter p : parameters) {
			Object value = p.getValue();
			if (p.getName() == null || !(value instanceof String)) {
				logger.warn("Skipping input without a usable name/value: {}", p.getName());
				continue;
			}
			try {
				url.append(p.getName()).append('=')
						.append(URLEncoder.encode((String) value, StandardCharsets.UTF_8.name())).append(';');
			} catch (UnsupportedEncodingException ex) {
				logger.error("Cannot encode the value of input {}", p.getName(), ex);
			}
		}
		return url.toString();
	}

	/**
	 * Queries {@code DescribeProcess} for the configured algorithm and turns
	 * every {@code Input} element of the answer into an {@link NlpParameter}.
	 * <p>
	 * Any failure is logged; the parameters collected up to that point (possibly
	 * none) are returned.
	 *
	 * @return the discovered input parameters, never {@code null}
	 */
	private ArrayList<NlpParameter> buildParameterString() {
		ArrayList<NlpParameter> parameters = new ArrayList<>();
		HttpURLConnection connection = null;
		try {
			String finalUrl = dataMiner + WPS_DESCRIBE_PROCESS_URL + "&gcube-token=" + token
					+ "&lang=en-US&Identifier=" + identifier;
			connection = (HttpURLConnection) new URL(finalUrl).openConnection();
			// plain GET: no request body is written, so output is not enabled
			connection.setUseCaches(false);
			connection.setRequestMethod("GET");
			// NOTE(review): no connect/read timeout is configured, so this call
			// can block the constructor indefinitely — confirm whether intended.

			Document doc;
			// hand the raw bytes to the parser so that it honors the encoding
			// declared by the document instead of the platform default charset
			try (InputStream in = connection.getInputStream()) {
				doc = newDocumentBuilderFactory().newDocumentBuilder().parse(in);
			}
			doc.getDocumentElement().normalize();

			NodeList inputs = doc.getElementsByTagName("Input");
			for (int i = 0; i < inputs.getLength(); i++) {
				parameters.add(toParameter(inputs.item(i)));
			}
		} catch (Exception e) {
			// boundary: a failed discovery must not break object construction
			logger.error(e.getLocalizedMessage(), e);
		} finally {
			if (connection != null)
				connection.disconnect();
		}
		return parameters;
	}

	/**
	 * Converts one {@code Input} element into a parameter. The name and the
	 * description come from {@code ows:Identifier} and {@code ows:Title}; the
	 * type and the value are guessed from keywords in {@code ows:Abstract}.
	 *
	 * @param inputNode
	 *            an {@code Input} element of the DescribeProcess answer
	 * @return the parameter; its value stays unset when the abstract is not
	 *         recognized or no annotations are available
	 */
	private NlpParameter toParameter(Node inputNode) {
		NlpParameter nlpParam = new NlpParameter();
		NodeList children = inputNode.getChildNodes();
		for (int j = 0; j < children.getLength(); j++) {
			Node node = children.item(j);
			String nodeName = node.getNodeName();
			if ("ows:Identifier".equals(nodeName)) {
				nlpParam.setName(node.getTextContent());
			} else if ("ows:Title".equals(nodeName)) {
				nlpParam.setDescription(node.getTextContent());
			} else if ("ows:Abstract".equals(nodeName)) {
				// for the moment the type is limited to 'file' and 'annotations';
				// Locale.ROOT keeps the keyword match independent of the JVM locale
				String text = node.getTextContent().toLowerCase(Locale.ROOT);
				if (text.contains("file") || text.contains("text")) {
					nlpParam.setObjectType(NlpParameter.INPUT_FILE);
					nlpParam.setValue(publicLink);
				} else if (text.contains("annotation") || text.contains("list")) {
					nlpParam.setObjectType(NlpParameter.INPUT_ANNOTATIONS);
					if (annotations != null)
						nlpParam.setValue(annotations.replace(",", "|"));
				}
			}
		}
		return nlpParam;
	}

	/**
	 * Creates a parser factory that rejects DOCTYPE declarations, since the
	 * parsed documents come from a remote service. If the underlying parser
	 * does not know the feature, a warning is logged and the factory is used
	 * without it.
	 *
	 * @return a new factory
	 */
	private static DocumentBuilderFactory newDocumentBuilderFactory() {
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		try {
			factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
		} catch (ParserConfigurationException e) {
			logger.warn("XML parser cannot reject DOCTYPE declarations; continuing without that hardening", e);
		}
		return factory;
	}

	/**
	 * @return the value of the inherited {@code elapsedTime} field; after
	 *         {@link #asyncHttpRequestCallback()} has run it holds the
	 *         difference computed there
	 */
	public long getElapsedTime() {
		return elapsedTime;
	}

	/**
	 * Handles the response: records the elapsed time, extracts the result link
	 * and reports {@code <short identifier>:::<link>} to the commander. On any
	 * failure {@link Constants#ERROR_ID} is reported instead of the link.
	 * <p>
	 * When the instance has no identifier or no commander (two-argument
	 * constructor) nothing is reported.
	 */
	@Override
	public void asyncHttpRequestCallback() {
		// presumably elapsedTime holds the start timestamp set by the superclass — verify there
		elapsedTime = System.currentTimeMillis() - elapsedTime;
		if (identifier == null || commander == null) {
			logger.warn("No identifier/commander configured, result not reported; elapsed time: {}", elapsedTime);
			return;
		}

		// the part after the last '.'; the whole identifier when it has no '.'
		String shortId = identifier.substring(identifier.lastIndexOf('.') + 1);
		logger.info("ID: {} elapsed time: {}", shortId, elapsedTime);
		String result = super.getResult();
		try {
			String theLink = extractResultLink(result);
			commander.updateResultList(shortId + ":::" + theLink);
		} catch (Exception e) {
			commander.updateResultList(shortId + ":::" + Constants.ERROR_ID);
			logger.error(e.getLocalizedMessage(), e);
		}
	}

	/**
	 * Finds, in an Execute response, the first {@code ogr:Result} whose
	 * {@code d4science:MimeType} is {@code application/d4science} and returns
	 * the text of its {@code d4science:Data} child.
	 *
	 * @param result
	 *            the response body
	 * @return the data text, or an empty string when no result matches
	 * @throws IOException
	 *             if there is no response body or it cannot be read
	 * @throws SAXException
	 *             if the body is not well-formed XML
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 */
	private static String extractResultLink(String result)
			throws IOException, SAXException, ParserConfigurationException {
		if (result == null)
			throw new IOException("No response body available");

		// the body is already decoded text: parse the characters directly
		// instead of round-tripping through bytes and the platform charset
		Document doc = newDocumentBuilderFactory().newDocumentBuilder()
				.parse(new InputSource(new StringReader(result)));
		doc.getDocumentElement().normalize();

		NodeList results = doc.getElementsByTagName("ogr:Result");
		for (int i = 0; i < results.getLength(); i++) {
			NodeList children = results.item(i).getChildNodes();
			String data = "";
			boolean matches = false;
			for (int j = 0; j < children.getLength(); j++) {
				Node node = children.item(j);
				String nodeName = node.getNodeName();
				if ("d4science:Data".equals(nodeName)) {
					data = node.getTextContent();
				} else if ("d4science:MimeType".equals(nodeName)
						&& "application/d4science".equals(node.getTextContent())) {
					matches = true;
				}
			}
			if (matches)
				return data;
		}
		return "";
	}
}