multi-thread version

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165134 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Erik Perrone 2018-03-15 14:40:50 +00:00
parent c7f9c61294
commit 24ff49d294
16 changed files with 627 additions and 106 deletions

View File

@ -2,32 +2,18 @@ package org.gcube.nlphub;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import javax.servlet.ServletException;
import javax.servlet.annotation.MultipartConfig;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.log4j.Logger;
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
import org.gcube.data.analysis.dataminermanagercl.shared.data.OutputData;
import org.gcube.data.analysis.dataminermanagercl.shared.data.computations.ComputationId;
import org.gcube.data.analysis.dataminermanagercl.shared.data.output.MapResource;
import org.gcube.data.analysis.dataminermanagercl.shared.data.output.Resource;
import org.gcube.data.analysis.dataminermanagercl.shared.parameters.FileParameter;
import org.gcube.data.analysis.dataminermanagercl.shared.parameters.ObjectParameter;
import org.gcube.data.analysis.dataminermanagercl.shared.parameters.Parameter;
import org.gcube.nlphub.legacy.Constants;
import org.gcube.nlphub.legacy.DataminerClient;
import org.gcube.nlphub.legacy.JsonManager;
//import org.gcube.dataminerclient.DataminerClient;
import org.gcube.nlphub.nlp.NlpNerRunner;
import org.gcube.nlphub.nlp.RunnerCommander;
/**
* Servlet implementation class NLPHub
@ -76,6 +62,11 @@ public class NLPHub extends HttpServlet {
}
RunnerCommander commander = new RunnerCommander(algs, request.getParameter("plink"), request.getParameter("annotations"), token,
response);
commander.setSleepTime(100);
commander.startProcess();
/*
if (algs.length >= 1) {
NlpNerRunner runner = new NlpNerRunner(service, algs, token, response);
runner.run(request.getParameter("plink"), request.getParameter("annotations"),
@ -88,7 +79,8 @@ public class NLPHub extends HttpServlet {
} catch (Exception ex) {
logger.error(ex.getLocalizedMessage());
}
}
}*/
} catch (Exception x) {
x.printStackTrace();
}

View File

@ -67,9 +67,9 @@ public class NLPMapper extends HttpServlet {
response.setCharacterEncoding("utf-8");
token = Constants.getToken(request, devMode);
String documentLink = request.getParameter("plink"); // link to text file (workspace)
System.out.println("documentLink\n: " + documentLink);
//System.out.println("documentLink\n: " + documentLink);
String toBeMap = request.getParameter("tobemap");
System.out.println("tobemap:\n" + toBeMap);
//System.out.println("tobemap:\n" + toBeMap);
String[] tokens;
if(toBeMap.indexOf("|") > 0)
tokens = toBeMap.split("\\|");
@ -79,15 +79,15 @@ public class NLPMapper extends HttpServlet {
}
String annotations = request.getParameter("annotations");
System.out.println("annotations\n: " + annotations);
//System.out.println("annotations\n: " + annotations);
String language = request.getParameter("lang");
PrintWriter writer = response.getWriter();
System.out.println("language\n: " + language);
//System.out.println("language\n: " + language);
System.out.println("tokens length: " + tokens.length);
for(int u=0; u<tokens.length; u++) {
System.out.println("[" + u + "] " + tokens[u]);
}
//System.out.println("tokens length: " + tokens.length);
// for(int u=0; u<tokens.length; u++) {
// System.out.println("[" + u + "] " + tokens[u]);
// }
try {
String text = getDocument(documentLink);
@ -109,11 +109,11 @@ public class NLPMapper extends HttpServlet {
for (int i = 0; i < tokens.length; i++) {
String token = tokens[i];
String[] t = token.split(":::");
System.out.println(t[0]);
System.out.println(t[1]);
//System.out.println(t[0]);
//System.out.println(t[1]);
try {
String json = ((JsonMapper) (getMapper(t[0]).newInstance())).getJson(t[0], t[1]);
System.out.println("" + json);
//System.out.println("" + json);
out += json;
if (i < tokens.length - 1)
out += ",";

View File

@ -1,17 +1,10 @@
package org.gcube.nlphub;
import static org.gcube.common.authorization.client.Constants.authorizationService;
//import static org.gcube.common.authorization.client.Constants.authorizationService;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import javax.servlet.ServletException;
import javax.servlet.annotation.MultipartConfig;
@ -39,9 +32,9 @@ public class NLPUploader extends HttpServlet {
private static final long serialVersionUID = 1L;
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
private boolean devMode = true;
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private String token; // = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private WorkspaceManager ws;
/**
* @see HttpServlet#HttpServlet()
*/
@ -82,7 +75,7 @@ public class NLPUploader extends HttpServlet {
throws ServletException, IOException {
String freeText = request.getParameter("freetext");
freeText = NlpUtils.replaceDirtyCharacters(freeText);
byte[] content = freeText.getBytes("UTF-8");
String fileName = generateFileName();
PrintWriter writer = response.getWriter();
@ -144,11 +137,19 @@ public class NLPUploader extends HttpServlet {
String link = ws.getPublicLink(fileName, token);
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
System.out.println(sentence);
//NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
//recognizer.run();
NLpLanguageRecognizer.run(sentence, token, link, response);
//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
logger.info(sentence);
// System.out.println(sentence);
// NLpLanguageRecognizer recognizer = new
// NLpLanguageRecognizer(NLPHub.service, token, sentence, link,
// response);
// recognizer.run();
try {
NLpLanguageRecognizer.run(sentence, token, link, response);
} catch (NlpHubException ex) {
writer.println(new JsonManager().getSuccessJsonResponse(Constants.UNAVAILABLE, link));
}
// writer.println(new JsonManager().getSuccessJsonResponse("" +
// link));
} catch (Exception x) {
x.printStackTrace();
logger.error(x.getClass().getName() + ": " + x.getLocalizedMessage());
@ -172,15 +173,4 @@ public class NLPUploader extends HttpServlet {
long now = System.currentTimeMillis();
return "auto-nlp-" + now;
}
private byte[] escapeForJson(byte[] content) throws UnsupportedEncodingException {
String text = new String(content, "UTF-8");
text = text.replaceAll("\n", "\\\n");
text = text.replaceAll("\r", "\\\r");
text = text.replaceAll("\t", "\\\t");
text = text.replaceAll("\"", "\\\"");
//text = text.replaceAll("\\", "\\\\");
System.out.println("*** text\n" + text);
return text.getBytes("UTF-8");
}
}

View File

@ -0,0 +1,112 @@
package org.gcube.nlphub.legacy;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import org.gcube.nlphub.nlp.NlpParameter;
/**
 * Fire-and-forget HTTP request executed on its own thread.
 *
 * The request URL is built from a base URL plus an optional list of
 * {@link NlpParameter} query parameters. When the request completes, the
 * (trimmed, newline-stripped) response body is stored in {@code result} and
 * {@link #asyncHttpRequestCallback()} is invoked; subclasses override the
 * callback to process the response.
 */
public class AsyncHttpRequest extends Thread {
    private String baseUrl, finalUrl, result, method;
    private ArrayList<NlpParameter> parameters;
    private Logger logger = Logger.getLogger(AsyncHttpRequest.class.getSimpleName());
    // Set to the start timestamp when run() begins; the default callback turns
    // it into the elapsed duration. Subclasses read/update it the same way.
    protected long elapsedTime;

    /** Creates an unconfigured GET request; use the setters before start(). */
    public AsyncHttpRequest() {
        this.baseUrl = null;
        this.parameters = null;
        this.method = "GET";
        finalUrl = null;
        elapsedTime = 0;
    }

    /**
     * @param baseUrl    target URL; may already carry a query string
     * @param method     "GET" or "POST" (case-insensitive); anything else,
     *                   including null, falls back to "GET"
     * @param parameters query parameters to append, may be null
     */
    public AsyncHttpRequest(String baseUrl, String method, ArrayList<NlpParameter> parameters) {
        this.baseUrl = baseUrl;
        this.parameters = parameters;
        if (method == null)
            this.method = "GET";
        else
            this.method = (method.equalsIgnoreCase("GET") || method.equalsIgnoreCase("POST")) ? method : "GET";
        setFinalUrl();
        elapsedTime = 0;
    }

    /**
     * Performs the HTTP call, accumulates the response body (each line trimmed,
     * newlines dropped) and invokes {@link #asyncHttpRequestCallback()}.
     * Errors are logged, never rethrown; resources are always released.
     */
    @Override
    public void run() {
        elapsedTime = System.currentTimeMillis();
        if (finalUrl == null)
            finalUrl = baseUrl;
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            URL url = new URL(finalUrl);
            connection = (HttpURLConnection) url.openConnection();
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod(method);
            reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            // StringBuilder instead of repeated String concatenation: O(n) not O(n^2).
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line.trim());
            }
            result = body.toString();
            asyncHttpRequestCallback();
        } catch (Exception x) {
            logger.error(x.getLocalizedMessage());
        } finally {
            try {
                if (reader != null)
                    reader.close();
                if (connection != null)
                    connection.disconnect();
            } catch (Exception e) {
                logger.error(e.getLocalizedMessage());
            }
        }
    }

    /** @return the accumulated response body, or null if the request failed early. */
    public String getResult() {
        return result;
    }

    /**
     * Hook invoked after a successful request; the default implementation just
     * converts {@code elapsedTime} from a start timestamp into a duration.
     */
    public void asyncHttpRequestCallback() {
        elapsedTime = System.currentTimeMillis() - elapsedTime;
    }

    public void setBaseUrl(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    public void setMethod(String method) {
        this.method = method;
    }

    public void setParameters(ArrayList<NlpParameter> parameters) {
        this.parameters = parameters;
    }

    /**
     * Builds {@code finalUrl} = baseUrl + URL-encoded query parameters.
     * Bug fixes vs. the previous version:
     * - if baseUrl already contained a query string, the first parameter was
     *   appended without any '?'/'&' separator;
     * - if the parameter list was non-null but empty, the trailing-char strip
     *   removed a legitimate character from the URL.
     */
    private void setFinalUrl() {
        finalUrl = baseUrl;
        if (parameters != null && !parameters.isEmpty()) {
            if (finalUrl.indexOf("?") < 0)
                finalUrl += "?";
            else if (!finalUrl.endsWith("?") && !finalUrl.endsWith("&"))
                finalUrl += "&";
            for (NlpParameter p : parameters) {
                try {
                    finalUrl += p.getName() + "=" + URLEncoder.encode((String) p.getValue(), "UTF-8");
                    finalUrl += "&";
                } catch (Exception x) {
                    logger.error(x.getLocalizedMessage());
                }
            }
            // drop the trailing '&' left by the loop
            finalUrl = finalUrl.substring(0, finalUrl.length() - 1);
        }
    }
}

View File

@ -7,7 +7,8 @@ public class Constants {
public static String TEST_TOKEN = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
public static String MIME_TEXT = "text/plain";
public static String CONTENT_TYPE = "Content-Type";
public static String UNAVAILABLE = "unavailable";
public static String ERROR_ID = "ERROR";
public static String getToken(HttpServletRequest request, boolean devMode) {
String token = request.getParameter(TOKEN_PARAMETER);

View File

@ -127,7 +127,7 @@ public class DataminerClient {
@Override
public void running(double percentage) {
logger.debug("Operation Running: " + percentage);
//logger.debug("Operation Running: " + percentage);
//System.out.println("Operation Running: " + percentage);
}

View File

@ -80,14 +80,14 @@ public class NLpLanguageRecognizer extends DataminerClient {
String link = data.getTextContent();
String type = description.getTextContent();
if(type.equals("outfile")) {
System.out.println(link);
//System.out.println(link);
String content = readFileContent(link, token);
if (response != null) {
response.getWriter()
.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
}
else {
System.out.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).debug(new JsonManager().getSuccessJsonResponse(content, publicLink));
}
}
}

View File

@ -0,0 +1,188 @@
package org.gcube.nlphub.nlp;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.log4j.Logger;
import org.gcube.nlphub.legacy.AsyncHttpRequest;
import org.gcube.nlphub.legacy.Constants;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * Asynchronous runner for a single NER (named-entity recognition) algorithm
 * exposed as a WPS service. On construction it calls DescribeProcess to
 * discover the algorithm's input parameters, fills them with the document
 * public link and the annotation list, and prepares an Execute URL. When the
 * inherited {@link AsyncHttpRequest#run()} completes, the callback parses the
 * WPS XML response, extracts the result link and reports it to
 * {@link RunnerCommander#updateResultList(String)} as "&lt;shortId&gt;:::&lt;link&gt;"
 * (or ":::ERROR" on failure).
 */
public class NlpAsyncNerRunner extends AsyncHttpRequest {
    public final static String WPS_EXECUTE_URL = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
    public final static String WPS_DESCRIBE_PROCESS_URL = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=DescribeProcess&service=WPS&Version=1.0.0";
    private String identifier, token, httpMethod, annotations, publicLink, language;
    private Logger logger = Logger.getLogger(NlpAsyncNerRunner.class.getSimpleName());

    /**
     * @param identifier  fully-qualified WPS algorithm identifier
     * @param token       gCube security token
     * @param publicLink  public link of the input text file
     * @param annotations comma-separated annotation names
     * @param language    currently unused, kept for interface stability
     */
    public NlpAsyncNerRunner(String identifier, String token, String publicLink, String annotations, String language) {
        super();
        this.identifier = identifier;
        this.token = token;
        this.httpMethod = "GET";
        this.annotations = annotations;
        this.publicLink = publicLink;
        this.language = language; // not used for the moment...
        ArrayList<NlpParameter> params = buildParameterString();
        String serviceUrl = WPS_EXECUTE_URL + "&gcube-token=" + token + "&lang=en-US&Identifier=" + identifier;
        serviceUrl += "&" + setUrl(params);
        super.setBaseUrl(serviceUrl);
        super.setMethod(httpMethod);
    }

    /** Raw variant: run an arbitrary prebuilt URL with the given HTTP method. */
    public NlpAsyncNerRunner(String baseUrl, String method) {
        super(baseUrl, method, null);
    }

    public String getIdentifier() {
        return identifier;
    }

    public String getToken() {
        return token;
    }

    public String getHttpMethod() {
        return httpMethod;
    }

    /**
     * Encodes the parameter list as a WPS "DataInputs=name=value;name=value;"
     * query fragment (values URL-encoded, entries ';'-separated).
     */
    private String setUrl(ArrayList<NlpParameter> parameters) {
        StringBuilder url = new StringBuilder("DataInputs=");
        for (NlpParameter p : parameters) {
            try {
                url.append(p.getName()).append("=").append(URLEncoder.encode((String) p.getValue(), "UTF-8")).append(";");
            } catch (Exception ex) {
                logger.error(ex.getLocalizedMessage());
            }
        }
        return url.toString();
    }

    /**
     * Calls DescribeProcess for {@code identifier} and heuristically maps each
     * declared &lt;Input&gt; to either the input file link or the annotation list,
     * based on keywords found in its ows:Abstract text.
     * Returns an empty list (never null) on failure.
     */
    private ArrayList<NlpParameter> buildParameterString() {
        ArrayList<NlpParameter> parameters = new ArrayList<>();
        HttpURLConnection connection = null;
        BufferedReader r = null;
        try {
            String finalUrl = WPS_DESCRIBE_PROCESS_URL + "&gcube-token=" + token;
            finalUrl += "&lang=en-US&Identifier=" + identifier;
            URL url = new URL(finalUrl);
            connection = (HttpURLConnection) url.openConnection();
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestMethod("GET");
            r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
            doc.getDocumentElement().normalize();
            NodeList nListInput = doc.getElementsByTagName("Input");
            for (int i = 0; i < nListInput.getLength(); i++) {
                Node nodeInput = nListInput.item(i);
                NlpParameter nlpParam = new NlpParameter();
                NodeList inputChildren = nodeInput.getChildNodes();
                // try to find the name and the type of the input parameter
                for (int j = 0; j < inputChildren.getLength(); j++) {
                    Node node = inputChildren.item(j);
                    // for the moment we limit the type at 'file' and 'annotations'
                    if (node.getNodeName().equals("ows:Identifier")) {
                        nlpParam.setName(node.getTextContent());
                    } else if (node.getNodeName().equals("ows:Title")) {
                        nlpParam.setDescription(node.getTextContent());
                    } else if (node.getNodeName().equals("ows:Abstract")) {
                        String text = node.getTextContent().toLowerCase();
                        if ((text.indexOf("file") >= 0) || (text.indexOf("text") >= 0)) {
                            nlpParam.setObjectType(NlpParameter.INPUT_FILE);
                            nlpParam.setValue(publicLink);
                        } else if ((text.indexOf("annotation") >= 0) || (text.indexOf("list") >= 0)) {
                            nlpParam.setObjectType(NlpParameter.INPUT_ANNOTATIONS);
                            nlpParam.setValue(annotations.replaceAll(",", "|"));
                        }
                    }
                }
                parameters.add(nlpParam);
            }
        } catch (Exception x) {
            logger.error(x.getLocalizedMessage());
        } finally {
            try {
                if (r != null)
                    r.close();
                if (connection != null)
                    connection.disconnect();
            } catch (Exception e) {
                logger.error(e.getLocalizedMessage());
            }
        }
        return parameters;
    }

    public long getElapsedTime() {
        return elapsedTime;
    }

    /** @return the identifier's last dotted segment, used as result key. */
    private String shortIdentifier() {
        return identifier.substring(identifier.lastIndexOf(".") + 1);
    }

    /**
     * Parses the WPS Execute response: scans ogr:Result elements and keeps the
     * d4science:Data content of the first one whose d4science:MimeType is
     * "application/d4science". Reports the link (or ERROR on any parse
     * failure) to RunnerCommander.
     */
    @Override
    public void asyncHttpRequestCallback() {
        elapsedTime = System.currentTimeMillis() - elapsedTime;
        logger.info("ID: " + shortIdentifier() + " elapsed time: " + elapsedTime);
        String result = super.getResult();
        String theLink = "";
        BufferedReader r = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(result.getBytes(StandardCharsets.UTF_8))));
        try {
            Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
            doc.getDocumentElement().normalize();
            NodeList nListResult = doc.getElementsByTagName("ogr:Result");
            for (int i = 0, found = 0; (i < nListResult.getLength()) && (found == 0); i++) {
                Node nodeResult = nListResult.item(i);
                NodeList list = nodeResult.getChildNodes();
                String res = "";
                for (int j = 0; j < list.getLength(); j++) {
                    Node node = list.item(j);
                    if (node.getNodeName().equals("d4science:Data")) {
                        res = node.getTextContent();
                    } else if (node.getNodeName().equals("d4science:MimeType")) {
                        if (node.getTextContent().equals("application/d4science")) {
                            found = 1;
                        }
                    }
                }
                if (found > 0) {
                    theLink = res;
                }
            }
            RunnerCommander.updateResultList(shortIdentifier() + ":::" + theLink);
        } catch (Exception x) {
            RunnerCommander.updateResultList(shortIdentifier() + ":::" + Constants.ERROR_ID);
            logger.error(x.getLocalizedMessage());
        } finally {
            // previously leaked; harmless for an in-memory stream but still closed for hygiene
            try {
                r.close();
            } catch (Exception ignored) {
            }
        }
    }
}

View File

@ -84,8 +84,9 @@ public class NlpNerRunner extends DataminerClient {
String mimeType = f.getMimeType();
if (mimeType.equalsIgnoreCase("application/d4science")) {
String link = f.getUrl();
System.out.println("url: " + link);
//System.out.println("url: " + link);
logger.debug("url: " + link);
logger.error("url: " + link);
String op = computationId.getOperatorId();
op = op.substring(op.lastIndexOf(".") + 1);
testEndOfProcess(op + ":::" + link);
@ -103,11 +104,11 @@ public class NlpNerRunner extends DataminerClient {
for (String id : identifiers) {
try {
super.identifier = id;
System.out.println("Running: " + id);
logger.error("Running: " + id);
super.init();
List<Parameter> parameters = mapParameters(filePublicLink, annotations);
super.execute(parameters);
System.out.println("Runned: " + id);
logger.error("Runned: " + id);
} catch (Exception e) {
logger.error(e.getLocalizedMessage());
throw new NlpHubException(e.getLocalizedMessage(), e);
@ -136,11 +137,11 @@ public class NlpNerRunner extends DataminerClient {
try {
writer = response.getWriter();
String msg = new JsonManager().getSuccessJsonResponse(content);
System.out.println("msg: " + msg);
//System.out.println("msg: " + msg);
writer.println(msg);
if(writer.checkError()) {
logger.error("writeResponse method has some problem in writing result.");
System.out.println("writeResponse method has some problem in writing result.");
//System.out.println("writeResponse method has some problem in writing result.");
}
} catch (Exception ex) {
@ -187,14 +188,14 @@ public class NlpNerRunner extends DataminerClient {
fileName.setName(p.getName());
fileName.setValue(publicLink);
parameters.add(fileName);
System.out.println(fileName.toString());
//System.out.println(fileName.toString());
break;
case LIST:
ListParameter list = new ListParameter();
list.setName(p.getName());
list.setValue(annotations.replace(",", "|"));
parameters.add(list);
System.out.println(list.toString());
//System.out.println(list.toString());
break;
case ENUM:
// to be managed...

View File

@ -2,12 +2,17 @@ package org.gcube.nlphub.nlp;
public class NlpParameter {
public static String ANNOTATION_LIST = "annotations";
public static int INPUT_FILE = 0;
public static int INPUT_ANNOTATIONS = 1;
public static int INPUT_LANGUAGE = 2;
private String name, description;
private Object value;
private int objectType;
public NlpParameter() {
}
public NlpParameter(String name, String description, Object value, int objectType) {
super();
this.name = name;
this.description = description;
this.value = value;
@ -29,6 +34,20 @@ public class NlpParameter {
public int getObjectType() {
return objectType;
}
public void setName(String name) {
this.name = name;
}
public void setDescription(String description) {
this.description = description;
}
public void setValue(Object value) {
this.value = value;
}
public void setObjectType(int objectType) {
this.objectType = objectType;
}
}

View File

@ -5,33 +5,17 @@ import java.util.ArrayList;
public class NlpUtils {
public static String getLanguageRecognizerDigest(String content) {
int minToken = 20;
int minToken = 100;
content = content.trim();
String[] tokens = content.split("\\.");
if(tokens.length == 1)
tokens = content.split(";");
if(tokens.length == 1)
return escapeContent(content);
ArrayList<String> list = new ArrayList<>();
for(int i=0; i<tokens.length; i++) {
int n = countTokens(tokens[i]);
if(n >= minToken) {
list.add(tokens[i]);
}
String[] tokens = content.split("\\s");
String digest = "";
int len = (minToken <= tokens.length) ? minToken : tokens.length;
for(int i=0; i<len; i++) {
digest += tokens[i] + " ";
}
if(list.isEmpty())
return escapeContent(content);
String digest = list.get(0);
for(String s : list) {
if(s.length() < digest.length())
digest = s;
}
return escapeContent(digest);
return escapeContent(digest.trim());
}
public static int countTokens(String content) {
@ -41,6 +25,8 @@ public class NlpUtils {
public static String escapeContent(String content) {
content = content.replaceAll("\\\\", " ");
content = content.replaceAll("\"", " ");
content = content.replaceAll(";", " ");
content = content.replaceAll("=", " ");
return content;
}
@ -67,8 +53,16 @@ public class NlpUtils {
text += "In talks, held at the border village of Panmunjom or \"truce village,\" in the Korean peninsula\'s heavily fortified demilitarized zone, North Korea negotiators agreed to send a \"high-level delegation\" comprising athletes, a cheering squad, an art troupe, a visitors\' group, a Taekwondo demonstration team and a press corps, South Korea\'s Unification Ministry told reporters in Seoul.";
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text += "In talks, held at the border village of Panmunjom or \"truce village,\" in the Korean peninsula\'s heavily fortified demilitarized zone, North Korea negotiators agreed to send a \"high-level delegation\" comprising athletes, a cheering squad, an art troupe, a visitors\' group, a Taekwondo demonstration team and a press corps, South Korea\'s Unification Ministry told reporters in Seoul.";
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
//text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata.";

View File

@ -0,0 +1,166 @@
package org.gcube.nlphub.nlp;
import java.io.PrintWriter;
import java.util.ArrayList;
import javax.servlet.http.HttpServletResponse;
import org.apache.log4j.Logger;
import org.gcube.nlphub.legacy.Constants;
import org.gcube.nlphub.legacy.JsonManager;
/**
 * Orchestrator thread: launches one {@link NlpAsyncNerRunner} per algorithm
 * identifier, polls until every runner has reported a result through the
 * static {@link #updateResultList(String)} hook (or a timeout expires), then
 * writes the collected "id:::link" entries as a JSON response.
 *
 * Callers use {@link #startProcess()}, which blocks until the work is done.
 */
public class RunnerCommander extends Thread {
    private HttpServletResponse response = null;
    private String annotationList = "";
    private String[] identifiers = null;
    private String link = "";
    private String token = "";
    // NOTE(review): static and re-initialised by every constructor, so two
    // concurrent RunnerCommander instances would clobber each other's results.
    // Kept as-is because NlpAsyncNerRunner reports through the static
    // updateResultList(); confirm single-instance usage before changing.
    private static ArrayList<String> outResultList = null;
    private long sleepTime = 500l;
    private long maxWaitingTime = 5l * 60l * 1000l;
    private Logger logger = Logger.getLogger(RunnerCommander.class.getSimpleName());
    // volatile: written by this worker thread, polled by the caller thread in
    // startProcess(); without it the caller may never observe completion.
    private volatile boolean complete = false;

    /** Variant without a servlet response: results are collected but not written. */
    public RunnerCommander(String[] identifiers, String link, String annotationList, String token) {
        this.identifiers = identifiers;
        this.annotationList = annotationList;
        this.link = link;
        this.token = token;
        this.response = null;
        this.sleepTime = 500l;
        outResultList = new ArrayList<String>();
    }

    /**
     * @param identifiers    WPS algorithm identifiers to run
     * @param link           public link of the input document
     * @param annotationList comma-separated annotation names
     * @param token          gCube security token
     * @param response       servlet response the JSON result is written to
     */
    public RunnerCommander(String[] identifiers, String link, String annotationList, String token,
            HttpServletResponse response) {
        this.identifiers = identifiers;
        this.annotationList = annotationList;
        this.link = link;
        this.token = token;
        this.response = response;
        this.sleepTime = 500l;
        outResultList = new ArrayList<String>();
    }

    public long getSleepTime() {
        return sleepTime;
    }

    public void setSleepTime(long sleepTime) {
        this.sleepTime = sleepTime;
    }

    /** Called by worker threads to report one "id:::link" result entry. */
    public synchronized static void updateResultList(String res) {
        outResultList.add(res);
    }

    public long getMaxWaitingtime() {
        return maxWaitingTime;
    }

    public void setMaxWaitingTime(long maxWaitingTime) {
        this.maxWaitingTime = maxWaitingTime;
    }

    /** Starts the worker thread and blocks the caller until it completes. */
    public void startProcess() {
        start();
        while (!complete) {
            try {
                sleep(sleepTime);
            } catch (InterruptedException x) {
                logger.info("Interrupted.");
            }
        }
    }

    /** Fires one asynchronous runner per algorithm identifier. */
    private void runAlgorithms() {
        for (String id : identifiers) {
            NlpAsyncNerRunner n = new NlpAsyncNerRunner(id, token, link, annotationList, null);
            n.start();
        }
    }

    /**
     * Polls the shared result list until all runners have reported, then
     * writes the response; falls back to {@link #timeoutHandler()} when
     * maxWaitingTime elapses or the thread is interrupted.
     */
    public void run() {
        runAlgorithms();
        long counter = 0;
        try {
            while (counter <= maxWaitingTime) {
                // read under the same monitor used by updateResultList()
                String[] links = null;
                synchronized (RunnerCommander.class) {
                    if (outResultList.size() == identifiers.length) {
                        links = outResultList.toArray(new String[outResultList.size()]);
                    }
                }
                if (links != null) {
                    writeResponse(links);
                    logger.info("Elapsed time: " + counter + " msec.");
                    return;
                }
                counter += sleepTime;
                sleep(sleepTime);
            }
            logger.error("Timeout error.");
            timeoutHandler();
        } catch (InterruptedException x) {
            logger.info("Elapsed time: " + counter + " msec.");
            logger.info("Thread interrupted.");
            timeoutHandler();
        }
    }

    /**
     * Fills in an "id:::ERROR" entry for every identifier that has not
     * reported yet, then writes whatever results are available.
     */
    private void timeoutHandler() {
        String[] links;
        synchronized (RunnerCommander.class) {
            for (String id : identifiers) {
                String shortId = id.substring(id.lastIndexOf(".") + 1);
                boolean found = false;
                for (String s : outResultList) {
                    if (s.startsWith(shortId)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    outResultList.add(shortId + ":::" + Constants.ERROR_ID);
                }
            }
            links = outResultList.toArray(new String[outResultList.size()]);
        }
        writeResponse(links);
    }

    /**
     * Writes the collected results as JSON (when a response object exists) and
     * always releases the caller waiting in startProcess().
     */
    private void writeResponse(String[] content) {
        logger.info("RunnerCommander has run " + content.length + " tasks.");
        try {
            if (response == null) {
                return;
            }
            response.setContentType("application/json;charset=UTF-8");
            PrintWriter writer = null;
            try {
                writer = response.getWriter();
                String msg = new JsonManager().getSuccessJsonResponse(content);
                writer.println(msg);
                if (writer.checkError()) {
                    logger.error("writeResponse method has some problem in writing result.");
                }
            } catch (Exception ex) {
                logger.error(ex.getLocalizedMessage());
                ex.printStackTrace();
            } finally {
                if (writer != null)
                    writer.close();
            }
        } finally {
            // Previously this was only set when response != null, leaving the
            // no-response constructor's caller spinning in startProcess() forever.
            complete = true;
        }
    }
}

View File

@ -4,7 +4,8 @@
<meta charset="UTF-8">
<title>Colors</title>
<script type="text/javascript">
var hexLetters = '0123456789ABCDEF';
var hexLetters = '0123456789ABCDEF';
enhanceColor = function(color) {
var c = eval("0x" + color);
@ -47,14 +48,24 @@
span[1].setAttribute("style", "color:#" + color + "; background:"
+ complement + "; font-weight:bold;");
}
testReplace = function() {
var text = "<< Text between >>";
text = text.replace(/</g, "&lt;");
text = text.replace(/>/g, "&gt;");
console.log(text);
document.getElementById("between-text").innerHtml = text;
}
</script>
</head>
<body onload="changeColor()">
<body onload="testReplace()">
<div>
<span>TEST COLORE</span>
</div>
<div>
<span>TEST COLORE + SFONDO</span>
</div>
<div id="between-text">
</div>
</body>
</html>

View File

@ -61,7 +61,7 @@
<div class="row">
<div class="clearfix">
<div class="column margin-right-10px">
<p>Drag a file on the Upload box, or select a file from your PC, or paste a text.</p>
<p>Drag a .TXT file on the Upload box, or select a file from your PC, or paste a text.</p>
</div>
<div class="column margin-left-10px">
<div class="centered full-width" id="fu-container">

View File

@ -10,6 +10,7 @@ var jsonContent = null;
var named = null;
var resultText = "";
var hexLetters = '0123456789ABCDEF';
var txtFlag = true;
// ------------------------------------------------------------------------------------------------------
// Starting point: get the csv file with algorithm parameters and set the page
@ -50,13 +51,15 @@ $(document).ready(function() {
fileName : "mytxtfile",
maxFileCount : 100,
multiple : false,
maxFileSize : 1024 * 1000 * 6.14,
maxFileSize : 1024 * 1000 * 1,
showFileCounter : false,
showCancel : true,
//allowedTypes: "txt,.txt",
dragDropStr : "<img class='img-prompt' src='img/upload.png' width='60px' height='60px' style='display:block; margin:auto; padding: 10px'>",
extErrorStr : "Error. Text file only",
sizeErrorStr : "Error. Max size: 6 Mb",
onLoad : function(obj) {
txtFlag = true;
$("#file-info").remove();
$("#reset-upload").remove();
$("#fu-container")
@ -81,9 +84,21 @@ $(document).ready(function() {
setEventListeners();
},
onSelect : function(files) {
showProgress();
textAreaEnable(false);
$("#input-textarea").val("");
var fileName = files[0].name;
var extension = "" + fileName.substring(fileName.lastIndexOf("."));
if(extension.toUpperCase() == ".TXT") {
showProgress();
textAreaEnable(false);
$("#input-textarea").val("");
} else {
txtFlag = false;
alert("The application supports text file only (.TXT)");
}
},
onSubmit: function(files) {
var submitFlag = txtFlag;
txtFlag = true;
return submitFlag;
},
onSuccess : function(files, data, xhr) {
hideProgress();
@ -124,6 +139,8 @@ $(document).ready(function() {
* Utility function processing the language indication returned by the language recognition service
*/
checkLanguage = function(lang) {
if(lang == 'unavailable')
return;
var options = $("#language-select option");
for(var i=0; i<options.length; i++) {
if(options[i].innerText.toLowerCase() == lang.toLowerCase()) {
@ -381,11 +398,23 @@ launchAlgorithm = function() {
annList = annList.substring(0, annList.length - 1);
savedAnnotations = annList;
// build the algorithm list, selecting on:
// [1] the language
// [2] the annotations
var algList = "";
for (j in algorithms) {
if (algorithms[j].lang.toLowerCase().indexOf(
$("#language-select").val().toLowerCase()) >= 0) {
algList += encodeURI(algorithms[j].id) + ",";
var algAnnotations = algorithms[j].annotations.toLowerCase();
for(k in list) {
var a = list[k].toLowerCase();
if(algAnnotations.indexOf(a) > -1) {
algList += encodeURI(algorithms[j].id) + ",";
break;
}
}
}
}
if (algList.length == 0) {
@ -575,8 +604,9 @@ rewriteText = function(annotation, color) {
var G = enhanceColor(complement.substring(2,4));
var B = enhanceColor(complement.substring(4));
complement = "#" + R + G + B;
console.log("-getIndices: start");
var indices = getIndices(annotation);
console.log("-getIndices: end");
$("#result-header-right").empty();
$("#result-header-right").append("<span style='color:" + color + ";'>" + annotation + "</span> occurs " + indices.length + " times.");
var indexedText = "";
@ -584,6 +614,8 @@ rewriteText = function(annotation, color) {
if ((typeof (indices) == 'undefined') || (indices.length == 0)) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>");
indexedText = indexedText.replace(/</g, "&lt;");
indexedText = indexedText.replace(/>/g, "&gt;");
$("#result-text-div").append("<p>" + indexedText + "</p>");
return;
}
@ -595,6 +627,7 @@ rewriteText = function(annotation, color) {
var start = index[0];
var end = index[1];
indexedText += resultText.substring(t, start);
/*
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\n");
offset += countSubstringOccurrencies(resultText.substring(t, start),
@ -602,7 +635,7 @@ rewriteText = function(annotation, color) {
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\t");
start += offset;
end += offset;
end += offset;*/
var colored = "<span style='color:" + color + "; background:"
+ complement + "; font-weight:bold;'>"
+ resultText.substring(start, end) + "</span>";
@ -625,7 +658,8 @@ checkAnnotation = function(annotation) {
for (var j = 0; j < entities.length; j++) {
a = entities[j][annotation];
if (typeof a != 'undefined') {
return true;
if(a.length > 0)
return true;
}
}
}
@ -665,11 +699,14 @@ getIndices = function(annotation) {
* Merge the indices
*/
mergeIndices = function(indices) {
var newIndices = []
if (indices.length <= 1)
console.log("--mergeIndices start");
var newIndices = [];
//console.log("--mergeIndices: indices.length=" + indices.length);
if (indices.length == 1)
newIndices = indices[0];
else
newIndices = mergeAll(indices);
console.log("--mergeIndices end");
return newIndices;
}
@ -707,10 +744,18 @@ resizeTable = function() {
rowId = "row-" + i;
$("#annotations-table").append("<tr id=\"" + rowId + "\"></tr>");
}
var annotationElement = "<input type=\"checkbox\" id=\"check-" + i
+ "\" value=\"" + annotations[i]
+ "\" checked=\"checked\"></input> <label for=\"check-" + i
+ "\">" + annotations[i] + "</label>";
if(annotations[i].toLowerCase() == "keyword") {
annotationElement = "<input type=\"checkbox\" id=\"check-" + i
+ "\" value=\"" + annotations[i]
+ "\"></input> <label for=\"check-" + i
+ "\">" + annotations[i] + "</label>";
}
$("#" + rowId).append("<td>" + annotationElement + "</td>");
}
}

View File

@ -87,6 +87,7 @@ compareSegmentList = function(list1, list2) {
*/
mergeAll = function(indices) {
console.log("---mergeAll indices.length=" + indices.length);
var m = [];
for(var i=0; i<indices[0].length; i++) {
for(var j=1; j<indices.length; j++) {
@ -105,6 +106,7 @@ mergeAll = function(indices) {
m = cleanSegmentList(m);
//console.log(m);
end = compareSegmentList(old, m);
counter++;
if(counter == 100)
end = true;
}