From 73ca1ce2eb81f242be367946e696614e91336e94 Mon Sep 17 00:00:00 2001 From: Erik Perrone Date: Mon, 26 Feb 2018 14:39:10 +0000 Subject: [PATCH] git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164578 82a268e6-3cf1-43bd-a215-b396298e98cf --- src/main/java/org/gcube/nlphub/NLPMapper.java | 17 +- .../java/org/gcube/nlphub/NLPUploader.java | 148 ++---------------- .../org/gcube/nlphub/legacy/Constants.java | 20 ++- src/main/webapp/css/custom.css | 4 + src/main/webapp/index.jsp | 147 +++++++++++------ 5 files changed, 150 insertions(+), 186 deletions(-) diff --git a/src/main/java/org/gcube/nlphub/NLPMapper.java b/src/main/java/org/gcube/nlphub/NLPMapper.java index 97ae8d3..c63479e 100644 --- a/src/main/java/org/gcube/nlphub/NLPMapper.java +++ b/src/main/java/org/gcube/nlphub/NLPMapper.java @@ -62,8 +62,10 @@ public class NLPMapper extends HttpServlet { } private void doWork(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - token = Constants.getToken(request, devMode); + response.setContentType("application/json"); + response.setCharacterEncoding("utf-8"); + token = Constants.getToken(request, devMode); String documentLink = request.getParameter("plink"); // link to text file (workspace) String toBeMap = request.getParameter("tobemap"); System.out.println("tobemap: " + toBeMap); @@ -74,12 +76,21 @@ public class NLPMapper extends HttpServlet { tokens = new String[1]; tokens[0] = toBeMap; } - System.out.println("token nr. : " + tokens.length); String annotations = request.getParameter("annotations"); String language = request.getParameter("lang"); PrintWriter writer = response.getWriter(); try { String text = getDocument(documentLink); + + + text = text.replaceAll("\n", "\\\\n");; + text = text.replaceAll("\r", "\\\\r"); + text = text.replaceAll("\t", "\\\\t"); + text = text.replaceAll("\"", "\\\\\""); + +// byte[] b = text.getBytes(); +// System.out.println(Constants.hexDump(b)); + String out = "{"; out += "\"text\":\"" + text + "\","; out += "\"annotations\":\"" + annotations + "\","; @@ -154,4 +165,6 @@ public class NLPMapper extends HttpServlet { String link = ws.getPublicLink(fileName, token); return link; } + + } diff --git a/src/main/java/org/gcube/nlphub/NLPUploader.java b/src/main/java/org/gcube/nlphub/NLPUploader.java index dd97f40..4671a04 100644 --- a/src/main/java/org/gcube/nlphub/NLPUploader.java +++ b/src/main/java/org/gcube/nlphub/NLPUploader.java @@ -8,6 +8,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; @@ -78,7 +79,6 @@ public class NLPUploader extends HttpServlet { private void handleFreeText(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String freeText = request.getParameter("freetext"); - System.out.println(freeText); byte[] content = freeText.getBytes("UTF-8"); String fileName = generateFileName(); PrintWriter writer = response.getWriter(); @@ -111,7 +111,7 @@ public class NLPUploader extends HttpServlet { byte[] bufferedContent; try { InputStream fileContent = filePart.getInputStream(); - int offset = 0, len = 100, byteRead = 0; + int offset = 0, len = 256, byteRead = 0; byte[] readBuffer = new byte[len]; while (byteRead > -1) { byteRead = fileContent.read(readBuffer, 0, len); @@ -157,141 +157,19 @@ public class NLPUploader extends HttpServlet { return null; } -// private String getPublicLink(String fileName, String token) throws NlpHubException { -// try { -// String link = ""; -// String user = authorizationService().get(token).getClientInfo().getId(); -// String wsRoot = "/Home/" + user + "/Workspace/"; -// String webapp = "https://workspace-repository.d4science.org/home-library-webapp"; -// String uri = webapp + "/rest/GetPublicLink?absPath=" + URLEncoder.encode(wsRoot + fileName, "UTF-8") -// + "&shortUrl=false"; -// URL url = new URL(uri); -// -// // System.out.println(uri); -// -// HttpURLConnection connection = (HttpURLConnection) url.openConnection(); -// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token); -// connection.setDoInput(true); -// connection.setDoOutput(true); -// connection.setUseCaches(false); -// connection.setRequestMethod("GET"); -// -// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream())); -// -// StringBuffer response = new StringBuffer(); -// String inputLine; -// while ((inputLine = r.readLine()) != null) { -// response.append(inputLine); -// } -// -// String xmlOut = response.toString(); -// // System.out.println("xmlOut: " + xmlOut); -// -// String begin = ""; -// String end = ""; -// int b = xmlOut.indexOf(begin); -// int e = xmlOut.indexOf(end); -// -// if (xmlOut.contains("Exception") || (e < 0) || (b < 0)) { -// String message = "Invalid link: " + URLEncoder.encode(xmlOut, "UTF-8"); -// logger.error(message); -// throw new NlpHubException(message, null); -// } -// -// link = xmlOut.substring(b + begin.length(), e); -// return link; -// -// } catch (Exception e) { -// logger.error(e.getLocalizedMessage()); -// throw new NlpHubException(e.getLocalizedMessage(), e); -// } -// } -// -// private void deleteFile(String fileName, String token) throws NlpHubException { -// try { -// String user = authorizationService().get(token).getClientInfo().getId(); -// String wsRoot = "/Home/" + user + "/Workspace/"; -// String webapp = "https://workspace-repository.d4science.org/home-library-webapp"; -// String uri = webapp + "/rest/Delete?absPath=" + URLEncoder.encode(wsRoot + fileName, "UTF-8"); -// URL url = new URL(uri); -// -// // System.out.println(uri); -// -// HttpURLConnection connection = (HttpURLConnection) url.openConnection(); -// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token); -// connection.setDoInput(true); -// connection.setDoOutput(true); -// connection.setUseCaches(false); -// connection.setRequestMethod("GET"); -// -// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream())); -// -// StringBuffer response = new StringBuffer(); -// String inputLine; -// while ((inputLine = r.readLine()) != null) { -// response.append(inputLine); -// } -// -// String xmlOut = response.toString(); -// // System.out.println(xmlOut); -// } catch (Exception e) { -// logger.error(e.getLocalizedMessage()); -// throw new NlpHubException(e.getLocalizedMessage(), e); -// } -// } -// -// private boolean uploadFile(byte[] in, String name, String description, String token) throws NlpHubException { -// OutputStream output = null; -// try { -// String user = authorizationService().get(token).getClientInfo().getId(); -// String wsRoot = "/Home/" + user + "/Workspace/"; -// String webapp = "https://workspace-repository.d4science.org/home-library-webapp"; -// String uri = webapp + "/rest/Upload?name=" + URLEncoder.encode(name, "UTF-8") + "&description=" -// + URLEncoder.encode(description, "UTF-8") + "&parentPath=" + URLEncoder.encode(wsRoot, "UTF-8"); -// URL url = new URL(uri); -// -// // System.out.println(uri); -// -// HttpURLConnection connection = (HttpURLConnection) url.openConnection(); -// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token); -// connection.setDoInput(true); -// connection.setDoOutput(true); -// connection.setUseCaches(false); -// connection.setRequestProperty(Constants.CONTENT_TYPE, Constants.MIME_TEXT); -// connection.setRequestMethod("POST"); -// output = connection.getOutputStream(); -// output.write(in); -// -// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream())); -// -// StringBuffer response = new StringBuffer(); -// String inputLine; -// while ((inputLine = r.readLine()) != null) { -// response.append(inputLine); -// } -// -// String xmlOut = response.toString(); -// // System.out.println(xmlOut); -// if (xmlOut.contains("Exception")) -// return false; -// return true; -// -// } catch (Exception e) { -// logger.error(e.getLocalizedMessage()); -// throw new NlpHubException(e.getLocalizedMessage(), e); -// } finally { -// // output stream must be closed anyway... -// if (output != null) -// try { -// output.close(); -// } catch (IOException e) { -// logger.error(e.getLocalizedMessage()); -// } -// } -// } - private String generateFileName() { long now = System.currentTimeMillis(); return "auto-nlp-" + now; } + + private byte[] escapeForJson(byte[] content) throws UnsupportedEncodingException { + String text = new String(content, "UTF-8"); + text = text.replaceAll("\n", "\\\n"); + text = text.replaceAll("\r", "\\\r"); + text = text.replaceAll("\t", "\\\t"); + text = text.replaceAll("\"", "\\\""); + //text = text.replaceAll("\\", "\\\\"); + System.out.println("*** text\n" + text); + return text.getBytes("UTF-8"); + } } diff --git a/src/main/java/org/gcube/nlphub/legacy/Constants.java b/src/main/java/org/gcube/nlphub/legacy/Constants.java index 0cc83fc..3a23763 100644 --- a/src/main/java/org/gcube/nlphub/legacy/Constants.java +++ b/src/main/java/org/gcube/nlphub/legacy/Constants.java @@ -16,5 +16,23 @@ public class Constants { } return token; } - + + public static String hexDump(byte[] bytes) { + char[] hexArray = "0123456789ABCDEF".toCharArray(); + char[] hexChars = new char[bytes.length * 3]; + for (int j = 0; j < bytes.length; j++) { + hexChars[3*j] = hexArray[bytes[j] / 16]; + hexChars[3*j+1] = hexArray[bytes[j] % 16]; + hexChars[3*j+2] = ' '; + } + return new String(hexChars); + } +/* + public static void main(String[] args) { + String text = "I am here in\nPisa."; + System.out.println(text); + System.out.println(hexDump(text.getBytes())); + text = text.replaceAll("\n", "\\\\n"); + System.out.println(hexDump(text.getBytes())); + }*/ } diff --git a/src/main/webapp/css/custom.css b/src/main/webapp/css/custom.css index 4479259..1fd0b3c 100644 --- a/src/main/webapp/css/custom.css +++ b/src/main/webapp/css/custom.css @@ -266,6 +266,10 @@ select { } } +#result-text-div { + overflow-y: auto; +} + /* input[type="checkbox"]:not(:checked){ position: unset !important; diff --git a/src/main/webapp/index.jsp b/src/main/webapp/index.jsp index de213a7..9b4bbc1 100644 --- a/src/main/webapp/index.jsp +++ b/src/main/webapp/index.jsp @@ -30,6 +30,7 @@ var savedAnnotations = ""; var jsonContent = null; var named = null; + var resultText = ""; // ------------------------------------------------------------------------------------------------------ // Starting point: get the csv file with algorithm parameters and set the page with valid algs info. @@ -43,12 +44,6 @@ contentType : "text/plain; charset=utf-8", success : function(data, stato) { $("#ner-result-container").hide(); - - // -------------------- - // the following 2 lines are only for develop - //$("#ner-result-container").show(); - //$("#ner-ui").hide(); - // -------------------- var lineTokens = manageCsvData(data); getAlgorithms(lineTokens); buildAnnotationsAndLanguages(); @@ -58,27 +53,29 @@ $("#back-ner-ui-button").click(function() { $("#ner-result-container").hide(); $("#ner-ui").show(); + $("#input-textarea").val(""); + publicLink = null; + jsonContent = null; }); // the following line is only for develop + /* + var devData = "{\"link\":\"http://data.d4science.org/Wm5XY1p0TlFIM3NXZnZ1SkhLRlRMK3l6N05jVFhiSXFHbWJQNStIS0N6Yz0\",\"output\":" + + "{\"text\":\"yes, we can do it tomorrow in the evening.\",\"annotations\":\"Date,Sentence\",\"language\":\"English\",\"result\":" + + "[{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[18,26],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"GazDateWords\"}]},{\"Sentence\":[{\"indices\":[0,42]}]}]}]}}"; - var devData = "{\"link\":\"http://data.d4science.org/Wm5XY1p0TlFIM3NXZnZ1SkhLRlRMK3l6N05jVFhiSXFHbWJQNStIS0N6Yz0\",\"output\":" - + "{\"text\":\"yes, we can do it tomorrow in the evening.\",\"annotations\":\"Date,Sentence\",\"language\":\"English\",\"result\":" - + "[{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[18,26],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"GazDateWords\"}]},{\"Sentence\":[{\"indices\":[0,42]}]}]}]}}"; - - devData = "{\"link\":\"http://data.d4science.org/dXZ5L3RrRjYyTWNXZnZ1SkhLRlRMeXZzd1prOVFqWDFHbWJQNStIS0N6Yz0\",\"output\":" - + "{\"text\":\"In the 1970s, Iraq under Baath Party dictator Brigadier General Ahmad Hassan al-Bakr had grown close to the Soviet Union, " - + "with which it signed a treaty of friendship in 1972 and from which it began importing arms. In 1973, al-Bakr supported the Syrians in their war with Israel.\"," - + "\"annotations\":\"Date,Person\",\"language\":\"English\",\"result\": [{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[3,12]," - + "\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearSpan1\"},{\"indices\":[169,173],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearContext1\"}," - + "{\"indices\":[217,221],\"ruleFinal\":\"YearOnlyFinal\",\"kind\":\"date\",\"rule\":\"TempYear2\"}]},{\"Person\":[{\"indices\":[46,76],\"firstName\":\"Ahmad\",\"ruleFinal\":\"" - + "PersonFinal\",\"gender\":\"unknown\",\"surname\":\"Hassan\",\"kind\":\"personName\",\"rule\":\"PersonTitleGenderUnknown\",\"title\":\"Brigadier\"},{\"indices\":[77,84],\"firstName\":" - + "\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"},{\"indices\":[223,230],\"firstName\":\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"}]}]}]}}"; - - showResult(devData); + devData = "{\"link\":\"http://data.d4science.org/dXZ5L3RrRjYyTWNXZnZ1SkhLRlRMeXZzd1prOVFqWDFHbWJQNStIS0N6Yz0\",\"output\":" + + "{\"text\":\"In the 1970s, Iraq under Baath Party dictator Brigadier General Ahmad Hassan al-Bakr had grown close to the Soviet Union, " + + "with which it signed a treaty of friendship in 1972 and from which it began importing arms. In 1973, al-Bakr supported the Syrians in their war with Israel.\"," + + "\"annotations\":\"Date,Person\",\"language\":\"English\",\"result\": [{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[3,12]," + + "\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearSpan1\"},{\"indices\":[169,173],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearContext1\"}," + + "{\"indices\":[217,221],\"ruleFinal\":\"YearOnlyFinal\",\"kind\":\"date\",\"rule\":\"TempYear2\"}]},{\"Person\":[{\"indices\":[46,76],\"firstName\":\"Ahmad\",\"ruleFinal\":\"" + + "PersonFinal\",\"gender\":\"unknown\",\"surname\":\"Hassan\",\"kind\":\"personName\",\"rule\":\"PersonTitleGenderUnknown\",\"title\":\"Brigadier\"},{\"indices\":[77,84],\"firstName\":" + + "\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"},{\"indices\":[223,230],\"firstName\":\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"}]}]}]}}"; + showResult(devData); + */ // -------------------------------------- - $("#execute-button").css("background-color", pageGray); $("#upload-button") @@ -126,6 +123,7 @@ onSelect : function(files) { showProgress(); textAreaEnable(false); + $("#input-textarea").val(""); }, onSuccess : function(files, data, xhr) { hideProgress(); @@ -155,7 +153,7 @@ }); }, error : function(richiesta, stato, errori) { - alert("Unexpected Error"); + alert("Unexpected Error. Status: " + richiesta.status); } }); @@ -298,7 +296,8 @@ }, error : function(richiesta, stato, errori) { - alert("Unexpected Error"); + alert("Unexpected Error. Status: " + + richiesta.status); hideProgress(); textAreaEnable(true); $("#file-info").empty(); @@ -370,7 +369,7 @@ error : function(richiesta, stato, errori) { hideProgress(); textAreaEnable(true); - alert("Unexpected Error"); + alert("Unexpected Error. Status: " + richiesta.status); resetExecuteButton(); } }); @@ -404,7 +403,7 @@ $("#reset-upload").hide(); savedAnnotations = ""; publicLink = null; - alert("Unexpected Error"); + alert("Unexpected Error. Status: " + richiesta.status); } }); } @@ -413,7 +412,11 @@ $("#ner-ui").hide(); $("#ner-result-container").show(); - jsonContent = JSON.parse(purgeJson(data)); + jsonContent = data; + //jsonContent = JSON.parse(purgeJson(data)); + + //resultText = jsonToText(jsonContent.output.text); + resultText = jsonContent.output.text; $("#result-header").empty(); $("#result-params-div").empty(); @@ -422,26 +425,26 @@ $("#result-header") .append( "

You can download the result file from here

"); - $("#result-text-div").append("

" + jsonContent.output.text + "

"); + var localText = resultText; + localText = resultText.replace(/\n/g, "
"); + $("#result-text-div").empty(); + $("#result-text-div").append("

" + localText + "

"); showAnnotationList(jsonContent.output.annotations); } showAnnotationList = function(list) { - /*var colors = [ "ef2c23", "9c62de", "61de6c", "de61d2", "dec062", - "23efde", "232bde" ];*/ - - var colors = []; + var colors = []; var annotations = list.split(","); - for(var i=0; i"); for (var i = 0; i < annotations.length; i++) { var cb = ""; - cb += "