From c7f9c61294245d829ff8d0f2cd6a728ae81b9cfc Mon Sep 17 00:00:00 2001 From: Erik Perrone Date: Tue, 13 Mar 2018 14:31:48 +0000 Subject: [PATCH] Text management git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165003 82a268e6-3cf1-43bd-a215-b396298e98cf --- src/main/java/org/gcube/nlphub/NLPUploader.java | 8 ++++---- src/main/java/org/gcube/nlphub/nlp/NlpUtils.java | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/gcube/nlphub/NLPUploader.java b/src/main/java/org/gcube/nlphub/NLPUploader.java index 6e32c78..9f89a11 100644 --- a/src/main/java/org/gcube/nlphub/NLPUploader.java +++ b/src/main/java/org/gcube/nlphub/NLPUploader.java @@ -81,8 +81,8 @@ public class NLPUploader extends HttpServlet { private void handleFreeText(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String freeText = request.getParameter("freetext"); - freeText = freeText.replaceAll("[\\s]+", " ").trim(); - + freeText = NlpUtils.replaceDirtyCharacters(freeText); + byte[] content = freeText.getBytes("UTF-8"); String fileName = generateFileName(); PrintWriter writer = response.getWriter(); @@ -132,8 +132,8 @@ public class NLPUploader extends HttpServlet { bufferedContent = buffer; String stringContent = new String(bufferedContent); - stringContent = stringContent.replaceAll("[\\s]+", " ").trim(); - + stringContent = NlpUtils.replaceDirtyCharacters(stringContent); + ws.deleteFile(fileName, token); if (!ws.uploadFile(stringContent.getBytes(), fileName, Constants.DEFAULT_DESCRIPTION, token)) { diff --git a/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java b/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java index 0c1ea3b..5e8ee6e 100644 --- a/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java +++ b/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java @@ -44,7 +44,21 @@ public class NlpUtils { return content; } - + public static String replaceDirtyCharacters(String source) { + + char c = 0; + for (int i = 0; i < source.length(); i++) { + c = source.charAt(i); + if (!((c >= 33 && c <= 90) || (c >= 97 && c <= 122) || (c >= 128 && c <= 167) || (c >= 180 && c <= 183) || (c >= 210 && c <= 212) || (c >= 214 && c <= 216) || (c >= 224 && c<=255))) { + source = source.replace(source.substring(i, i + 1), " "); + } + } + + source = source.replaceAll("[\\s]+", " ").trim(); + source = source.replaceAll("<", " ").trim(); + source = source.replaceAll(">", " ").trim(); + return source; + } /* public static void main(String[] args) {