Text management

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165003 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Erik Perrone 2018-03-13 14:31:48 +00:00
parent 7dc3514e2e
commit c7f9c61294
2 changed files with 19 additions and 5 deletions

View File

@ -81,8 +81,8 @@ public class NLPUploader extends HttpServlet {
private void handleFreeText(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
String freeText = request.getParameter("freetext");
freeText = freeText.replaceAll("[\\s]+", " ").trim();
freeText = NlpUtils.replaceDirtyCharacters(freeText);
byte[] content = freeText.getBytes("UTF-8");
String fileName = generateFileName();
PrintWriter writer = response.getWriter();
@ -132,8 +132,8 @@ public class NLPUploader extends HttpServlet {
bufferedContent = buffer;
String stringContent = new String(bufferedContent);
stringContent = stringContent.replaceAll("[\\s]+", " ").trim();
stringContent = NlpUtils.replaceDirtyCharacters(stringContent);
ws.deleteFile(fileName, token);
if (!ws.uploadFile(stringContent.getBytes(), fileName, Constants.DEFAULT_DESCRIPTION, token)) {

View File

@ -44,7 +44,21 @@ public class NlpUtils {
return content;
}
/**
 * Normalizes free text by replacing every character outside the accepted set
 * with a separator, collapsing each run of separators into a single space and
 * removing leading and trailing separators.
 *
 * <p>The angle brackets {@code <} and {@code >} are treated as separators as
 * well. They are handled in the same pass as every other rejected character so
 * that text such as {@code "a < b"} yields {@code "a b"} instead of a run of
 * several spaces.
 *
 * @param source the text to clean; must not be {@code null}
 * @return the cleaned text, never {@code null}; empty when {@code source}
 *         contains no accepted character
 * @throws NullPointerException if {@code source} is {@code null}
 */
public static String replaceDirtyCharacters(String source) {
    // Single pass over the input: the result is built incrementally instead of
    // re-scanning and copying the whole string for every rejected character.
    StringBuilder cleaned = new StringBuilder(source.length());
    boolean separatorPending = false;
    for (int i = 0; i < source.length(); i++) {
        char c = source.charAt(i);
        if (isCleanCharacter(c)) {
            // Emit at most one space per run of rejected characters, and none
            // before the first accepted character (leading trim).
            if (separatorPending && cleaned.length() > 0) {
                cleaned.append(' ');
            }
            separatorPending = false;
            cleaned.append(c);
        } else {
            // A pending separator is only written once another accepted
            // character follows, which also trims the end of the text.
            separatorPending = true;
        }
    }
    return cleaned.toString();
}

/**
 * Tells whether a character is kept verbatim by {@link #replaceDirtyCharacters(String)}.
 *
 * <p>NOTE(review): the numeric ranges above 127 resemble positions in a legacy
 * 8-bit code page rather than Unicode code points, while {@code char} values
 * here are UTF-16 code units - confirm the intended character set.
 */
private static boolean isCleanCharacter(char c) {
    if (c == '<' || c == '>') {
        return false;
    }
    return (c >= 33 && c <= 90)
            || (c >= 97 && c <= 122)
            || (c >= 128 && c <= 167)
            || (c >= 180 && c <= 183)
            || (c >= 210 && c <= 212)
            || (c >= 214 && c <= 216)
            || (c >= 224 && c <= 255);
}
/*
public static void main(String[] args) {