Text management
git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165003 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
7dc3514e2e
commit
c7f9c61294
|
@ -81,7 +81,7 @@ public class NLPUploader extends HttpServlet {
|
||||||
private void handleFreeText(HttpServletRequest request, HttpServletResponse response)
|
private void handleFreeText(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
String freeText = request.getParameter("freetext");
|
String freeText = request.getParameter("freetext");
|
||||||
freeText = freeText.replaceAll("[\\s]+", " ").trim();
|
freeText = NlpUtils.replaceDirtyCharacters(freeText);
|
||||||
|
|
||||||
byte[] content = freeText.getBytes("UTF-8");
|
byte[] content = freeText.getBytes("UTF-8");
|
||||||
String fileName = generateFileName();
|
String fileName = generateFileName();
|
||||||
|
@ -132,7 +132,7 @@ public class NLPUploader extends HttpServlet {
|
||||||
bufferedContent = buffer;
|
bufferedContent = buffer;
|
||||||
|
|
||||||
String stringContent = new String(bufferedContent);
|
String stringContent = new String(bufferedContent);
|
||||||
stringContent = stringContent.replaceAll("[\\s]+", " ").trim();
|
stringContent = NlpUtils.replaceDirtyCharacters(stringContent);
|
||||||
|
|
||||||
ws.deleteFile(fileName, token);
|
ws.deleteFile(fileName, token);
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,21 @@ public class NlpUtils {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String replaceDirtyCharacters(String source) {
|
||||||
|
|
||||||
|
char c = 0;
|
||||||
|
for (int i = 0; i < source.length(); i++) {
|
||||||
|
c = source.charAt(i);
|
||||||
|
if (!((c >= 33 && c <= 90) || (c >= 97 && c <= 122) || (c >= 128 && c <= 167) || (c >= 180 && c <= 183) || (c >= 210 && c <= 212) || (c >= 214 && c <= 216) || (c >= 224 && c<=255))) {
|
||||||
|
source = source.replace(source.substring(i, i + 1), " ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
source = source.replaceAll("[\\s]+", " ").trim();
|
||||||
|
source = source.replaceAll("<", " ").trim();
|
||||||
|
source = source.replaceAll(">", " ").trim();
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|
Loading…
Reference in New Issue