This commit is contained in:
Erik Perrone 2018-02-26 14:39:10 +00:00
parent 75a08add56
commit 73ca1ce2eb
5 changed files with 150 additions and 186 deletions

View File

@ -62,8 +62,10 @@ public class NLPMapper extends HttpServlet {
}
private void doWork(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
token = Constants.getToken(request, devMode);
response.setContentType("application/json");
response.setCharacterEncoding("utf-8");
token = Constants.getToken(request, devMode);
String documentLink = request.getParameter("plink"); // link to text file (workspace)
String toBeMap = request.getParameter("tobemap");
System.out.println("tobemap: " + toBeMap);
@ -74,12 +76,21 @@ public class NLPMapper extends HttpServlet {
tokens = new String[1];
tokens[0] = toBeMap;
}
System.out.println("token nr. : " + tokens.length);
String annotations = request.getParameter("annotations");
String language = request.getParameter("lang");
PrintWriter writer = response.getWriter();
try {
String text = getDocument(documentLink);
text = text.replaceAll("\n", "\\\\n");;
text = text.replaceAll("\r", "\\\\r");
text = text.replaceAll("\t", "\\\\t");
text = text.replaceAll("\"", "\\\\\"");
// byte[] b = text.getBytes();
// System.out.println(Constants.hexDump(b));
String out = "{";
out += "\"text\":\"" + text + "\",";
out += "\"annotations\":\"" + annotations + "\",";
@ -154,4 +165,6 @@ public class NLPMapper extends HttpServlet {
String link = ws.getPublicLink(fileName, token);
return link;
}
}

View File

@ -8,6 +8,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
@ -78,7 +79,6 @@ public class NLPUploader extends HttpServlet {
private void handleFreeText(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
String freeText = request.getParameter("freetext");
System.out.println(freeText);
byte[] content = freeText.getBytes("UTF-8");
String fileName = generateFileName();
PrintWriter writer = response.getWriter();
@ -111,7 +111,7 @@ public class NLPUploader extends HttpServlet {
byte[] bufferedContent;
try {
InputStream fileContent = filePart.getInputStream();
int offset = 0, len = 100, byteRead = 0;
int offset = 0, len = 256, byteRead = 0;
byte[] readBuffer = new byte[len];
while (byteRead > -1) {
byteRead = fileContent.read(readBuffer, 0, len);
@ -157,141 +157,19 @@ public class NLPUploader extends HttpServlet {
return null;
}
// private String getPublicLink(String fileName, String token) throws NlpHubException {
// try {
// String link = "";
// String user = authorizationService().get(token).getClientInfo().getId();
// String wsRoot = "/Home/" + user + "/Workspace/";
// String webapp = "https://workspace-repository.d4science.org/home-library-webapp";
// String uri = webapp + "/rest/GetPublicLink?absPath=" + URLEncoder.encode(wsRoot + fileName, "UTF-8")
// + "&shortUrl=false";
// URL url = new URL(uri);
//
// // System.out.println(uri);
//
// HttpURLConnection connection = (HttpURLConnection) url.openConnection();
// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
// connection.setDoInput(true);
// connection.setDoOutput(true);
// connection.setUseCaches(false);
// connection.setRequestMethod("GET");
//
// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
//
// StringBuffer response = new StringBuffer();
// String inputLine;
// while ((inputLine = r.readLine()) != null) {
// response.append(inputLine);
// }
//
// String xmlOut = response.toString();
// // System.out.println("xmlOut: " + xmlOut);
//
// String begin = "<string>";
// String end = "</string>";
// int b = xmlOut.indexOf(begin);
// int e = xmlOut.indexOf(end);
//
// if (xmlOut.contains("Exception") || (e < 0) || (b < 0)) {
// String message = "Invalid link: " + URLEncoder.encode(xmlOut, "UTF-8");
// logger.error(message);
// throw new NlpHubException(message, null);
// }
//
// link = xmlOut.substring(b + begin.length(), e);
// return link;
//
// } catch (Exception e) {
// logger.error(e.getLocalizedMessage());
// throw new NlpHubException(e.getLocalizedMessage(), e);
// }
// }
//
// private void deleteFile(String fileName, String token) throws NlpHubException {
// try {
// String user = authorizationService().get(token).getClientInfo().getId();
// String wsRoot = "/Home/" + user + "/Workspace/";
// String webapp = "https://workspace-repository.d4science.org/home-library-webapp";
// String uri = webapp + "/rest/Delete?absPath=" + URLEncoder.encode(wsRoot + fileName, "UTF-8");
// URL url = new URL(uri);
//
// // System.out.println(uri);
//
// HttpURLConnection connection = (HttpURLConnection) url.openConnection();
// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
// connection.setDoInput(true);
// connection.setDoOutput(true);
// connection.setUseCaches(false);
// connection.setRequestMethod("GET");
//
// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
//
// StringBuffer response = new StringBuffer();
// String inputLine;
// while ((inputLine = r.readLine()) != null) {
// response.append(inputLine);
// }
//
// String xmlOut = response.toString();
// // System.out.println(xmlOut);
// } catch (Exception e) {
// logger.error(e.getLocalizedMessage());
// throw new NlpHubException(e.getLocalizedMessage(), e);
// }
// }
//
// private boolean uploadFile(byte[] in, String name, String description, String token) throws NlpHubException {
// OutputStream output = null;
// try {
// String user = authorizationService().get(token).getClientInfo().getId();
// String wsRoot = "/Home/" + user + "/Workspace/";
// String webapp = "https://workspace-repository.d4science.org/home-library-webapp";
// String uri = webapp + "/rest/Upload?name=" + URLEncoder.encode(name, "UTF-8") + "&description="
// + URLEncoder.encode(description, "UTF-8") + "&parentPath=" + URLEncoder.encode(wsRoot, "UTF-8");
// URL url = new URL(uri);
//
// // System.out.println(uri);
//
// HttpURLConnection connection = (HttpURLConnection) url.openConnection();
// connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
// connection.setDoInput(true);
// connection.setDoOutput(true);
// connection.setUseCaches(false);
// connection.setRequestProperty(Constants.CONTENT_TYPE, Constants.MIME_TEXT);
// connection.setRequestMethod("POST");
// output = connection.getOutputStream();
// output.write(in);
//
// BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
//
// StringBuffer response = new StringBuffer();
// String inputLine;
// while ((inputLine = r.readLine()) != null) {
// response.append(inputLine);
// }
//
// String xmlOut = response.toString();
// // System.out.println(xmlOut);
// if (xmlOut.contains("Exception"))
// return false;
// return true;
//
// } catch (Exception e) {
// logger.error(e.getLocalizedMessage());
// throw new NlpHubException(e.getLocalizedMessage(), e);
// } finally {
// // output stream must be closed anyway...
// if (output != null)
// try {
// output.close();
// } catch (IOException e) {
// logger.error(e.getLocalizedMessage());
// }
// }
// }
/**
 * Builds a name for an automatically created file: the fixed prefix
 * {@code "auto-nlp-"} followed by the current time in milliseconds.
 *
 * @return the generated file name
 */
private String generateFileName() {
    final StringBuilder name = new StringBuilder("auto-nlp-");
    name.append(System.currentTimeMillis());
    return name.toString();
}
/**
 * Escapes UTF-8 encoded text so that it can be embedded between the double
 * quotes of a JSON string literal.
 *
 * <p>The backslash, the double quote and every control character below
 * U+0020 are escaped as required by the JSON grammar. The previous
 * implementation used {@code replaceAll} with replacement strings such as
 * {@code "\\\n"}; in a regex replacement the backslash only quotes the next
 * character, so every call put the original character back and the text was
 * returned unchanged.
 *
 * @param content the text to escape, encoded as UTF-8; must not be null
 * @return the escaped text, encoded as UTF-8
 * @throws UnsupportedEncodingException if UTF-8 is not available (kept for
 *         signature compatibility with existing callers)
 */
private byte[] escapeForJson(byte[] content) throws UnsupportedEncodingException {
    String text = new String(content, "UTF-8");
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '\\':
            escaped.append("\\\\");
            break;
        case '"':
            escaped.append("\\\"");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        case '\t':
            escaped.append("\\t");
            break;
        case '\b':
            escaped.append("\\b");
            break;
        case '\f':
            escaped.append("\\f");
            break;
        default:
            if (c < 0x20) {
                // Remaining control characters have no short form in JSON.
                escaped.append(String.format("\\u%04x", (int) c));
            } else {
                escaped.append(c);
            }
            break;
        }
    }
    // The debug dump of the whole text to stdout was dropped on purpose:
    // it wrote user-supplied content to the server console on every call.
    return escaped.toString().getBytes("UTF-8");
}
}

View File

@ -16,5 +16,23 @@ public class Constants {
}
return token;
}
/**
 * Renders a byte array as upper-case hexadecimal: two digits per byte, each
 * byte followed by a single space (for example {@code {0x0A, (byte) 0xFF}}
 * becomes {@code "0A FF "}).
 *
 * @param bytes the bytes to dump; must not be null
 * @return the hexadecimal dump, or an empty string for an empty array
 */
public static String hexDump(byte[] bytes) {
    char[] hexArray = "0123456789ABCDEF".toCharArray();
    char[] hexChars = new char[bytes.length * 3];
    for (int j = 0; j < bytes.length; j++) {
        // Java bytes are signed: mask to 0..255 first, otherwise any byte
        // >= 0x80 yields a negative table index and an
        // ArrayIndexOutOfBoundsException.
        int value = bytes[j] & 0xFF;
        hexChars[3 * j] = hexArray[value >>> 4];
        hexChars[3 * j + 1] = hexArray[value & 0x0F];
        hexChars[3 * j + 2] = ' ';
    }
    return new String(hexChars);
}
/*
public static void main(String[] args) {
String text = "I am here in\nPisa.";
System.out.println(text);
System.out.println(hexDump(text.getBytes()));
text = text.replaceAll("\n", "\\\\n");
System.out.println(hexDump(text.getBytes()));
}*/
}

View File

@ -266,6 +266,10 @@ select {
}
}
#result-text-div {
overflow-y: auto;
}
/*
input[type="checkbox"]:not(:checked){
position: unset !important;

View File

@ -30,6 +30,7 @@
var savedAnnotations = "";
var jsonContent = null;
var named = null;
var resultText = "";
// ------------------------------------------------------------------------------------------------------
// Starting point: get the csv file with algorithm parameters and set the page with valid algs info.
@ -43,12 +44,6 @@
contentType : "text/plain; charset=utf-8",
success : function(data, stato) {
$("#ner-result-container").hide();
// --------------------
// the following 2 lines are only for develop
//$("#ner-result-container").show();
//$("#ner-ui").hide();
// --------------------
var lineTokens = manageCsvData(data);
getAlgorithms(lineTokens);
buildAnnotationsAndLanguages();
@ -58,27 +53,29 @@
$("#back-ner-ui-button").click(function() {
$("#ner-result-container").hide();
$("#ner-ui").show();
$("#input-textarea").val("");
publicLink = null;
jsonContent = null;
});
// the following lines are for development only
/*
var devData = "{\"link\":\"http://data.d4science.org/Wm5XY1p0TlFIM3NXZnZ1SkhLRlRMK3l6N05jVFhiSXFHbWJQNStIS0N6Yz0\",\"output\":"
+ "{\"text\":\"yes, we can do it tomorrow in the evening.\",\"annotations\":\"Date,Sentence\",\"language\":\"English\",\"result\":"
+ "[{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[18,26],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"GazDateWords\"}]},{\"Sentence\":[{\"indices\":[0,42]}]}]}]}}";
var devData = "{\"link\":\"http://data.d4science.org/Wm5XY1p0TlFIM3NXZnZ1SkhLRlRMK3l6N05jVFhiSXFHbWJQNStIS0N6Yz0\",\"output\":"
+ "{\"text\":\"yes, we can do it tomorrow in the evening.\",\"annotations\":\"Date,Sentence\",\"language\":\"English\",\"result\":"
+ "[{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[18,26],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"GazDateWords\"}]},{\"Sentence\":[{\"indices\":[0,42]}]}]}]}}";
devData = "{\"link\":\"http://data.d4science.org/dXZ5L3RrRjYyTWNXZnZ1SkhLRlRMeXZzd1prOVFqWDFHbWJQNStIS0N6Yz0\",\"output\":"
+ "{\"text\":\"In the 1970s, Iraq under Baath Party dictator Brigadier General Ahmad Hassan al-Bakr had grown close to the Soviet Union, "
+ "with which it signed a treaty of friendship in 1972 and from which it began importing arms. In 1973, al-Bakr supported the Syrians in their war with Israel.\","
+ "\"annotations\":\"Date,Person\",\"language\":\"English\",\"result\": [{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[3,12],"
+ "\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearSpan1\"},{\"indices\":[169,173],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearContext1\"},"
+ "{\"indices\":[217,221],\"ruleFinal\":\"YearOnlyFinal\",\"kind\":\"date\",\"rule\":\"TempYear2\"}]},{\"Person\":[{\"indices\":[46,76],\"firstName\":\"Ahmad\",\"ruleFinal\":\""
+ "PersonFinal\",\"gender\":\"unknown\",\"surname\":\"Hassan\",\"kind\":\"personName\",\"rule\":\"PersonTitleGenderUnknown\",\"title\":\"Brigadier\"},{\"indices\":[77,84],\"firstName\":"
+ "\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"},{\"indices\":[223,230],\"firstName\":\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"}]}]}]}}";
showResult(devData);
devData = "{\"link\":\"http://data.d4science.org/dXZ5L3RrRjYyTWNXZnZ1SkhLRlRMeXZzd1prOVFqWDFHbWJQNStIS0N6Yz0\",\"output\":"
+ "{\"text\":\"In the 1970s, Iraq under Baath Party dictator Brigadier General Ahmad Hassan al-Bakr had grown close to the Soviet Union, "
+ "with which it signed a treaty of friendship in 1972 and from which it began importing arms. In 1973, al-Bakr supported the Syrians in their war with Israel.\","
+ "\"annotations\":\"Date,Person\",\"language\":\"English\",\"result\": [{\"algorithm\":\"ENGLISH_NAMED_ENTITY_RECOGNIZER\",\"entities\":[{\"Date\":[{\"indices\":[3,12],"
+ "\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearSpan1\"},{\"indices\":[169,173],\"ruleFinal\":\"DateOnlyFinal\",\"kind\":\"date\",\"rule\":\"YearContext1\"},"
+ "{\"indices\":[217,221],\"ruleFinal\":\"YearOnlyFinal\",\"kind\":\"date\",\"rule\":\"TempYear2\"}]},{\"Person\":[{\"indices\":[46,76],\"firstName\":\"Ahmad\",\"ruleFinal\":\""
+ "PersonFinal\",\"gender\":\"unknown\",\"surname\":\"Hassan\",\"kind\":\"personName\",\"rule\":\"PersonTitleGenderUnknown\",\"title\":\"Brigadier\"},{\"indices\":[77,84],\"firstName\":"
+ "\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"},{\"indices\":[223,230],\"firstName\":\"al\",\"ruleFinal\":\"PersonFinal\",\"gender\":\"male\",\"surname\":\"Bakr\",\"kind\":\"fullName\",\"rule\":\"PersonFull\"}]}]}]}}";
showResult(devData);
*/
// --------------------------------------
$("#execute-button").css("background-color", pageGray);
$("#upload-button")
@ -126,6 +123,7 @@
onSelect : function(files) {
showProgress();
textAreaEnable(false);
$("#input-textarea").val("");
},
onSuccess : function(files, data, xhr) {
hideProgress();
@ -155,7 +153,7 @@
});
},
error : function(richiesta, stato, errori) {
alert("Unexpected Error");
alert("Unexpected Error. Status: " + richiesta.status);
}
});
@ -298,7 +296,8 @@
},
error : function(richiesta, stato,
errori) {
alert("Unexpected Error");
alert("Unexpected Error. Status: "
+ richiesta.status);
hideProgress();
textAreaEnable(true);
$("#file-info").empty();
@ -370,7 +369,7 @@
error : function(richiesta, stato, errori) {
hideProgress();
textAreaEnable(true);
alert("Unexpected Error");
alert("Unexpected Error. Status: " + richiesta.status);
resetExecuteButton();
}
});
@ -404,7 +403,7 @@
$("#reset-upload").hide();
savedAnnotations = "";
publicLink = null;
alert("Unexpected Error");
alert("Unexpected Error. Status: " + richiesta.status);
}
});
}
@ -413,7 +412,11 @@
$("#ner-ui").hide();
$("#ner-result-container").show();
jsonContent = JSON.parse(purgeJson(data));
jsonContent = data;
//jsonContent = JSON.parse(purgeJson(data));
//resultText = jsonToText(jsonContent.output.text);
resultText = jsonContent.output.text;
$("#result-header").empty();
$("#result-params-div").empty();
@ -422,26 +425,26 @@
$("#result-header")
.append(
"<p>You can download the result file from <a href='" + jsonContent.link +"'>here</a></p>");
$("#result-text-div").append("<p>" + jsonContent.output.text + "</p>");
var localText = resultText;
localText = resultText.replace(/\n/g, "<br>");
$("#result-text-div").empty();
$("#result-text-div").append("<p>" + localText + "</p>");
showAnnotationList(jsonContent.output.annotations);
}
showAnnotationList = function(list) {
/*var colors = [ "ef2c23", "9c62de", "61de6c", "de61d2", "dec062",
"23efde", "232bde" ];*/
var colors = [];
var colors = [];
var annotations = list.split(",");
for(var i=0; i<annotations.length; i++) {
for (var i = 0; i < annotations.length; i++) {
colors[colors.length] = randomRGB();
}
$("#result-params-div")
.append("<form id='colored-annotations'></form>");
for (var i = 0; i < annotations.length; i++) {
var cb = "<input type='radio' name='foo' named='" + annotations[i] + "' value='" + colors[i] + "' id='color-" + i + "' class='filled-in'>";
cb += "<label for='color-" + i + "'><span style='color:#" + colors[i] + "'>"
cb += "<label for='color-" + i + "'><span style='font-weight:bold; color:#" + colors[i] + "'>"
+ annotations[i] + "</span></label><br>";
$("#colored-annotations").append(cb);
}
@ -452,29 +455,57 @@
});
}
countSubstringOccurrencies = function(string, substring) {
var occurrencies = 0;
var index = 0;
var s = string;
while(index >= 0) {
index = s.indexOf(substring);
if(index >= 0) {
occurrencies++;
s = s.substring(index + 1);
}
}
return occurrencies;
}
// Re-renders the text inside #result-text-div, wrapping the character ranges
// returned by getIndices(annotation) in a bold <span> coloured with `color`.
// Newlines in the final text are turned into <br> before it is appended.
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of a diff (several statements occur twice, once reading
// jsonContent.output.text and once reading resultText) -- confirm against the
// real file before relying on the statement order shown here.
rewriteText = function(annotation, color) {
$("#result-text-div").empty();
console.log(annotation + " " + color);
// The computed complement is discarded: each computation is immediately
// overwritten by a fixed light-grey value on the following line.
// NOTE(review): eval() is applied to text derived from `color`; presumably
// `color` is always a generated hex string -- verify against the caller.
var complement = "#" + (0xFFFFFF - eval("0x" + color.substring(1))).toString(16);
complement = "#FAFAFA";
var complement = "#"
+ (0xFFFFFF - eval("0x" + color.substring(1))).toString(16);
complement = "#F0F0F0";
var indices = getIndices(annotation);
var indexedText = "";
// No ranges for this annotation: show the plain text and stop.
if (indices.length == 0)
if (indices.length == 0) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>");
$("#result-text-div").append("<p>" + indexedText + "</p>");
return;
}
// t is the position in the text up to which output has been produced.
var t = 0;
// offset accumulates the number of \n, \r and \t characters seen so far and
// is added to each range's start and end.
var offset = 0;
for (var i = 0; i < indices.length; i++) {
var index = indices[i];
var start = index[0];
var end = index[1];
indexedText += jsonContent.output.text.substring(t, start);
indexedText += resultText.substring(t, start);
offset += countSubstringOccurrencies(resultText.substring(t, start), "\n");
offset += countSubstringOccurrencies(resultText.substring(t, start), "\r");
offset += countSubstringOccurrencies(resultText.substring(t, start), "\t");
start += offset;
end += offset;
var colored = "<span style='color:" + color + "; background:" + complement + "; font-weight:bold;'>"
+ jsonContent.output.text.substring(start, end) + "</span>";
+ resultText.substring(start, end) + "</span>";
indexedText += colored;
t = end;
}
// Append whatever follows the last highlighted range.
if (t < jsonContent.output.text.length)
indexedText += jsonContent.output.text.substring(t);
$("#result-text-div").empty();
if (t < resultText.length)
indexedText += resultText.substring(t);
indexedText = indexedText.replace(/\n/g, "<br>");
$("#result-text-div").append("<p>" + indexedText + "</p>");
}
@ -587,16 +618,36 @@
}
// Rewrites raw newline, carriage-return and tab characters in a JSON string.
// NOTE(review): this span appears to interleave the old and new versions of
// the function from a diff. As written, the first `return` ends the function
// and everything after it is unreachable -- confirm against the real file.
purgeJson = function(json) {
// A string pattern (not a regex) removes only the first newline.
return json.replace("\n", "");
console.log("purgeJson:before: " + json.length);
// Each replacement literal is two backslashes followed by a letter, so every
// control character becomes e.g. \\n (two backslashes and "n") in the output.
var purged = json.replace(/\n/g, "\\\\n").replace(/\r/g, "\\\\r")
.replace(/\t/g, "\\\\t");
console.log("purgeJson:after : " + purged.length);
return purged;
}
jsonToText = function(json) {
console.log("jsonToText:before: " + json.length);
var purged = json.replace(/\\n/g, "");
console.log("jsonToText:after : " + purged.length);
return purged;
//return json.replace(/\\n/g, "\n");
}
// Returns a random colour as six upper-case hex digits, without a leading "#".
// NOTE(review): this span appears to interleave the old and new versions of
// the function from a diff. As written, the first `return color` ends the
// function and the per-channel loop below it is unreachable -- confirm against
// the real file.
randomRGB = function() {
var letters = '0123456789ABCDEF';
var color = '';
// Six independent random hex digits.
for (var i = 0; i < 6; i++) {
color += letters[Math.floor(Math.random() * 16)];
}
return color;
var color = '';
var couple = '';
// One two-digit channel at a time, three channels in total.
for(var i=0; i < 3; i++) {
do {
couple = '';
// NOTE(review): `j` is not declared with var, so it is an implicit global.
for(j=0; j<2; j++) {
couple += letters[Math.floor(Math.random() * 16)];
}
// Redraw while the channel value is above 207 (255 - value < 48).
} while((255 - parseInt("0x" + couple)) < 48)
color += couple;
}
return color;
}
</script>