From 7f660e7377914fe41cf799445fd65007de64cc66 Mon Sep 17 00:00:00 2001 From: Erik Perrone Date: Thu, 8 Mar 2018 13:28:45 +0000 Subject: [PATCH] git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164814 82a268e6-3cf1-43bd-a215-b396298e98cf --- src/main/java/org/gcube/nlphub/NLPHub.java | 4 - .../java/org/gcube/nlphub/NLPUploader.java | 9 +- .../nlphub/nlp/NLpLanguageRecognizer.java | 100 ++++++++++++++++++ .../org/gcube/nlphub/nlp/NlpNerRunner.java | 3 +- src/main/webapp/index.jsp | 61 ++++++++--- 5 files changed, 157 insertions(+), 20 deletions(-) create mode 100644 src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java diff --git a/src/main/java/org/gcube/nlphub/NLPHub.java b/src/main/java/org/gcube/nlphub/NLPHub.java index 1c02638..5fe3e81 100644 --- a/src/main/java/org/gcube/nlphub/NLPHub.java +++ b/src/main/java/org/gcube/nlphub/NLPHub.java @@ -69,10 +69,6 @@ public class NLPHub extends HttpServlet { private void doWork(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { token = Constants.getToken(request, devMode); try { - System.out.println("annotations: " + request.getParameter("annotations")); - System.out.println("lang: " + request.getParameter("lang")); - System.out.println("plink: " + request.getParameter("plink")); - System.out.println("algs: " + request.getParameter("algs")); String[] algs = request.getParameter("algs").split(","); for(int i=0; i -1) { byteRead = fileContent.read(readBuffer, 0, len); - // System.out.println(byteRead); if (byteRead > 0) { System.arraycopy(readBuffer, 0, buffer, offset, byteRead); offset += byteRead; @@ -129,9 +129,12 @@ public class NLPUploader extends HttpServlet { } else bufferedContent = buffer; + String stringContent = new String(bufferedContent); + stringContent = stringContent.replaceAll("[\\s]+", " ").trim(); + ws.deleteFile(fileName, token); - if (!ws.uploadFile(bufferedContent, fileName, Constants.DEFAULT_DESCRIPTION, token)) { + if (!ws.uploadFile(stringContent.getBytes(), fileName, Constants.DEFAULT_DESCRIPTION, token)) { writer.println(new JsonManager().getErrorJsonResponse( "Error uploading file. A file called '" + fileName + "' is already in the workspace?")); return; diff --git a/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java b/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java new file mode 100644 index 0000000..533c379 --- /dev/null +++ b/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java @@ -0,0 +1,100 @@ +package org.gcube.nlphub.nlp; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.Logger; +import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient; +import org.gcube.data.analysis.dataminermanagercl.shared.data.OutputData; +import org.gcube.data.analysis.dataminermanagercl.shared.data.computations.ComputationId; +import org.gcube.data.analysis.dataminermanagercl.shared.data.output.FileResource; +import org.gcube.data.analysis.dataminermanagercl.shared.data.output.MapResource; +import org.gcube.data.analysis.dataminermanagercl.shared.data.output.Resource; +import org.gcube.data.analysis.dataminermanagercl.shared.parameters.FileParameter; +import org.gcube.data.analysis.dataminermanagercl.shared.parameters.ListParameter; +import org.gcube.data.analysis.dataminermanagercl.shared.parameters.ObjectParameter; +import org.gcube.data.analysis.dataminermanagercl.shared.parameters.Parameter; +import org.gcube.data.analysis.dataminermanagercl.shared.parameters.ParameterType; +import org.gcube.nlphub.legacy.DataminerClient; +import org.gcube.nlphub.legacy.NlpHubException; + + + +public class NLpLanguageRecognizer extends DataminerClient { + private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()); + private String sentence; + public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER"; +// private String service = "http://dataminer-prototypes.d4science.org/wps/"; +// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; + + public NLpLanguageRecognizer(String service, String token, String sentence) { + super(service, "", token); + this.sentence = sentence; + } + + + public void run() throws NlpHubException { + try { + super.identifier = RECOGNIZER_ID; + super.init(); + ObjectParameter inputParameter = new ObjectParameter(); + inputParameter.setName("sentence"); + inputParameter.setValue(sentence); + ArrayList parameters = new ArrayList<>(); + parameters.add(inputParameter); + super.execute(parameters); + } catch (Exception e) { + logger.error(e.getLocalizedMessage()); + throw new NlpHubException(e.getLocalizedMessage(), e); + } + } + + + @Override + public void retrieveOutput(ComputationId computationId, SClient sClient) { + try { + OutputData output = sClient.getOutputDataByComputationId(computationId); + Resource resource = output.getResource(); + if (resource.isMap()) { + MapResource mapResource = (MapResource) resource; + for (String key : mapResource.getMap().keySet()) { + Resource r = mapResource.getMap().get(key); + if (r.isFile()) { + FileResource f = (FileResource) r; + String mimeType = f.getMimeType(); + if (mimeType.equalsIgnoreCase("application/d4science")) { + String link = f.getUrl(); + System.out.println("url: " + link); + String op = computationId.getOperatorId(); + op = op.substring(op.lastIndexOf(".") + 1); + //testEndOfProcess(op + ":::" + link); + } + } + } + } + } catch (Exception e) { + logger.error(e.getLocalizedMessage()); + //writeResponse(e.getLocalizedMessage(), false); + } + } + + public static void main(String[] args) { +// String service = "http://dataminer-prototypes.d4science.org/wps/"; +// String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; +// String sentence = "Per me si va nella città dolente"; +// +// NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence); +// try { +// recognizer.run(); +// } catch (Exception x) { +// x.printStackTrace(); +// } + /* + String test = "Anch'io ho voglia di dare il mio contributo\n alla causa"; + String regularized = test.replaceAll("[\\s]+", " "); + System.out.println("Before: " + test + "\n" + "After: " + regularized); + regularized = test.replaceAll("[\\n]+", " "); + System.out.println("After: " + regularized); + */ + } +} diff --git a/src/main/java/org/gcube/nlphub/nlp/NlpNerRunner.java b/src/main/java/org/gcube/nlphub/nlp/NlpNerRunner.java index 36b5868..3cfda31 100644 --- a/src/main/java/org/gcube/nlphub/nlp/NlpNerRunner.java +++ b/src/main/java/org/gcube/nlphub/nlp/NlpNerRunner.java @@ -86,6 +86,7 @@ public class NlpNerRunner extends DataminerClient { if (mimeType.equalsIgnoreCase("application/d4science")) { String link = f.getUrl(); System.out.println("url: " + link); + logger.debug("url: " + link); String op = computationId.getOperatorId(); op = op.substring(op.lastIndexOf(".") + 1); testEndOfProcess(op + ":::" + link); @@ -177,7 +178,7 @@ public class NlpNerRunner extends DataminerClient { List parameters = new ArrayList<>(); try { List inputParameters = super.getOperatorInputParameters(); - System.out.println("n. " + inputParameters.size()); + //System.out.println("n. " + inputParameters.size()); for (Parameter p : inputParameters) { switch (p.getTypology()) { case FILE: diff --git a/src/main/webapp/index.jsp b/src/main/webapp/index.jsp index 0ef264d..b87c9d2 100644 --- a/src/main/webapp/index.jsp +++ b/src/main/webapp/index.jsp @@ -47,10 +47,12 @@ $("#ner-result-container").hide(); var lineTokens = manageCsvData(data); getAlgorithms(lineTokens); - buildAnnotationsAndLanguages(); + buildLanguageSelection(); + buildLanguageList(); + buildAnnotations(); resizeTable(); resizeLogo(); - buildLanguageList(); + $("#back-ner-ui-button").click(function() { $("#ner-result-container").hide(); $("#ner-ui").show(); @@ -200,10 +202,21 @@ } } + /* buildAnnotationsAndLanguages = function() { // extract data about languages and annotations in order to populate the proper // controls in the page + buildLanguageSelection(); + buildAnnotations(); + + }*/ + + buildAnnotations = function() { + annotations = []; + var language = $("#language-select").val(); for (i in algorithms) { + if(algorithms[i].lang.toLowerCase() != language.toLowerCase()) + continue; var annotationList = algorithms[i].annotations; var a = annotationList.split(/\s|,/); for (j in a) { @@ -220,7 +233,11 @@ annotations[annotations.length] = a[j]; } } - + } + } + + buildLanguageSelection = function() { + for (i in algorithms) { var languageList = algorithms[i].lang; var langs = languageList.split(/\s|,/); for (j in langs) { @@ -237,10 +254,9 @@ languages[languages.length] = langs[j]; } } - } } - + setEventListeners = function() { $("#input-textarea").on("keyup", function() { if ($("#input-textarea").val() == "") @@ -323,7 +339,13 @@ var algList = ""; for (j in algorithms) { - algList += encodeURI(algorithms[j].id) + ","; + if(algorithms[j].lang.toLowerCase().indexOf($("#language-select").val().toLowerCase()) >= 0) { + algList += encodeURI(algorithms[j].id) + ","; + } + } + if(algList.length == 0) { + alert("Warning. No algorithm matching with selected language."); + return; } algList = algList.substring(0, algList.length - 1); @@ -337,14 +359,15 @@ type : "POST", async : true, success : function(data, stato) { - //hideProgress(); textAreaEnable(true); $("#file-info").empty(); if (typeof (data.response) != 'undefined') { var jsonOut = getOutputJson(data.message); } else if (typeof (data.error) != 'undefined') { + hideProgress(); alert(data.message); } else { + hideProgress(); alert("Unexpected response"); } resetExecuteButton(); @@ -425,10 +448,16 @@ } showAnnotationList = function(list) { + var colorDisabled = "CCCCCC"; + var color; var colors = []; var annotations = list.split(","); for (var i = 0; i < annotations.length; i++) { - colors[colors.length] = randomRGB(); + do { + color = randomRGB(); + } + while(color == colorDisabled); + colors[colors.length] = color; } $("#result-params-div") @@ -437,6 +466,7 @@ var cb = ""; cb += "