From 7210d917900536138a5fc201c2ac1e9e48870456 Mon Sep 17 00:00:00 2001 From: Erik Perrone Date: Mon, 12 Mar 2018 16:46:20 +0000 Subject: [PATCH] git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164942 82a268e6-3cf1-43bd-a215-b396298e98cf --- src/main/java/org/gcube/nlphub/NLPHub.java | 2 +- src/main/java/org/gcube/nlphub/NLPMapper.java | 2 +- .../java/org/gcube/nlphub/NLPUploader.java | 8 +- .../nlphub/nlp/NLpLanguageRecognizer.java | 170 +++++++++++++----- .../java/org/gcube/nlphub/nlp/NlpUtils.java | 24 ++- src/main/webapp/index.jsp | 2 +- src/main/webapp/js/main.js | 2 +- 7 files changed, 147 insertions(+), 63 deletions(-) diff --git a/src/main/java/org/gcube/nlphub/NLPHub.java b/src/main/java/org/gcube/nlphub/NLPHub.java index c31d556..2a19c1e 100644 --- a/src/main/java/org/gcube/nlphub/NLPHub.java +++ b/src/main/java/org/gcube/nlphub/NLPHub.java @@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet { private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName()); private static final long serialVersionUID = 1L; public static final String service = "http://dataminer-prototypes.d4science.org/wps/"; - private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; + private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private boolean devMode = true; /** diff --git a/src/main/java/org/gcube/nlphub/NLPMapper.java b/src/main/java/org/gcube/nlphub/NLPMapper.java index 05b40c4..e817dcc 100644 --- a/src/main/java/org/gcube/nlphub/NLPMapper.java +++ b/src/main/java/org/gcube/nlphub/NLPMapper.java @@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper; public class NLPMapper extends HttpServlet { private static final long serialVersionUID = 1L; private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName()); - private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; + private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private boolean devMode = true; /** diff --git a/src/main/java/org/gcube/nlphub/NLPUploader.java b/src/main/java/org/gcube/nlphub/NLPUploader.java index f7cc0b3..6e32c78 100644 --- a/src/main/java/org/gcube/nlphub/NLPUploader.java +++ b/src/main/java/org/gcube/nlphub/NLPUploader.java @@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet { private static final long serialVersionUID = 1L; private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName()); private boolean devMode = true; - private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; + private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private WorkspaceManager ws; /** @@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet { String link = ws.getPublicLink(fileName, token); String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent); System.out.println(sentence); - NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response); - recognizer.run(); - + //NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response); + //recognizer.run(); + NLpLanguageRecognizer.run(sentence, token, link, response); //writer.println(new JsonManager().getSuccessJsonResponse("" + link)); } catch (Exception x) { x.printStackTrace(); diff --git a/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java b/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java index cbf1d75..94581ec 100644 --- a/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java +++ b/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java @@ -4,10 +4,12 @@ import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import javax.servlet.http.HttpServletResponse; +import javax.xml.parsers.DocumentBuilderFactory; import org.apache.log4j.Logger; import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient; @@ -25,31 +27,113 @@ import org.gcube.nlphub.legacy.Constants; import org.gcube.nlphub.legacy.DataminerClient; import org.gcube.nlphub.legacy.JsonManager; import org.gcube.nlphub.legacy.NlpHubException; - - +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.xml.sax.InputSource; public class NLpLanguageRecognizer extends DataminerClient { private HttpServletResponse response; private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()); private String sentence, publicLink; public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER"; -// private String service = "http://dataminer-prototypes.d4science.org/wps/"; -// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; - + public NLpLanguageRecognizer(String service, String token, String sentence) { super(service, "", token); this.sentence = sentence; response = null; } - - public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, HttpServletResponse response) { + + public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, + HttpServletResponse response) { super(service, "", token); this.sentence = sentence; this.response = response; this.publicLink = publicLink; } + + public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException { + try { + String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0"; + urlService += "&gcube-token=" + token; + urlService += "&lang=en-US"; + urlService += "&Identifier=" + RECOGNIZER_ID; + urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8"); + URL url = new URL(urlService); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + //connection.setRequestProperty(Constants.TOKEN_PARAMETER, super.getToken()); + connection.setDoInput(true); + connection.setDoOutput(true); + connection.setUseCaches(false); + connection.setRequestMethod("GET"); + + BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream())); + Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r)); + doc.getDocumentElement().normalize(); + NodeList nListData = doc.getElementsByTagName("d4science:Data"); + NodeList nListDesc = doc.getElementsByTagName("d4science:Description"); + + int len = nListData.getLength(); + for(int i=0; i list = new ArrayList<>(); @@ -24,22 +24,29 @@ public class NlpUtils { } if(list.isEmpty()) - return content; + return escapeContent(content); String digest = list.get(0); for(String s : list) { if(s.length() < digest.length()) digest = s; } - return digest; + return escapeContent(digest); } public static int countTokens(String content) { return content.split("\\s").length; } + public static String escapeContent(String content) { + content = content.replaceAll("\\\\", " "); + content = content.replaceAll("\"", " "); + return content; + } + -/* + + /* public static void main(String[] args) { String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente"; text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations."; @@ -47,7 +54,10 @@ public class NlpUtils { text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula."; text += "The hotline was one of many that were closed as inter-Korean relations soured."; + text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\""; + + //text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata."; + System.out.println(getLanguageRecognizerDigest(text)); - } - */ + }*/ } diff --git a/src/main/webapp/index.jsp b/src/main/webapp/index.jsp index f642740..ea27ac4 100644 --- a/src/main/webapp/index.jsp +++ b/src/main/webapp/index.jsp @@ -40,7 +40,7 @@
-

Name Entity Recognition

+

Named Entity Recognition

Language selection
diff --git a/src/main/webapp/js/main.js b/src/main/webapp/js/main.js index e1c72b1..a3f7af3 100644 --- a/src/main/webapp/js/main.js +++ b/src/main/webapp/js/main.js @@ -130,7 +130,7 @@ checkLanguage = function(lang) { return; } } - alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text."); + alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\""); } /*