git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164942 82a268e6-3cf1-43bd-a215-b396298e98cf

2018-03-12 16:46:20 +00:00 · 2018-03-12 16:46:20 +00:00 · 7210d91790
parent 07fb54148b
commit 7210d91790
7 changed files with 147 additions and 63 deletions
--- a/src/main/java/org/gcube/nlphub/NLPHub.java
+++ b/src/main/java/org/gcube/nlphub/NLPHub.java
@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet {
 	private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
 	private static final long serialVersionUID = 1L;
 	public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
-	private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
+	private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
 	private boolean devMode = true;

 	/**
--- a/src/main/java/org/gcube/nlphub/NLPMapper.java
+++ b/src/main/java/org/gcube/nlphub/NLPMapper.java
@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper;
 public class NLPMapper extends HttpServlet {
 	private static final long serialVersionUID = 1L;
 	private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
-	private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
+	private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
 	private boolean devMode = true;
 	
 	/**
--- a/src/main/java/org/gcube/nlphub/NLPUploader.java
+++ b/src/main/java/org/gcube/nlphub/NLPUploader.java
@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet {
 	private static final long serialVersionUID = 1L;
 	private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
 	private boolean devMode = true;
-	private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
+	private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
 	private WorkspaceManager ws;
 	
 	/**
@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet {
 			String link = ws.getPublicLink(fileName, token);
 			String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
 			System.out.println(sentence);
-			NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
-			recognizer.run();
-
+			//NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
+			//recognizer.run();
+			NLpLanguageRecognizer.run(sentence, token, link, response);
 			//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
 		} catch (Exception x) {
 			x.printStackTrace();
--- a/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java
+++ b/src/main/java/org/gcube/nlphub/nlp/NLpLanguageRecognizer.java
@ -4,10 +4,12 @@ import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;

 import javax.servlet.http.HttpServletResponse;
+import javax.xml.parsers.DocumentBuilderFactory;

 import org.apache.log4j.Logger;
 import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
@ -25,31 +27,113 @@ import org.gcube.nlphub.legacy.Constants;
 import org.gcube.nlphub.legacy.DataminerClient;
 import org.gcube.nlphub.legacy.JsonManager;
 import org.gcube.nlphub.legacy.NlpHubException;
-
-
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;

 public class NLpLanguageRecognizer extends DataminerClient {
 	private HttpServletResponse response;
 	private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
 	private String sentence, publicLink;
 	public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
-//	private String service = "http://dataminer-prototypes.d4science.org/wps/";
-//	private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
-	
+
+	/**
+	 * Creates a recognizer that has no servlet response attached.
+	 * <p>
+	 * {@code service} and {@code token} are forwarded to the {@link DataminerClient}
+	 * constructor together with an empty string as second argument. The
+	 * {@code response} field is set to {@code null}, so {@link #retrieveOutput}
+	 * will not write the result to any HTTP response.
+	 *
+	 * @param service  service address forwarded to the superclass
+	 * @param token    token forwarded to the superclass
+	 * @param sentence text stored in the {@code sentence} field
+	 */
 	public NLpLanguageRecognizer(String service, String token, String sentence) {
 		super(service, "", token);
 		this.sentence = sentence;
 		response = null;
 	}
-	
-	public NLpLanguageRecognizer(String service, String token,  String sentence, String publicLink, HttpServletResponse response) {
+
+	/**
+	 * Creates a recognizer bound to a servlet response.
+	 * <p>
+	 * {@code service} and {@code token} are forwarded to the {@link DataminerClient}
+	 * constructor together with an empty string as second argument; the remaining
+	 * arguments are stored in the corresponding fields.
+	 *
+	 * @param service    service address forwarded to the superclass
+	 * @param token      token forwarded to the superclass
+	 * @param sentence   text stored in the {@code sentence} field
+	 * @param publicLink link passed to the JSON response built in {@link #retrieveOutput}
+	 * @param response   servlet response the JSON result is written to; when
+	 *                   {@code null} nothing is written
+	 */
+	public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink,
+			HttpServletResponse response) {
 		super(service, "", token);
 		this.sentence = sentence;
 		this.response = response;
 		this.publicLink = publicLink;
 	}
+
+	/**
+	 * Runs the language recognizer through a direct HTTP GET against the WPS
+	 * endpoint and emits the content of every {@code outfile} output.
+	 * <p>
+	 * The WPS XML answer is parsed, the {@code d4science:Data} and
+	 * {@code d4science:Description} elements are paired by position, and for each
+	 * pair whose description is {@code outfile} the linked file is downloaded with
+	 * {@link #readFileContent(String, String)}. The resulting JSON is written to
+	 * {@code response}, or to standard output when {@code response} is {@code null}.
+	 *
+	 * @param sentence   text to analyse; sent URL-encoded as the {@code sentence} data input
+	 * @param token      gCube token; sent URL-encoded as the {@code gcube-token} query parameter
+	 * @param publicLink link passed through to the JSON response
+	 * @param response   servlet response to write to, or {@code null} to print to standard output
+	 * @throws NlpHubException wrapping any failure (missing arguments, I/O, XML parsing)
+	 */
+	public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException {
+		HttpURLConnection connection = null;
+		try {
+			// fail with a readable message instead of sending the literal text "null" to the service
+			if (sentence == null || token == null) {
+				throw new IllegalArgumentException("sentence and token are required");
+			}
+			String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
+			// dynamic values are URL-encoded so reserved characters cannot corrupt the query string
+			urlService += "&gcube-token=" + URLEncoder.encode(token, "UTF-8");
+			urlService += "&lang=en-US";
+			urlService += "&Identifier=" + RECOGNIZER_ID;
+			urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8");
+			URL url = new URL(urlService);
+			connection = (HttpURLConnection) url.openConnection();
+			connection.setDoInput(true);
+			connection.setDoOutput(true);
+			connection.setUseCaches(false);
+			connection.setRequestMethod("GET");
+
+			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+			// the document comes from a remote service: refuse DOCTYPE declarations to rule out XXE
+			factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+			Document doc;
+			// the raw byte stream is handed to the parser so the encoding declared by the XML is
+			// honoured (a Reader would force the platform default charset); the stream is always closed
+			try (java.io.InputStream in = connection.getInputStream()) {
+				doc = factory.newDocumentBuilder().parse(new InputSource(in));
+			}
+			doc.getDocumentElement().normalize();
+			NodeList nListData = doc.getElementsByTagName("d4science:Data");
+			NodeList nListDesc = doc.getElementsByTagName("d4science:Description");
+
+			int len = nListData.getLength();
+			for (int i = 0; i < len; i++) {
+				Node data = nListData.item(i);
+				Node description = nListDesc.item(i);
+				// item(i) returns null when there are fewer descriptions than data elements
+				if (description == null) {
+					continue;
+				}
+				String link = data.getTextContent();
+				String type = description.getTextContent();
+				if ("outfile".equals(type)) {
+					System.out.println(link);
+					String content = readFileContent(link, token);
+					if (response != null) {
+						response.getWriter()
+								.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
+					} else {
+						System.out.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
+					}
+				}
+			}
+		} catch (Exception e) {
+			// pass the exception to the logger so the stack trace is not lost
+			Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).error(e.getLocalizedMessage(), e);
+			throw new NlpHubException(e.getLocalizedMessage(), e);
+		} finally {
+			if (connection != null) {
+				connection.disconnect();
+			}
+		}
+	}
 	
+	/**
+	 * Instance entry point: delegates to {@link #runUsingClientLibrary()}.
+	 *
+	 * @throws NlpHubException propagated from {@code runUsingClientLibrary()}
+	 */
 	public void run() throws NlpHubException {
+		runUsingClientLibrary();
+	}
+
+	/**
+	 * Reads the output of a finished computation and emits the content of every
+	 * file resource named {@code outfile} (case-insensitive).
+	 * <p>
+	 * Only map resources are inspected. Each matching file is downloaded with
+	 * {@link #readFileContent(String)}, printed to standard output followed by a
+	 * dot and, when a servlet response is attached, written to it as a JSON
+	 * success message together with {@code publicLink}.
+	 *
+	 * @param computationId identifier used to look up the output
+	 * @param sClient       client the output is requested from
+	 */
+	@Override
+	public void retrieveOutput(ComputationId computationId, SClient sClient) {
+		try {
+			Resource root = sClient.getOutputDataByComputationId(computationId).getResource();
+			if (!root.isMap()) {
+				return;
+			}
+			MapResource outputs = (MapResource) root;
+			for (String outputKey : outputs.getMap().keySet()) {
+				Resource entry = outputs.getMap().get(outputKey);
+				if (!entry.isFile()) {
+					continue;
+				}
+				FileResource file = (FileResource) entry;
+				String fileName = file.getName();
+				String fileUrl = file.getUrl();
+				if (!fileName.equalsIgnoreCase("outfile")) {
+					continue;
+				}
+				String text = readFileContent(fileUrl);
+				System.out.println(text + ".");
+				if (response == null) {
+					continue;
+				}
+				response.getWriter().println(new JsonManager().getSuccessJsonResponse(text, publicLink));
+			}
+		} catch (Exception e) {
+			// failures are only logged; nothing is written to the response
+			logger.error(e.getLocalizedMessage());
+		}
+	}
+
+	private void runUsingClientLibrary() throws NlpHubException {
 		try {
 			super.identifier = RECOGNIZER_ID;
 			super.init();
@ -64,35 +148,26 @@ public class NLpLanguageRecognizer extends DataminerClient {
 			throw new NlpHubException(e.getLocalizedMessage(), e);
 		}
 	}
-	
-	
-	@Override
-	public void retrieveOutput(ComputationId computationId, SClient sClient) {
-		try {
-			OutputData output = sClient.getOutputDataByComputationId(computationId);
-			Resource resource = output.getResource();
-			if (resource.isMap()) {
-				MapResource mapResource = (MapResource) resource;
-				for (String key : mapResource.getMap().keySet()) {
-					Resource r = mapResource.getMap().get(key);
-					if (r.isFile()) {
-						FileResource f = (FileResource) r;
-						String name = f.getName();
-						String link = f.getUrl();
-						if(name.equalsIgnoreCase("outfile")) {
-							String content = readFileContent(link);
-							System.out.println(content + ".");
-							if(response != null) {
-								response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
-							}
-						}
-					}
-				}
-			}
-		} catch (Exception e) {
-			logger.error(e.getLocalizedMessage());
-			//writeResponse(e.getLocalizedMessage(), false);
+
+
+	/**
+	 * Downloads the resource at {@code link} with an HTTP GET and returns its text.
+	 * <p>
+	 * The token is sent in the request header named {@code Constants.TOKEN_PARAMETER}.
+	 * Lines are concatenated without their line terminators.
+	 *
+	 * @param link  URL of the resource to read
+	 * @param token value of the token request header
+	 * @return the content of the resource with line terminators removed
+	 * @throws Exception if the URL is malformed or the download fails
+	 */
+	private static String readFileContent(String link, String token) throws Exception {
+		URL url = new URL(link);
+		HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+		connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
+		connection.setDoInput(true);
+		connection.setDoOutput(true);
+		connection.setUseCaches(false);
+		connection.setRequestMethod("GET");
+
+		// try-with-resources closes the reader even when reading fails; the charset is explicit
+		// so the result no longer depends on the platform default
+		// NOTE(review): assumes the remote file is UTF-8 encoded - confirm
+		try (BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
+			StringBuilder response = new StringBuilder();
+			String inputLine;
+			while ((inputLine = r.readLine()) != null) {
+				response.append(inputLine);
+			}
+			return response.toString();
+		} finally {
+			// release the connection on both the success and the failure path
+			connection.disconnect();
 		}
 	}
 	
 	private String readFileContent(String link) throws Exception {
@ -105,31 +180,30 @@ public class NLpLanguageRecognizer extends DataminerClient {
 		connection.setRequestMethod("GET");

 		BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
-
 		StringBuffer response = new StringBuffer();
 		String inputLine;
 		while ((inputLine = r.readLine()) != null) {
 			response.append(inputLine);
 		}
-
-		String out = response.toString();	
+		connection.disconnect();
+		String out = response.toString();
 		return out;
 	}
 	
 	/*
 	public static void main(String[] args) {
-		String service = "http://dataminer-prototypes.d4science.org/wps/";
-		String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
-		String sentence = "Per me si va nella città dolente";
-		sentence = "Querido amigo, te escribo, así que me distraigo un poco.";
-		sentence = "Per me si va in città";
+		//String pLink = "http://data.d4science.org/RkNBSmNFRG9MOHFLSWsrWUNQdHk3NTU0UC85ekRnSXNHbWJQNStIS0N6Yz0";
+		String token = Constants.TEST_TOKEN;
+		String sentence = "Questa mattina mi sono alzato ed ho trovato l'invasore.";
 		
-		NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence);
 		try {
-			recognizer.run();
-		} catch (Exception x) {
-			x.printStackTrace();
+			//String sentence, String token, String publicLink, HttpServletResponse response
+			NLpLanguageRecognizer.run(sentence, token, "http://cazziemazzi", null);
+		} catch (NlpHubException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
 		}
+	}
+	*/

-	}*/
 }
--- a/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java
+++ b/src/main/java/org/gcube/nlphub/nlp/NlpUtils.java
@ -5,14 +5,14 @@ import java.util.ArrayList;
 public class NlpUtils {
 	
 	public static String getLanguageRecognizerDigest(String content) {
-		int minToken = 10;
+		int minToken = 20;
 		
 		content = content.trim();
 		String[] tokens = content.split("\\.");
 		if(tokens.length == 1) 
 			tokens = content.split(";");
 		if(tokens.length == 1)
-			return content;
+			return escapeContent(content);
 		
 		ArrayList<String> list = new ArrayList<>();
 		
@ -24,22 +24,29 @@ public class NlpUtils {
 		}
 		
 		if(list.isEmpty())
-			return content;
+			return escapeContent(content);
 		
 		String digest = list.get(0);
 		for(String s : list) {
 			if(s.length() < digest.length())
 				digest = s;
 		}
-		return digest;
+		return escapeContent(digest);
 	}
 	
+	/**
+	 * Counts the pieces obtained by splitting {@code content} on single
+	 * whitespace characters (regular expression {@code \s}).
+	 * <p>
+	 * Each whitespace character is a separate delimiter, so a run of consecutive
+	 * whitespace characters yields empty pieces that are included in the count.
+	 *
+	 * @param content text to split; must not be {@code null}
+	 * @return length of the array returned by {@code content.split("\\s")}
+	 */
 	public static int countTokens(String content) {
 		return content.split("\\s").length;
 	}
 	
+	/**
+	 * Replaces every backslash and every double quote in {@code content} with a
+	 * single space, leaving all other characters untouched.
+	 *
+	 * @param content text to clean; must not be {@code null}
+	 * @return the text with backslashes and double quotes turned into spaces
+	 */
+	public static String escapeContent(String content) {
+		// literal character replacement: same result as the regex form, no pattern compilation
+		return content.replace('\\', ' ').replace('"', ' ');
+	}
+	

-/*	
+	
+	/*
 	public static void main(String[] args) {
 		String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
 		text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
@ -47,7 +54,10 @@ public class NlpUtils {
 		text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
 		text += "The hotline was one of many that were closed as inter-Korean relations soured.";
 		
+		text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
+		
+		//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata."; 
+		
 		System.out.println(getLanguageRecognizerDigest(text));
-	}
-	*/
+	}*/
 }
--- a/src/main/webapp/index.jsp
+++ b/src/main/webapp/index.jsp
@ -40,7 +40,7 @@
 		<!--  "ner" div: contains the name entity recognizer interface -->
 		<div id="ner">
 			<div id="ner-ui">
-				<p class="flow-text">Name Entity Recognition</p>
+				<p class="flow-text">Named Entity Recognition</p>
 				<fieldset>
 					<legend>Language selection</legend>
 					<div class="row">
--- a/src/main/webapp/js/main.js
+++ b/src/main/webapp/js/main.js
@ -130,7 +130,7 @@ checkLanguage = function(lang) {
 			return;
 		}
 	}
-	alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text.");
+	alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\"");
 }

 /*