This commit is contained in:
Erik Perrone 2018-03-12 16:46:20 +00:00
parent 07fb54148b
commit 7210d91790
7 changed files with 147 additions and 63 deletions

View File

@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet {
private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName()); private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
public static final String service = "http://dataminer-prototypes.d4science.org/wps/"; public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private boolean devMode = true; private boolean devMode = true;
/** /**

View File

@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper;
public class NLPMapper extends HttpServlet { public class NLPMapper extends HttpServlet {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName()); private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private boolean devMode = true; private boolean devMode = true;
/** /**

View File

@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName()); private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
private boolean devMode = true; private boolean devMode = true;
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private WorkspaceManager ws; private WorkspaceManager ws;
/** /**
@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet {
String link = ws.getPublicLink(fileName, token); String link = ws.getPublicLink(fileName, token);
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent); String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
System.out.println(sentence); System.out.println(sentence);
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response); //NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
recognizer.run(); //recognizer.run();
NLpLanguageRecognizer.run(sentence, token, link, response);
//writer.println(new JsonManager().getSuccessJsonResponse("" + link)); //writer.println(new JsonManager().getSuccessJsonResponse("" + link));
} catch (Exception x) { } catch (Exception x) {
x.printStackTrace(); x.printStackTrace();

View File

@ -4,10 +4,12 @@ import java.io.BufferedReader;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.URL; import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient; import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
@ -25,16 +27,16 @@ import org.gcube.nlphub.legacy.Constants;
import org.gcube.nlphub.legacy.DataminerClient; import org.gcube.nlphub.legacy.DataminerClient;
import org.gcube.nlphub.legacy.JsonManager; import org.gcube.nlphub.legacy.JsonManager;
import org.gcube.nlphub.legacy.NlpHubException; import org.gcube.nlphub.legacy.NlpHubException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
public class NLpLanguageRecognizer extends DataminerClient { public class NLpLanguageRecognizer extends DataminerClient {
private HttpServletResponse response; private HttpServletResponse response;
private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()); private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
private String sentence, publicLink; private String sentence, publicLink;
public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER"; public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
// private String service = "http://dataminer-prototypes.d4science.org/wps/";
// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
public NLpLanguageRecognizer(String service, String token, String sentence) { public NLpLanguageRecognizer(String service, String token, String sentence) {
super(service, "", token); super(service, "", token);
@ -42,14 +44,96 @@ public class NLpLanguageRecognizer extends DataminerClient {
response = null; response = null;
} }
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, HttpServletResponse response) { public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink,
HttpServletResponse response) {
super(service, "", token); super(service, "", token);
this.sentence = sentence; this.sentence = sentence;
this.response = response; this.response = response;
this.publicLink = publicLink; this.publicLink = publicLink;
} }
/**
 * Calls the language recognizer through the WPS HTTP endpoint and emits the
 * result as a JSON success response.
 *
 * The request is a GET on the WebProcessingService "Execute" operation for
 * {@link #RECOGNIZER_ID}. The XML answer is scanned for
 * <code>d4science:Data</code> / <code>d4science:Description</code> elements,
 * which are paired by position; for every pair whose description is
 * "outfile" the data text is treated as a link, its content is downloaded
 * with {@code readFileContent(link, token)} and wrapped with
 * {@code JsonManager.getSuccessJsonResponse(content, publicLink)}.
 *
 * @param sentence   text sent to the recognizer (URL-encoded here); must not be null
 * @param token      gcube token sent as the "gcube-token" query parameter; must not be null
 * @param publicLink link passed through unchanged into the JSON response
 * @param response   servlet response that receives the JSON; when null the
 *                   JSON is printed to standard output instead
 * @throws NlpHubException wrapping any failure (invalid arguments, I/O, XML parsing)
 */
public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException {
	HttpURLConnection connection = null;
	try {
		if (sentence == null || token == null) {
			throw new IllegalArgumentException("sentence and token must not be null");
		}

		String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
		// Encode the token as well, so an unexpected character cannot corrupt the query string.
		urlService += "&gcube-token=" + URLEncoder.encode(token, "UTF-8");
		urlService += "&lang=en-US";
		urlService += "&Identifier=" + RECOGNIZER_ID;
		urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8");

		connection = (HttpURLConnection) new URL(urlService).openConnection();
		// No request body is written, so doOutput is intentionally left at its default.
		connection.setDoInput(true);
		connection.setUseCaches(false);
		connection.setRequestMethod("GET");

		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		// The document comes from a remote host: apply the JAXP secure-processing limits.
		factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);

		Document doc;
		// Hand the raw byte stream to the parser so the encoding declared by the
		// XML document is honoured instead of the platform default charset.
		try (java.io.InputStream in = connection.getInputStream()) {
			doc = factory.newDocumentBuilder().parse(in);
		}
		doc.getDocumentElement().normalize();

		NodeList nListData = doc.getElementsByTagName("d4science:Data");
		NodeList nListDesc = doc.getElementsByTagName("d4science:Description");

		// Data and Description nodes are matched by index; stop at the shorter
		// list so a missing description cannot cause a NullPointerException.
		int len = Math.min(nListData.getLength(), nListDesc.getLength());
		for (int i = 0; i < len; i++) {
			Node data = nListData.item(i);
			Node description = nListDesc.item(i);
			String link = data.getTextContent();
			String type = description.getTextContent();
			if ("outfile".equals(type)) {
				System.out.println(link);
				String content = readFileContent(link, token);
				String json = new JsonManager().getSuccessJsonResponse(content, publicLink);
				if (response != null) {
					response.getWriter().println(json);
				} else {
					System.out.println(json);
				}
			}
		}
	} catch (Exception e) {
		// Pass the throwable to the logger so the stack trace is not lost.
		Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).error(e.getLocalizedMessage(), e);
		throw new NlpHubException(e.getLocalizedMessage(), e);
	} finally {
		if (connection != null) {
			connection.disconnect();
		}
	}
}
public void run() throws NlpHubException { public void run() throws NlpHubException {
runUsingClientLibrary();
}
/**
 * Reads the output of a computation and, for each file resource named
 * "outfile" (case-insensitive), downloads its content and writes it to the
 * servlet response as a JSON success message together with the public link.
 *
 * Nothing is written when the output resource is not a map or when no
 * servlet response was supplied to this instance. Failures are logged and
 * not propagated.
 * NOTE(review): presumably invoked by the DataminerClient base class once
 * the computation has finished — confirm against DataminerClient.
 *
 * @param computationId identifier of the computation whose output is read
 * @param sClient       client used to fetch the output data
 */
@Override
public void retrieveOutput(ComputationId computationId, SClient sClient) {
	try {
		OutputData output = sClient.getOutputDataByComputationId(computationId);
		Resource resource = output.getResource();
		if (!resource.isMap()) {
			return;
		}

		MapResource mapResource = (MapResource) resource;
		// Only the values are needed, so iterate them directly instead of
		// looking each key up again.
		for (Resource r : mapResource.getMap().values()) {
			if (!r.isFile()) {
				continue;
			}
			FileResource f = (FileResource) r;
			String name = f.getName();
			String link = f.getUrl();
			if (name.equalsIgnoreCase("outfile")) {
				String content = readFileContent(link);
				System.out.println(content + ".");
				if (response != null) {
					response.getWriter()
							.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
				}
			}
		}
	} catch (Exception e) {
		// Log the throwable itself: the message alone may be null and hides the stack trace.
		logger.error(e.getLocalizedMessage(), e);
		// writeResponse(e.getLocalizedMessage(), false);
	}
}
private void runUsingClientLibrary() throws NlpHubException {
try { try {
super.identifier = RECOGNIZER_ID; super.identifier = RECOGNIZER_ID;
super.init(); super.init();
@ -66,33 +150,24 @@ public class NLpLanguageRecognizer extends DataminerClient {
} }
@Override private static String readFileContent(String link, String token) throws Exception {
public void retrieveOutput(ComputationId computationId, SClient sClient) { URL url = new URL(link);
try { HttpURLConnection connection = (HttpURLConnection) url.openConnection();
OutputData output = sClient.getOutputDataByComputationId(computationId); connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
Resource resource = output.getResource(); connection.setDoInput(true);
if (resource.isMap()) { connection.setDoOutput(true);
MapResource mapResource = (MapResource) resource; connection.setUseCaches(false);
for (String key : mapResource.getMap().keySet()) { connection.setRequestMethod("GET");
Resource r = mapResource.getMap().get(key);
if (r.isFile()) { BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
FileResource f = (FileResource) r; StringBuffer response = new StringBuffer();
String name = f.getName(); String inputLine;
String link = f.getUrl(); while ((inputLine = r.readLine()) != null) {
if(name.equalsIgnoreCase("outfile")) { response.append(inputLine);
String content = readFileContent(link);
System.out.println(content + ".");
if(response != null) {
response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
}
}
}
}
}
} catch (Exception e) {
logger.error(e.getLocalizedMessage());
//writeResponse(e.getLocalizedMessage(), false);
} }
connection.disconnect();
String out = response.toString();
return out;
} }
private String readFileContent(String link) throws Exception { private String readFileContent(String link) throws Exception {
@ -105,31 +180,30 @@ public class NLpLanguageRecognizer extends DataminerClient {
connection.setRequestMethod("GET"); connection.setRequestMethod("GET");
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream())); BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
StringBuffer response = new StringBuffer(); StringBuffer response = new StringBuffer();
String inputLine; String inputLine;
while ((inputLine = r.readLine()) != null) { while ((inputLine = r.readLine()) != null) {
response.append(inputLine); response.append(inputLine);
} }
connection.disconnect();
String out = response.toString(); String out = response.toString();
return out; return out;
} }
/* /*
public static void main(String[] args) { public static void main(String[] args) {
String service = "http://dataminer-prototypes.d4science.org/wps/"; //String pLink = "http://data.d4science.org/RkNBSmNFRG9MOHFLSWsrWUNQdHk3NTU0UC85ekRnSXNHbWJQNStIS0N6Yz0";
String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462"; String token = Constants.TEST_TOKEN;
String sentence = "Per me si va nella città dolente"; String sentence = "Questa mattina mi sono alzato ed ho trovato l'invasore.";
sentence = "Querido amigo, te escribo, así que me distraigo un poco.";
sentence = "Per me si va in città";
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence);
try { try {
recognizer.run(); //String sentence, String token, String publicLink, HttpServletResponse response
} catch (Exception x) { NLpLanguageRecognizer.run(sentence, token, "http://cazziemazzi", null);
x.printStackTrace(); } catch (NlpHubException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} }
}
*/
}*/
} }

View File

@ -5,14 +5,14 @@ import java.util.ArrayList;
public class NlpUtils { public class NlpUtils {
public static String getLanguageRecognizerDigest(String content) { public static String getLanguageRecognizerDigest(String content) {
int minToken = 10; int minToken = 20;
content = content.trim(); content = content.trim();
String[] tokens = content.split("\\."); String[] tokens = content.split("\\.");
if(tokens.length == 1) if(tokens.length == 1)
tokens = content.split(";"); tokens = content.split(";");
if(tokens.length == 1) if(tokens.length == 1)
return content; return escapeContent(content);
ArrayList<String> list = new ArrayList<>(); ArrayList<String> list = new ArrayList<>();
@ -24,22 +24,29 @@ public class NlpUtils {
} }
if(list.isEmpty()) if(list.isEmpty())
return content; return escapeContent(content);
String digest = list.get(0); String digest = list.get(0);
for(String s : list) { for(String s : list) {
if(s.length() < digest.length()) if(s.length() < digest.length())
digest = s; digest = s;
} }
return digest; return escapeContent(digest);
} }
public static int countTokens(String content) { public static int countTokens(String content) {
return content.split("\\s").length; return content.split("\\s").length;
} }
/**
 * Neutralises characters that would break the text when it is embedded
 * elsewhere: every backslash and every double quote is replaced by a
 * single space. All other characters are returned unchanged.
 *
 * @param content the text to clean
 * @return the text with backslashes and double quotes turned into spaces
 */
public static String escapeContent(String content) {
	// Literal character replacement; no regular expression is involved.
	String withoutBackslashes = content.replace('\\', ' ');
	return withoutBackslashes.replace('"', ' ');
}
/*
/*
public static void main(String[] args) { public static void main(String[] args) {
String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente"; String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations."; text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
@ -47,7 +54,10 @@ public class NlpUtils {
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula."; text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
text += "The hotline was one of many that were closed as inter-Korean relations soured."; text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata.";
System.out.println(getLanguageRecognizerDigest(text)); System.out.println(getLanguageRecognizerDigest(text));
} }*/
*/
} }

View File

@ -40,7 +40,7 @@
<!-- "ner" div: contains the name entity recognizer interface --> <!-- "ner" div: contains the name entity recognizer interface -->
<div id="ner"> <div id="ner">
<div id="ner-ui"> <div id="ner-ui">
<p class="flow-text">Name Entity Recognition</p> <p class="flow-text">Named Entity Recognition</p>
<fieldset> <fieldset>
<legend>Language selection</legend> <legend>Language selection</legend>
<div class="row"> <div class="row">

View File

@ -130,7 +130,7 @@ checkLanguage = function(lang) {
return; return;
} }
} }
alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text."); alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\"");
} }
/* /*