This commit is contained in:
Erik Perrone 2018-03-12 16:46:20 +00:00
parent 07fb54148b
commit 7210d91790
7 changed files with 147 additions and 63 deletions

View File

@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet {
private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
private static final long serialVersionUID = 1L;
public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private boolean devMode = true;
/**

View File

@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper;
public class NLPMapper extends HttpServlet {
private static final long serialVersionUID = 1L;
private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private boolean devMode = true;
/**

View File

@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet {
private static final long serialVersionUID = 1L;
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
private boolean devMode = true;
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
private WorkspaceManager ws;
/**
@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet {
String link = ws.getPublicLink(fileName, token);
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
System.out.println(sentence);
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
recognizer.run();
//NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
//recognizer.run();
NLpLanguageRecognizer.run(sentence, token, link, response);
//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
} catch (Exception x) {
x.printStackTrace();

View File

@ -4,10 +4,12 @@ import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.log4j.Logger;
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
@ -25,31 +27,113 @@ import org.gcube.nlphub.legacy.Constants;
import org.gcube.nlphub.legacy.DataminerClient;
import org.gcube.nlphub.legacy.JsonManager;
import org.gcube.nlphub.legacy.NlpHubException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
public class NLpLanguageRecognizer extends DataminerClient {
private HttpServletResponse response;
private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
private String sentence, publicLink;
public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
// private String service = "http://dataminer-prototypes.d4science.org/wps/";
// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
/**
 * Creates a recognizer that is not bound to an HTTP response.
 * <p>
 * The {@code response} field is explicitly set to {@code null}; {@code publicLink} is not
 * assigned by this constructor.
 *
 * @param service forwarded as the first argument of the superclass constructor
 * @param token forwarded as the third argument of the superclass constructor
 * @param sentence text stored in the {@code sentence} field
 */
public NLpLanguageRecognizer(String service, String token, String sentence) {
super(service, "", token);
this.sentence = sentence;
response = null;
}
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, HttpServletResponse response) {
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink,
HttpServletResponse response) {
super(service, "", token);
this.sentence = sentence;
this.response = response;
this.publicLink = publicLink;
}
/**
 * Invokes the language recognizer through a direct WPS "Execute" HTTP GET request and emits
 * the result as a JSON success message.
 * <p>
 * The XML answer is scanned for {@code d4science:Data} / {@code d4science:Description}
 * element pairs (matched by position). For every pair whose description is {@code "outfile"},
 * the data text is treated as a link, its content is downloaded with
 * {@code readFileContent(link, token)} and the JSON message is written to {@code response},
 * or to standard output when {@code response} is {@code null}.
 *
 * @param sentence text sent to the recognizer (URL-encoded as UTF-8)
 * @param token value appended to the request as the {@code gcube-token} query parameter
 * @param publicLink link passed through to the JSON success message
 * @param response servlet response that receives the JSON message; may be {@code null}
 * @throws NlpHubException wrapping any exception raised while calling the service or
 *         processing its answer
 */
public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException {
    HttpURLConnection connection = null;
    try {
        String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
        urlService += "&gcube-token=" + token;
        urlService += "&lang=en-US";
        urlService += "&Identifier=" + RECOGNIZER_ID;
        urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8");

        connection = (HttpURLConnection) new URL(urlService).openConnection();
        connection.setDoInput(true);
        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestMethod("GET");

        // NOTE(review): the parser runs with default settings; consider disabling DTDs /
        // external entities if the endpoint is ever not fully trusted.
        Document doc;
        // Close the reader as soon as the document is parsed, even when parsing fails.
        try (BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
        }
        doc.getDocumentElement().normalize();

        NodeList nListData = doc.getElementsByTagName("d4science:Data");
        NodeList nListDesc = doc.getElementsByTagName("d4science:Description");
        int len = nListData.getLength();
        for (int i = 0; i < len; i++) {
            Node data = nListData.item(i);
            Node description = nListDesc.item(i);
            // item(i) returns null past the end of the list: skip data entries that have
            // no description at the same position instead of failing with an NPE.
            if (description == null) {
                continue;
            }
            String link = data.getTextContent();
            String type = description.getTextContent();
            if ("outfile".equals(type)) {
                System.out.println(link);
                String content = readFileContent(link, token);
                String json = new JsonManager().getSuccessJsonResponse(content, publicLink);
                if (response != null) {
                    response.getWriter().println(json);
                } else {
                    System.out.println(json);
                }
            }
        }
    } catch (Exception e) {
        // Pass the throwable to the logger so the stack trace is not lost.
        Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).error(e.getLocalizedMessage(), e);
        throw new NlpHubException(e.getLocalizedMessage(), e);
    } finally {
        if (connection != null) {
            connection.disconnect();
        }
    }
}
/**
 * Instance entry point: delegates to {@code runUsingClientLibrary()}.
 *
 * @throws NlpHubException propagated from {@code runUsingClientLibrary()}
 */
public void run() throws NlpHubException {
runUsingClientLibrary();
}
/**
 * Reads the output of the given computation from {@code sClient} and, for every file
 * resource named {@code "outfile"} (case-insensitive) found in a map output, downloads its
 * content, prints it to standard output and, when a servlet response is available, writes
 * a JSON success message to it.
 * <p>
 * Outputs that are not a map are ignored. Any exception is logged and not rethrown.
 *
 * @param computationId identifier of the computation whose output is requested
 * @param sClient client used to fetch the output data
 */
@Override
public void retrieveOutput(ComputationId computationId, SClient sClient) {
    try {
        OutputData output = sClient.getOutputDataByComputationId(computationId);
        Resource resource = output.getResource();
        if (!resource.isMap()) {
            return;
        }
        MapResource mapResource = (MapResource) resource;
        for (Resource r : mapResource.getMap().values()) {
            if (!r.isFile()) {
                continue;
            }
            FileResource f = (FileResource) r;
            String name = f.getName();
            String link = f.getUrl();
            if (!name.equalsIgnoreCase("outfile")) {
                continue;
            }
            String content = readFileContent(link);
            System.out.println(content + ".");
            if (response != null) {
                response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
            }
        }
    } catch (Exception e) {
        // Best-effort by design: the failure is logged, now with its stack trace.
        logger.error(e.getLocalizedMessage(), e);
    }
}
private void runUsingClientLibrary() throws NlpHubException {
try {
super.identifier = RECOGNIZER_ID;
super.init();
@ -64,35 +148,26 @@ public class NLpLanguageRecognizer extends DataminerClient {
throw new NlpHubException(e.getLocalizedMessage(), e);
}
}
@Override
public void retrieveOutput(ComputationId computationId, SClient sClient) {
try {
OutputData output = sClient.getOutputDataByComputationId(computationId);
Resource resource = output.getResource();
if (resource.isMap()) {
MapResource mapResource = (MapResource) resource;
for (String key : mapResource.getMap().keySet()) {
Resource r = mapResource.getMap().get(key);
if (r.isFile()) {
FileResource f = (FileResource) r;
String name = f.getName();
String link = f.getUrl();
if(name.equalsIgnoreCase("outfile")) {
String content = readFileContent(link);
System.out.println(content + ".");
if(response != null) {
response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
}
}
}
}
}
} catch (Exception e) {
logger.error(e.getLocalizedMessage());
//writeResponse(e.getLocalizedMessage(), false);
/**
 * Downloads the resource at {@code link} with an HTTP GET request and returns its text.
 * <p>
 * The token is sent as the request property named by {@code Constants.TOKEN_PARAMETER}.
 * The body is read line by line and the lines are concatenated without any separator, so
 * line terminators are not preserved in the returned string.
 *
 * @param link URL of the resource to read
 * @param token value sent in the token request property
 * @return the concatenated lines of the response body
 * @throws Exception if the URL is malformed or the request/read fails
 */
private static String readFileContent(String link, String token) throws Exception {
    HttpURLConnection connection = (HttpURLConnection) new URL(link).openConnection();
    try {
        connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
        connection.setDoInput(true);
        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestMethod("GET");

        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            String inputLine;
            while ((inputLine = r.readLine()) != null) {
                content.append(inputLine);
            }
        }
        return content.toString();
    } finally {
        // Release the connection even when the request or the read throws.
        connection.disconnect();
    }
}
private String readFileContent(String link) throws Exception {
@ -105,31 +180,30 @@ public class NLpLanguageRecognizer extends DataminerClient {
connection.setRequestMethod("GET");
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
StringBuffer response = new StringBuffer();
String inputLine;
while ((inputLine = r.readLine()) != null) {
response.append(inputLine);
}
String out = response.toString();
connection.disconnect();
String out = response.toString();
return out;
}
/*
public static void main(String[] args) {
String service = "http://dataminer-prototypes.d4science.org/wps/";
String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
String sentence = "Per me si va nella città dolente";
sentence = "Querido amigo, te escribo, así que me distraigo un poco.";
sentence = "Per me si va in città";
//String pLink = "http://data.d4science.org/RkNBSmNFRG9MOHFLSWsrWUNQdHk3NTU0UC85ekRnSXNHbWJQNStIS0N6Yz0";
String token = Constants.TEST_TOKEN;
String sentence = "Questa mattina mi sono alzato ed ho trovato l'invasore.";
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence);
try {
recognizer.run();
} catch (Exception x) {
x.printStackTrace();
//String sentence, String token, String publicLink, HttpServletResponse response
NLpLanguageRecognizer.run(sentence, token, "http://cazziemazzi", null);
} catch (NlpHubException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
*/
}*/
}

View File

@ -5,14 +5,14 @@ import java.util.ArrayList;
public class NlpUtils {
public static String getLanguageRecognizerDigest(String content) {
int minToken = 10;
int minToken = 20;
content = content.trim();
String[] tokens = content.split("\\.");
if(tokens.length == 1)
tokens = content.split(";");
if(tokens.length == 1)
return content;
return escapeContent(content);
ArrayList<String> list = new ArrayList<>();
@ -24,22 +24,29 @@ public class NlpUtils {
}
if(list.isEmpty())
return content;
return escapeContent(content);
String digest = list.get(0);
for(String s : list) {
if(s.length() < digest.length())
digest = s;
}
return digest;
return escapeContent(digest);
}
/**
 * Counts the whitespace-separated tokens of {@code content}.
 * <p>
 * Runs of consecutive whitespace count as a single separator and leading/trailing
 * whitespace is ignored, so no empty tokens are counted.
 *
 * @param content text to tokenize; may be {@code null}
 * @return the number of tokens, or {@code 0} when {@code content} is {@code null}, empty
 *         or made of whitespace only
 */
public static int countTokens(String content) {
    if (content == null) {
        return 0;
    }
    String trimmed = content.trim();
    if (trimmed.isEmpty()) {
        return 0;
    }
    // "\\s+" collapses consecutive whitespace; a single "\\s" would yield empty tokens.
    return trimmed.split("\\s+").length;
}
/**
 * Replaces every backslash and every double-quote character of {@code content} with a
 * single space.
 *
 * @param content text to clean; must not be {@code null}
 * @return the text with backslashes and double quotes replaced by spaces
 */
public static String escapeContent(String content) {
    String withoutBackslashes = content.replace('\\', ' ');
    return withoutBackslashes.replace('"', ' ');
}
/*
/*
public static void main(String[] args) {
String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
@ -47,7 +54,10 @@ public class NlpUtils {
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata.";
System.out.println(getLanguageRecognizerDigest(text));
}
*/
}*/
}

View File

@ -40,7 +40,7 @@
<!-- "ner" div: contains the name entity recognizer interface -->
<div id="ner">
<div id="ner-ui">
<p class="flow-text">Name Entity Recognition</p>
<p class="flow-text">Named Entity Recognition</p>
<fieldset>
<legend>Language selection</legend>
<div class="row">

View File

@ -130,7 +130,7 @@ checkLanguage = function(lang) {
return;
}
}
alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text.");
alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\"");
}
/*