git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164942 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
07fb54148b
commit
7210d91790
|
@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet {
|
||||||
private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
|
private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
|
public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
|
||||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||||
private boolean devMode = true;
|
private boolean devMode = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper;
|
||||||
public class NLPMapper extends HttpServlet {
|
public class NLPMapper extends HttpServlet {
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
|
private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
|
||||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||||
private boolean devMode = true;
|
private boolean devMode = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet {
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
|
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
|
||||||
private boolean devMode = true;
|
private boolean devMode = true;
|
||||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||||
private WorkspaceManager ws;
|
private WorkspaceManager ws;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet {
|
||||||
String link = ws.getPublicLink(fileName, token);
|
String link = ws.getPublicLink(fileName, token);
|
||||||
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
|
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
|
||||||
System.out.println(sentence);
|
System.out.println(sentence);
|
||||||
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
|
//NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
|
||||||
recognizer.run();
|
//recognizer.run();
|
||||||
|
NLpLanguageRecognizer.run(sentence, token, link, response);
|
||||||
//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
|
//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
|
||||||
} catch (Exception x) {
|
} catch (Exception x) {
|
||||||
x.printStackTrace();
|
x.printStackTrace();
|
||||||
|
|
|
@ -4,10 +4,12 @@ import java.io.BufferedReader;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
|
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
|
||||||
|
@ -25,31 +27,113 @@ import org.gcube.nlphub.legacy.Constants;
|
||||||
import org.gcube.nlphub.legacy.DataminerClient;
|
import org.gcube.nlphub.legacy.DataminerClient;
|
||||||
import org.gcube.nlphub.legacy.JsonManager;
|
import org.gcube.nlphub.legacy.JsonManager;
|
||||||
import org.gcube.nlphub.legacy.NlpHubException;
|
import org.gcube.nlphub.legacy.NlpHubException;
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
public class NLpLanguageRecognizer extends DataminerClient {
|
public class NLpLanguageRecognizer extends DataminerClient {
|
||||||
private HttpServletResponse response;
|
private HttpServletResponse response;
|
||||||
private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
|
private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
|
||||||
private String sentence, publicLink;
|
private String sentence, publicLink;
|
||||||
public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
|
public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
|
||||||
// private String service = "http://dataminer-prototypes.d4science.org/wps/";
|
|
||||||
// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
|
||||||
|
|
||||||
public NLpLanguageRecognizer(String service, String token, String sentence) {
|
public NLpLanguageRecognizer(String service, String token, String sentence) {
|
||||||
super(service, "", token);
|
super(service, "", token);
|
||||||
this.sentence = sentence;
|
this.sentence = sentence;
|
||||||
response = null;
|
response = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, HttpServletResponse response) {
|
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink,
|
||||||
|
HttpServletResponse response) {
|
||||||
super(service, "", token);
|
super(service, "", token);
|
||||||
this.sentence = sentence;
|
this.sentence = sentence;
|
||||||
this.response = response;
|
this.response = response;
|
||||||
this.publicLink = publicLink;
|
this.publicLink = publicLink;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException {
|
||||||
|
try {
|
||||||
|
String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
|
||||||
|
urlService += "&gcube-token=" + token;
|
||||||
|
urlService += "&lang=en-US";
|
||||||
|
urlService += "&Identifier=" + RECOGNIZER_ID;
|
||||||
|
urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8");
|
||||||
|
URL url = new URL(urlService);
|
||||||
|
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||||
|
//connection.setRequestProperty(Constants.TOKEN_PARAMETER, super.getToken());
|
||||||
|
connection.setDoInput(true);
|
||||||
|
connection.setDoOutput(true);
|
||||||
|
connection.setUseCaches(false);
|
||||||
|
connection.setRequestMethod("GET");
|
||||||
|
|
||||||
|
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||||
|
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
|
||||||
|
doc.getDocumentElement().normalize();
|
||||||
|
NodeList nListData = doc.getElementsByTagName("d4science:Data");
|
||||||
|
NodeList nListDesc = doc.getElementsByTagName("d4science:Description");
|
||||||
|
|
||||||
|
int len = nListData.getLength();
|
||||||
|
for(int i=0; i<len; i++) {
|
||||||
|
Node data = nListData.item(i);
|
||||||
|
Node description = nListDesc.item(i);
|
||||||
|
String link = data.getTextContent();
|
||||||
|
String type = description.getTextContent();
|
||||||
|
if(type.equals("outfile")) {
|
||||||
|
System.out.println(link);
|
||||||
|
String content = readFileContent(link, token);
|
||||||
|
if (response != null) {
|
||||||
|
response.getWriter()
|
||||||
|
.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).error(e.getLocalizedMessage());
|
||||||
|
throw new NlpHubException(e.getLocalizedMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public void run() throws NlpHubException {
|
public void run() throws NlpHubException {
|
||||||
|
runUsingClientLibrary();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void retrieveOutput(ComputationId computationId, SClient sClient) {
|
||||||
|
try {
|
||||||
|
OutputData output = sClient.getOutputDataByComputationId(computationId);
|
||||||
|
Resource resource = output.getResource();
|
||||||
|
if (resource.isMap()) {
|
||||||
|
MapResource mapResource = (MapResource) resource;
|
||||||
|
for (String key : mapResource.getMap().keySet()) {
|
||||||
|
Resource r = mapResource.getMap().get(key);
|
||||||
|
if (r.isFile()) {
|
||||||
|
FileResource f = (FileResource) r;
|
||||||
|
String name = f.getName();
|
||||||
|
String link = f.getUrl();
|
||||||
|
if (name.equalsIgnoreCase("outfile")) {
|
||||||
|
String content = readFileContent(link);
|
||||||
|
System.out.println(content + ".");
|
||||||
|
if (response != null) {
|
||||||
|
response.getWriter()
|
||||||
|
.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error(e.getLocalizedMessage());
|
||||||
|
// writeResponse(e.getLocalizedMessage(), false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void runUsingClientLibrary() throws NlpHubException {
|
||||||
try {
|
try {
|
||||||
super.identifier = RECOGNIZER_ID;
|
super.identifier = RECOGNIZER_ID;
|
||||||
super.init();
|
super.init();
|
||||||
|
@ -64,35 +148,26 @@ public class NLpLanguageRecognizer extends DataminerClient {
|
||||||
throw new NlpHubException(e.getLocalizedMessage(), e);
|
throw new NlpHubException(e.getLocalizedMessage(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
private static String readFileContent(String link, String token) throws Exception {
|
||||||
public void retrieveOutput(ComputationId computationId, SClient sClient) {
|
URL url = new URL(link);
|
||||||
try {
|
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||||
OutputData output = sClient.getOutputDataByComputationId(computationId);
|
connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
|
||||||
Resource resource = output.getResource();
|
connection.setDoInput(true);
|
||||||
if (resource.isMap()) {
|
connection.setDoOutput(true);
|
||||||
MapResource mapResource = (MapResource) resource;
|
connection.setUseCaches(false);
|
||||||
for (String key : mapResource.getMap().keySet()) {
|
connection.setRequestMethod("GET");
|
||||||
Resource r = mapResource.getMap().get(key);
|
|
||||||
if (r.isFile()) {
|
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||||
FileResource f = (FileResource) r;
|
StringBuffer response = new StringBuffer();
|
||||||
String name = f.getName();
|
String inputLine;
|
||||||
String link = f.getUrl();
|
while ((inputLine = r.readLine()) != null) {
|
||||||
if(name.equalsIgnoreCase("outfile")) {
|
response.append(inputLine);
|
||||||
String content = readFileContent(link);
|
|
||||||
System.out.println(content + ".");
|
|
||||||
if(response != null) {
|
|
||||||
response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.error(e.getLocalizedMessage());
|
|
||||||
//writeResponse(e.getLocalizedMessage(), false);
|
|
||||||
}
|
}
|
||||||
|
connection.disconnect();
|
||||||
|
String out = response.toString();
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String readFileContent(String link) throws Exception {
|
private String readFileContent(String link) throws Exception {
|
||||||
|
@ -105,31 +180,30 @@ public class NLpLanguageRecognizer extends DataminerClient {
|
||||||
connection.setRequestMethod("GET");
|
connection.setRequestMethod("GET");
|
||||||
|
|
||||||
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||||
|
|
||||||
StringBuffer response = new StringBuffer();
|
StringBuffer response = new StringBuffer();
|
||||||
String inputLine;
|
String inputLine;
|
||||||
while ((inputLine = r.readLine()) != null) {
|
while ((inputLine = r.readLine()) != null) {
|
||||||
response.append(inputLine);
|
response.append(inputLine);
|
||||||
}
|
}
|
||||||
|
connection.disconnect();
|
||||||
String out = response.toString();
|
String out = response.toString();
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
String service = "http://dataminer-prototypes.d4science.org/wps/";
|
//String pLink = "http://data.d4science.org/RkNBSmNFRG9MOHFLSWsrWUNQdHk3NTU0UC85ekRnSXNHbWJQNStIS0N6Yz0";
|
||||||
String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
String token = Constants.TEST_TOKEN;
|
||||||
String sentence = "Per me si va nella città dolente";
|
String sentence = "Questa mattina mi sono alzato ed ho trovato l'invasore.";
|
||||||
sentence = "Querido amigo, te escribo, así que me distraigo un poco.";
|
|
||||||
sentence = "Per me si va in città";
|
|
||||||
|
|
||||||
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence);
|
|
||||||
try {
|
try {
|
||||||
recognizer.run();
|
//String sentence, String token, String publicLink, HttpServletResponse response
|
||||||
} catch (Exception x) {
|
NLpLanguageRecognizer.run(sentence, token, "http://cazziemazzi", null);
|
||||||
x.printStackTrace();
|
} catch (NlpHubException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
}*/
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,14 +5,14 @@ import java.util.ArrayList;
|
||||||
public class NlpUtils {
|
public class NlpUtils {
|
||||||
|
|
||||||
public static String getLanguageRecognizerDigest(String content) {
|
public static String getLanguageRecognizerDigest(String content) {
|
||||||
int minToken = 10;
|
int minToken = 20;
|
||||||
|
|
||||||
content = content.trim();
|
content = content.trim();
|
||||||
String[] tokens = content.split("\\.");
|
String[] tokens = content.split("\\.");
|
||||||
if(tokens.length == 1)
|
if(tokens.length == 1)
|
||||||
tokens = content.split(";");
|
tokens = content.split(";");
|
||||||
if(tokens.length == 1)
|
if(tokens.length == 1)
|
||||||
return content;
|
return escapeContent(content);
|
||||||
|
|
||||||
ArrayList<String> list = new ArrayList<>();
|
ArrayList<String> list = new ArrayList<>();
|
||||||
|
|
||||||
|
@ -24,22 +24,29 @@ public class NlpUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(list.isEmpty())
|
if(list.isEmpty())
|
||||||
return content;
|
return escapeContent(content);
|
||||||
|
|
||||||
String digest = list.get(0);
|
String digest = list.get(0);
|
||||||
for(String s : list) {
|
for(String s : list) {
|
||||||
if(s.length() < digest.length())
|
if(s.length() < digest.length())
|
||||||
digest = s;
|
digest = s;
|
||||||
}
|
}
|
||||||
return digest;
|
return escapeContent(digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countTokens(String content) {
|
public static int countTokens(String content) {
|
||||||
return content.split("\\s").length;
|
return content.split("\\s").length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String escapeContent(String content) {
|
||||||
|
content = content.replaceAll("\\\\", " ");
|
||||||
|
content = content.replaceAll("\"", " ");
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
/*
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
|
String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
|
||||||
text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
|
text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
|
||||||
|
@ -47,7 +54,10 @@ public class NlpUtils {
|
||||||
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
|
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
|
||||||
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
|
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
|
||||||
|
|
||||||
|
text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
|
||||||
|
|
||||||
|
//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata.";
|
||||||
|
|
||||||
System.out.println(getLanguageRecognizerDigest(text));
|
System.out.println(getLanguageRecognizerDigest(text));
|
||||||
}
|
}*/
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@
|
||||||
<!-- "ner" div: contains the name entity recognizer interface -->
|
<!-- "ner" div: contains the name entity recognizer interface -->
|
||||||
<div id="ner">
|
<div id="ner">
|
||||||
<div id="ner-ui">
|
<div id="ner-ui">
|
||||||
<p class="flow-text">Name Entity Recognition</p>
|
<p class="flow-text">Named Entity Recognition</p>
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Language selection</legend>
|
<legend>Language selection</legend>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
|
|
|
@ -130,7 +130,7 @@ checkLanguage = function(lang) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text.");
|
alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue