git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@164942 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
07fb54148b
commit
7210d91790
|
@ -37,7 +37,7 @@ public class NLPHub extends HttpServlet {
|
|||
private Logger logger = Logger.getLogger(NLPHub.class.getSimpleName());
|
||||
private static final long serialVersionUID = 1L;
|
||||
public static final String service = "http://dataminer-prototypes.d4science.org/wps/";
|
||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private boolean devMode = true;
|
||||
|
||||
/**
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.gcube.nlphub.mapper.DefaultMapper;
|
|||
public class NLPMapper extends HttpServlet {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private Logger logger = Logger.getLogger(NLPMapper.class.getSimpleName());
|
||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private boolean devMode = true;
|
||||
|
||||
/**
|
||||
|
|
|
@ -39,7 +39,7 @@ public class NLPUploader extends HttpServlet {
|
|||
private static final long serialVersionUID = 1L;
|
||||
private Logger logger = Logger.getLogger(NLPUploader.class.getSimpleName());
|
||||
private boolean devMode = true;
|
||||
private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private String token; //= "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
private WorkspaceManager ws;
|
||||
|
||||
/**
|
||||
|
@ -145,9 +145,9 @@ public class NLPUploader extends HttpServlet {
|
|||
String link = ws.getPublicLink(fileName, token);
|
||||
String sentence = NlpUtils.getLanguageRecognizerDigest(stringContent);
|
||||
System.out.println(sentence);
|
||||
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
|
||||
recognizer.run();
|
||||
|
||||
//NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(NLPHub.service, token, sentence, link, response);
|
||||
//recognizer.run();
|
||||
NLpLanguageRecognizer.run(sentence, token, link, response);
|
||||
//writer.println(new JsonManager().getSuccessJsonResponse("" + link));
|
||||
} catch (Exception x) {
|
||||
x.printStackTrace();
|
||||
|
|
|
@ -4,10 +4,12 @@ import java.io.BufferedReader;
|
|||
import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.gcube.data.analysis.dataminermanagercl.server.dmservice.SClient;
|
||||
|
@ -25,31 +27,113 @@ import org.gcube.nlphub.legacy.Constants;
|
|||
import org.gcube.nlphub.legacy.DataminerClient;
|
||||
import org.gcube.nlphub.legacy.JsonManager;
|
||||
import org.gcube.nlphub.legacy.NlpHubException;
|
||||
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Node;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
public class NLpLanguageRecognizer extends DataminerClient {
|
||||
private HttpServletResponse response;
|
||||
private Logger logger = Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName());
|
||||
private String sentence, publicLink;
|
||||
public final static String RECOGNIZER_ID = "org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.LANGUAGE_RECOGNIZER";
|
||||
// private String service = "http://dataminer-prototypes.d4science.org/wps/";
|
||||
// private String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
|
||||
|
||||
public NLpLanguageRecognizer(String service, String token, String sentence) {
|
||||
super(service, "", token);
|
||||
this.sentence = sentence;
|
||||
response = null;
|
||||
}
|
||||
|
||||
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink, HttpServletResponse response) {
|
||||
|
||||
public NLpLanguageRecognizer(String service, String token, String sentence, String publicLink,
|
||||
HttpServletResponse response) {
|
||||
super(service, "", token);
|
||||
this.sentence = sentence;
|
||||
this.response = response;
|
||||
this.publicLink = publicLink;
|
||||
}
|
||||
|
||||
public static void run(String sentence, String token, String publicLink, HttpServletResponse response) throws NlpHubException {
|
||||
try {
|
||||
String urlService = "http://dataminer-prototypes.d4science.org/wps/WebProcessingService?request=Execute&service=WPS&Version=1.0.0";
|
||||
urlService += "&gcube-token=" + token;
|
||||
urlService += "&lang=en-US";
|
||||
urlService += "&Identifier=" + RECOGNIZER_ID;
|
||||
urlService += "&DataInputs=sentence=" + URLEncoder.encode(sentence, "UTF-8");
|
||||
URL url = new URL(urlService);
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
//connection.setRequestProperty(Constants.TOKEN_PARAMETER, super.getToken());
|
||||
connection.setDoInput(true);
|
||||
connection.setDoOutput(true);
|
||||
connection.setUseCaches(false);
|
||||
connection.setRequestMethod("GET");
|
||||
|
||||
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(r));
|
||||
doc.getDocumentElement().normalize();
|
||||
NodeList nListData = doc.getElementsByTagName("d4science:Data");
|
||||
NodeList nListDesc = doc.getElementsByTagName("d4science:Description");
|
||||
|
||||
int len = nListData.getLength();
|
||||
for(int i=0; i<len; i++) {
|
||||
Node data = nListData.item(i);
|
||||
Node description = nListDesc.item(i);
|
||||
String link = data.getTextContent();
|
||||
String type = description.getTextContent();
|
||||
if(type.equals("outfile")) {
|
||||
System.out.println(link);
|
||||
String content = readFileContent(link, token);
|
||||
if (response != null) {
|
||||
response.getWriter()
|
||||
.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||
}
|
||||
else {
|
||||
System.out.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(NLpLanguageRecognizer.class.getSimpleName()).error(e.getLocalizedMessage());
|
||||
throw new NlpHubException(e.getLocalizedMessage(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void run() throws NlpHubException {
|
||||
runUsingClientLibrary();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void retrieveOutput(ComputationId computationId, SClient sClient) {
|
||||
try {
|
||||
OutputData output = sClient.getOutputDataByComputationId(computationId);
|
||||
Resource resource = output.getResource();
|
||||
if (resource.isMap()) {
|
||||
MapResource mapResource = (MapResource) resource;
|
||||
for (String key : mapResource.getMap().keySet()) {
|
||||
Resource r = mapResource.getMap().get(key);
|
||||
if (r.isFile()) {
|
||||
FileResource f = (FileResource) r;
|
||||
String name = f.getName();
|
||||
String link = f.getUrl();
|
||||
if (name.equalsIgnoreCase("outfile")) {
|
||||
String content = readFileContent(link);
|
||||
System.out.println(content + ".");
|
||||
if (response != null) {
|
||||
response.getWriter()
|
||||
.println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error(e.getLocalizedMessage());
|
||||
// writeResponse(e.getLocalizedMessage(), false);
|
||||
}
|
||||
}
|
||||
|
||||
private void runUsingClientLibrary() throws NlpHubException {
|
||||
try {
|
||||
super.identifier = RECOGNIZER_ID;
|
||||
super.init();
|
||||
|
@ -64,35 +148,26 @@ public class NLpLanguageRecognizer extends DataminerClient {
|
|||
throw new NlpHubException(e.getLocalizedMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void retrieveOutput(ComputationId computationId, SClient sClient) {
|
||||
try {
|
||||
OutputData output = sClient.getOutputDataByComputationId(computationId);
|
||||
Resource resource = output.getResource();
|
||||
if (resource.isMap()) {
|
||||
MapResource mapResource = (MapResource) resource;
|
||||
for (String key : mapResource.getMap().keySet()) {
|
||||
Resource r = mapResource.getMap().get(key);
|
||||
if (r.isFile()) {
|
||||
FileResource f = (FileResource) r;
|
||||
String name = f.getName();
|
||||
String link = f.getUrl();
|
||||
if(name.equalsIgnoreCase("outfile")) {
|
||||
String content = readFileContent(link);
|
||||
System.out.println(content + ".");
|
||||
if(response != null) {
|
||||
response.getWriter().println(new JsonManager().getSuccessJsonResponse(content, publicLink));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error(e.getLocalizedMessage());
|
||||
//writeResponse(e.getLocalizedMessage(), false);
|
||||
|
||||
|
||||
private static String readFileContent(String link, String token) throws Exception {
|
||||
URL url = new URL(link);
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
connection.setRequestProperty(Constants.TOKEN_PARAMETER, token);
|
||||
connection.setDoInput(true);
|
||||
connection.setDoOutput(true);
|
||||
connection.setUseCaches(false);
|
||||
connection.setRequestMethod("GET");
|
||||
|
||||
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
StringBuffer response = new StringBuffer();
|
||||
String inputLine;
|
||||
while ((inputLine = r.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
}
|
||||
connection.disconnect();
|
||||
String out = response.toString();
|
||||
return out;
|
||||
}
|
||||
|
||||
private String readFileContent(String link) throws Exception {
|
||||
|
@ -105,31 +180,30 @@ public class NLpLanguageRecognizer extends DataminerClient {
|
|||
connection.setRequestMethod("GET");
|
||||
|
||||
BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
|
||||
StringBuffer response = new StringBuffer();
|
||||
String inputLine;
|
||||
while ((inputLine = r.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
}
|
||||
|
||||
String out = response.toString();
|
||||
connection.disconnect();
|
||||
String out = response.toString();
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
public static void main(String[] args) {
|
||||
String service = "http://dataminer-prototypes.d4science.org/wps/";
|
||||
String token = "df2cc5f5-63ee-48c1-b2a6-1210030c57b8-843339462";
|
||||
String sentence = "Per me si va nella città dolente";
|
||||
sentence = "Querido amigo, te escribo, así que me distraigo un poco.";
|
||||
sentence = "Per me si va in città";
|
||||
//String pLink = "http://data.d4science.org/RkNBSmNFRG9MOHFLSWsrWUNQdHk3NTU0UC85ekRnSXNHbWJQNStIS0N6Yz0";
|
||||
String token = Constants.TEST_TOKEN;
|
||||
String sentence = "Questa mattina mi sono alzato ed ho trovato l'invasore.";
|
||||
|
||||
NLpLanguageRecognizer recognizer = new NLpLanguageRecognizer(service, token, sentence);
|
||||
try {
|
||||
recognizer.run();
|
||||
} catch (Exception x) {
|
||||
x.printStackTrace();
|
||||
//String sentence, String token, String publicLink, HttpServletResponse response
|
||||
NLpLanguageRecognizer.run(sentence, token, "http://cazziemazzi", null);
|
||||
} catch (NlpHubException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
}*/
|
||||
}
|
||||
|
|
|
@ -5,14 +5,14 @@ import java.util.ArrayList;
|
|||
public class NlpUtils {
|
||||
|
||||
public static String getLanguageRecognizerDigest(String content) {
|
||||
int minToken = 10;
|
||||
int minToken = 20;
|
||||
|
||||
content = content.trim();
|
||||
String[] tokens = content.split("\\.");
|
||||
if(tokens.length == 1)
|
||||
tokens = content.split(";");
|
||||
if(tokens.length == 1)
|
||||
return content;
|
||||
return escapeContent(content);
|
||||
|
||||
ArrayList<String> list = new ArrayList<>();
|
||||
|
||||
|
@ -24,22 +24,29 @@ public class NlpUtils {
|
|||
}
|
||||
|
||||
if(list.isEmpty())
|
||||
return content;
|
||||
return escapeContent(content);
|
||||
|
||||
String digest = list.get(0);
|
||||
for(String s : list) {
|
||||
if(s.length() < digest.length())
|
||||
digest = s;
|
||||
}
|
||||
return digest;
|
||||
return escapeContent(digest);
|
||||
}
|
||||
|
||||
public static int countTokens(String content) {
|
||||
return content.split("\\s").length;
|
||||
}
|
||||
|
||||
public static String escapeContent(String content) {
|
||||
content = content.replaceAll("\\\\", " ");
|
||||
content = content.replaceAll("\"", " ");
|
||||
return content;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
/*
|
||||
public static void main(String[] args) {
|
||||
String text = "Per me si va nella Città dolente.\n Per me si va tra la perduta Gente";
|
||||
text = "North Korea has agreed to send a delegation to next month's Winter Olympics in South Korea, the first notable breakthrough to come out of a face-to-face meeting Tuesday between the neighboring nations.";
|
||||
|
@ -47,7 +54,10 @@ public class NlpUtils {
|
|||
text += "Unification Vice Minister Chun Hae-sung also announced that both sides plan to re-open a military hotline on the western Korean Peninsula.";
|
||||
text += "The hotline was one of many that were closed as inter-Korean relations soured.";
|
||||
|
||||
text = " Tutti i modelli meteo sono d'accordo, \\puntiamo su una rotta poco comune, che non ho mai fatto, ma che dovrebbe funzionare bene\"";
|
||||
|
||||
//text = "A me piace la zuppa, a me piace la pasta, a me piace il formaggio, a me piace la panna. A me piace la cioccolata.";
|
||||
|
||||
System.out.println(getLanguageRecognizerDigest(text));
|
||||
}
|
||||
*/
|
||||
}*/
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
<!-- "ner" div: contains the named entity recognizer interface -->
|
||||
<div id="ner">
|
||||
<div id="ner-ui">
|
||||
<p class="flow-text">Name Entity Recognition</p>
|
||||
<p class="flow-text">Named Entity Recognition</p>
|
||||
<fieldset>
|
||||
<legend>Language selection</legend>
|
||||
<div class="row">
|
||||
|
|
|
@ -130,7 +130,7 @@ checkLanguage = function(lang) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
alert("The uploaded file seems to be written in " + lang + ", but this language is not supported by listed algorithms. Select the language you want, or try with another text.");
|
||||
alert("The uploaded file seems to be in " + lang + ", but this language is not currently supported. Please, be aware of this, should you decide to continue and use the tools of another language... \"Praemonitus praemunitus!\"");
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue