54 lines
1.8 KiB
Java
54 lines
1.8 KiB
Java
package org.gcube.nlphub.nlp;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
/**
 * Static text helpers used to prepare input for language recognition.
 *
 * <p>The main entry point, {@link #getLanguageRecognizerDigest(String)}, reduces a
 * text to a single short-but-meaningful sentence so that only a small excerpt has
 * to be handed to a language recognizer.
 */
public class NlpUtils {

    /** Default minimum number of words a sentence needs to be eligible as digest. */
    private static final int DEFAULT_MIN_TOKENS = 10;

    /**
     * Extracts a digest from {@code content} using the default threshold of
     * {@value #DEFAULT_MIN_TOKENS} words per sentence.
     *
     * @param content the text to summarize; must not be {@code null}
     * @return the shortest sentence with at least {@value #DEFAULT_MIN_TOKENS} words,
     *         or the trimmed {@code content} when no such sentence exists
     * @throws NullPointerException if {@code content} is {@code null}
     * @see #getLanguageRecognizerDigest(String, int)
     */
    public static String getLanguageRecognizerDigest(String content) {
        return getLanguageRecognizerDigest(content, DEFAULT_MIN_TOKENS);
    }

    /**
     * Extracts a digest from {@code content}: the shortest sentence (by character
     * length) that contains at least {@code minTokens} whitespace-separated words.
     *
     * <p>The trimmed text is split on {@code '.'}; if that yields a single fragment
     * it is split on {@code ';'} instead. When the text cannot be split at all, or
     * no fragment reaches the threshold, the trimmed text is returned unchanged.
     * When several eligible fragments share the minimum length, the first one wins.
     *
     * @param content   the text to summarize; must not be {@code null}
     * @param minTokens minimum number of words a fragment needs to be eligible;
     *                  must be at least 1
     * @return the selected fragment without surrounding whitespace, or the trimmed
     *         {@code content} as a fallback
     * @throws IllegalArgumentException if {@code minTokens} is less than 1
     * @throws NullPointerException     if {@code content} is {@code null}
     */
    public static String getLanguageRecognizerDigest(String content, int minTokens) {
        if (minTokens < 1) {
            throw new IllegalArgumentException("minTokens must be >= 1, got " + minTokens);
        }

        String text = content.trim();

        String[] sentences = text.split("\\.");
        if (sentences.length == 1) {
            sentences = text.split(";");
        }
        if (sentences.length == 1) {
            // Nothing to choose from: keep the text as-is (including any trailing delimiter).
            return text;
        }

        String digest = null;
        for (String sentence : sentences) {
            // Fragments following a delimiter usually start with a space or newline.
            String candidate = sentence.trim();
            if (countTokens(candidate) < minTokens) {
                continue;
            }
            // Strict comparison keeps the first of several equally short candidates.
            if (digest == null || candidate.length() < digest.length()) {
                digest = candidate;
            }
        }

        return digest != null ? digest : text;
    }

    /**
     * Counts the whitespace-separated words in {@code content}.
     *
     * <p>Leading, trailing and repeated whitespace is ignored, so a blank string
     * contains zero words.
     *
     * @param content the text to inspect; must not be {@code null}
     * @return the number of words in {@code content}
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static int countTokens(String content) {
        String text = content.trim();
        if (text.isEmpty()) {
            return 0;
        }
        // "\\s+" collapses runs of whitespace so they do not yield empty tokens.
        return text.split("\\s+").length;
    }
}
|