diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml
index 5c6466dd8..cb16ffea3 100644
--- a/dnet-pace-core/pom.xml
+++ b/dnet-pace-core/pom.xml
@@ -27,17 +27,14 @@
gson
- commons-lang
- commons-lang
+ org.apache.commons
+ commons-lang3
commons-io
commons-io
-
- commons-collections
- commons-collections
-
+
org.antlr
stringtemplate
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
index 7fdcce497..01f146120 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
@@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.util.Collection;
import java.util.HashSet;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java
index 7ede4c239..60d956970 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java
@@ -6,7 +6,7 @@ import java.util.Map;
import com.google.common.base.Predicate;
import eu.dnetlib.pace.model.Field;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
index 769ecf53c..b95d1c713 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
@@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
index 6fe525f16..309650f73 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
@@ -8,7 +8,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
@ClusteringClass("lowercase")
public class LowercaseClustering extends AbstractClusteringFunction {
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java
index aeb790f76..2391685b0 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java
@@ -2,7 +2,7 @@ package eu.dnetlib.pace.clustering;
import java.util.Set;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
index 26b07f020..db8d90bce 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
@@ -5,7 +5,7 @@ import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.Person;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.util.Collection;
import java.util.List;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
index fd8e7a3cc..50cea4db3 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
@@ -5,8 +5,8 @@ import java.util.List;
import java.util.Map;
import eu.dnetlib.pace.config.Config;
-import org.apache.commons.lang.RandomStringUtils;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
index a4901fd53..b980018e8 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@@ -9,9 +9,8 @@ import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList;
import eu.dnetlib.pace.model.FieldListImpl;
-import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.io.StringWriter;
@@ -25,295 +24,293 @@ import java.util.stream.Collectors;
* Set of common functions for the framework
*
* @author claudio
- *
*/
public abstract class AbstractPaceFunctions {
- //city map to be used when translating the city names into codes
- private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
+ //city map to be used when translating the city names into codes
+ private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
- //list of stopwords in different languages
- protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
- protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
- protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
- protected static Set stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt");
- protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
- protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
+ //list of stopwords in different languages
+ protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
+ protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
+ protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
+ protected static Set stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt");
+ protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
+ protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
- //blacklist of ngrams: to avoid generic keys
- protected static Set ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");
+ //blacklist of ngrams: to avoid generic keys
+ protected static Set ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");
- private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
- private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
- private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
+ private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
+ private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
+ private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
- //doi prefix for normalization
- public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)";
+ //doi prefix for normalization
+ public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)";
- private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?");
+ private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?");
- private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
+ private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
- protected final static FieldList EMPTY_FIELD = new FieldListImpl();
+ protected final static FieldList EMPTY_FIELD = new FieldListImpl();
- protected String concat(final List l) {
- return Joiner.on(" ").skipNulls().join(l);
- }
+ protected String concat(final List l) {
+ return Joiner.on(" ").skipNulls().join(l);
+ }
- protected String cleanup(final String s) {
- final String s0 = unicodeNormalization(s.toLowerCase());
- final String s1 = fixAliases(s0);
- final String s2 = nfd(s1);
- final String s3 = s2.replaceAll("–", " ");
- final String s4 = s3.replaceAll("&", " ");
- final String s5 = s4.replaceAll(""", " ");
- final String s6 = s5.replaceAll("−", " ");
- final String s7 = s6.replaceAll("([0-9]+)", " $1 ");
- final String s8 = s7.replaceAll("[^\\p{ASCII}]", "");
- final String s9 = s8.replaceAll("[\\p{Punct}]", " ");
- final String s10 = s9.replaceAll("\\n", " ");
- final String s11 = s10.replaceAll("(?m)\\s+", " ");
- final String s12 = s11.trim();
- return s12;
- }
+ protected String cleanup(final String s) {
+ final String s0 = unicodeNormalization(s.toLowerCase());
+ final String s1 = fixAliases(s0);
+ final String s2 = nfd(s1);
+ final String s3 = s2.replaceAll("–", " ");
+ final String s4 = s3.replaceAll("&", " ");
+ final String s5 = s4.replaceAll(""", " ");
+ final String s6 = s5.replaceAll("−", " ");
+ final String s7 = s6.replaceAll("([0-9]+)", " $1 ");
+ final String s8 = s7.replaceAll("[^\\p{ASCII}]", "");
+ final String s9 = s8.replaceAll("[\\p{Punct}]", " ");
+ final String s10 = s9.replaceAll("\\n", " ");
+ final String s11 = s10.replaceAll("(?m)\\s+", " ");
+ final String s12 = s11.trim();
+ return s12;
+ }
- protected boolean checkNumbers(final String a, final String b) {
- final String numbersA = getNumbers(a);
- final String numbersB = getNumbers(b);
- final String romansA = getRomans(a);
- final String romansB = getRomans(b);
- return !numbersA.equals(numbersB) || !romansA.equals(romansB);
- }
+ protected boolean checkNumbers(final String a, final String b) {
+ final String numbersA = getNumbers(a);
+ final String numbersB = getNumbers(b);
+ final String romansA = getRomans(a);
+ final String romansB = getRomans(b);
+ return !numbersA.equals(numbersB) || !romansA.equals(romansB);
+ }
- protected String getRomans(final String s) {
- final StringBuilder sb = new StringBuilder();
- for (final String t : s.split(" ")) {
- sb.append(isRoman(t) ? t : "");
- }
- return sb.toString();
- }
+ protected String getRomans(final String s) {
+ final StringBuilder sb = new StringBuilder();
+ for (final String t : s.split(" ")) {
+ sb.append(isRoman(t) ? t : "");
+ }
+ return sb.toString();
+ }
- protected boolean isRoman(final String s) {
- return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop");
- }
+ protected boolean isRoman(final String s) {
+ return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop");
+ }
- protected String getNumbers(final String s) {
- final StringBuilder sb = new StringBuilder();
- for (final String t : s.split(" ")) {
- sb.append(isNumber(t)? t : "");
- }
- return sb.toString();
- }
+ protected String getNumbers(final String s) {
+ final StringBuilder sb = new StringBuilder();
+ for (final String t : s.split(" ")) {
+ sb.append(isNumber(t) ? t : "");
+ }
+ return sb.toString();
+ }
- public boolean isNumber(String strNum) {
- if (strNum == null) {
- return false;
- }
- return numberPattern.matcher(strNum).matches();
- }
+ public boolean isNumber(String strNum) {
+ if (strNum == null) {
+ return false;
+ }
+ return numberPattern.matcher(strNum).matches();
+ }
- protected static String fixAliases(final String s) {
- final StringBuilder sb = new StringBuilder();
- for (final char ch : Lists.charactersOf(s)) {
- final int i = StringUtils.indexOf(aliases_from, ch);
- sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
- }
- return sb.toString();
- }
+ protected static String fixAliases(final String s) {
+ final StringBuilder sb = new StringBuilder();
+ for (final char ch : Lists.charactersOf(s)) {
+ final int i = StringUtils.indexOf(aliases_from, ch);
+ sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
+ }
+ return sb.toString();
+ }
- protected String removeSymbols(final String s) {
- final StringBuilder sb = new StringBuilder();
+ protected String removeSymbols(final String s) {
+ final StringBuilder sb = new StringBuilder();
- for (final char ch : Lists.charactersOf(s)) {
- sb.append(StringUtils.contains(alpha, ch) ? ch : " ");
- }
- return sb.toString().replaceAll("\\s+", " ");
- }
+ for (final char ch : Lists.charactersOf(s)) {
+ sb.append(StringUtils.contains(alpha, ch) ? ch : " ");
+ }
+ return sb.toString().replaceAll("\\s+", " ");
+ }
- protected String getFirstValue(final Field values) {
- return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : "";
- }
+ protected String getFirstValue(final Field values) {
+ return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : "";
+ }
- protected boolean notNull(final String s) {
- return s != null;
- }
+ protected boolean notNull(final String s) {
+ return s != null;
+ }
- protected String normalize(final String s) {
- return nfd(unicodeNormalization(s))
- .toLowerCase()
- // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
- .replaceAll("[^ \\w]+", "")
- .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
- .replaceAll("(\\p{Punct})+", " ")
- .replaceAll("(\\d)+", " ")
- .replaceAll("(\\n)+", " ")
- .trim();
- }
+ protected String normalize(final String s) {
+ return nfd(unicodeNormalization(s))
+ .toLowerCase()
+ // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
+ .replaceAll("[^ \\w]+", "")
+ .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
+ .replaceAll("(\\p{Punct})+", " ")
+ .replaceAll("(\\d)+", " ")
+ .replaceAll("(\\n)+", " ")
+ .trim();
+ }
- public String nfd(final String s) {
- return Normalizer.normalize(s, Normalizer.Form.NFD);
- }
+ public String nfd(final String s) {
+ return Normalizer.normalize(s, Normalizer.Form.NFD);
+ }
- public String unicodeNormalization(final String s) {
+ public String unicodeNormalization(final String s) {
- Matcher m = hexUnicodePattern.matcher(s);
- StringBuffer buf = new StringBuffer(s.length());
- while (m.find()) {
- String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
- m.appendReplacement(buf, Matcher.quoteReplacement(ch));
- }
- m.appendTail(buf);
- return buf.toString();
- }
+ Matcher m = hexUnicodePattern.matcher(s);
+ StringBuffer buf = new StringBuffer(s.length());
+ while (m.find()) {
+ String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
+ m.appendReplacement(buf, Matcher.quoteReplacement(ch));
+ }
+ m.appendTail(buf);
+ return buf.toString();
+ }
- protected String filterStopWords(final String s, final Set stopwords) {
- final StringTokenizer st = new StringTokenizer(s);
- final StringBuilder sb = new StringBuilder();
- while (st.hasMoreTokens()) {
- final String token = st.nextToken();
- if (!stopwords.contains(token)) {
- sb.append(token);
- sb.append(" ");
- }
- }
- return sb.toString().trim();
- }
+ protected String filterStopWords(final String s, final Set stopwords) {
+ final StringTokenizer st = new StringTokenizer(s);
+ final StringBuilder sb = new StringBuilder();
+ while (st.hasMoreTokens()) {
+ final String token = st.nextToken();
+ if (!stopwords.contains(token)) {
+ sb.append(token);
+ sb.append(" ");
+ }
+ }
+ return sb.toString().trim();
+ }
- public String filterAllStopWords(String s) {
+ public String filterAllStopWords(String s) {
- s = filterStopWords(s, stopwords_en);
- s = filterStopWords(s, stopwords_de);
- s = filterStopWords(s, stopwords_it);
- s = filterStopWords(s, stopwords_fr);
- s = filterStopWords(s, stopwords_pt);
- s = filterStopWords(s, stopwords_es);
+ s = filterStopWords(s, stopwords_en);
+ s = filterStopWords(s, stopwords_de);
+ s = filterStopWords(s, stopwords_it);
+ s = filterStopWords(s, stopwords_fr);
+ s = filterStopWords(s, stopwords_pt);
+ s = filterStopWords(s, stopwords_es);
- return s;
- }
+ return s;
+ }
- protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) {
- final Set newset = Sets.newLinkedHashSet();
- for (final String s : set) {
- if (!ngramBlacklist.contains(s)) {
- newset.add(s);
- }
- }
- return newset;
- }
+ protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) {
+ final Set newset = Sets.newLinkedHashSet();
+ for (final String s : set) {
+ if (!ngramBlacklist.contains(s)) {
+ newset.add(s);
+ }
+ }
+ return newset;
+ }
- public static Set loadFromClasspath(final String classpath) {
- final Set h = Sets.newHashSet();
- try {
- for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) {
- h.add(s);
- }
- } catch (final Throwable e) {
- return Sets.newHashSet();
- }
- return h;
- }
+ public static Set loadFromClasspath(final String classpath) {
+ final Set h = Sets.newHashSet();
+ try {
+ for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) {
+ h.add(s);
+ }
+ } catch (final Throwable e) {
+ return Sets.newHashSet();
+ }
+ return h;
+ }
- public static Map loadMapFromClasspath(final String classpath) {
- final Map m = new HashMap<>();
- try {
- for (final String s: IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) {
- //string is like this: code;word1;word2;word3
- String[] line = s.split(";");
- String value = line[0];
- for (int i=1; i();
- }
- return m;
- }
+ public static Map loadMapFromClasspath(final String classpath) {
+ final Map m = new HashMap<>();
+ try {
+ for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) {
+ //string is like this: code;word1;word2;word3
+ String[] line = s.split(";");
+ String value = line[0];
+ for (int i = 1; i < line.length; i++) {
+ m.put(line[i].toLowerCase(), value);
+ }
+ }
+ } catch (final Throwable e) {
+ return new HashMap<>();
+ }
+ return m;
+ }
- public String removeKeywords(String s, Set keywords) {
+ public String removeKeywords(String s, Set keywords) {
- s = " " + s + " ";
- for (String k: keywords ) {
- s = s.replaceAll(k.toLowerCase(), "");
- }
+ s = " " + s + " ";
+ for (String k : keywords) {
+ s = s.replaceAll(k.toLowerCase(), "");
+ }
- return s.trim();
- }
+ return s.trim();
+ }
- public double commonElementsPercentage(Set s1, Set s2){
+ public double commonElementsPercentage(Set s1, Set s2) {
- int longer = (s1.size()>s2.size())?s1.size():s2.size();
+ double longer = Math.max(s1.size(), s2.size());
+ return (double) s1.stream().filter(s2::contains).count() / longer;
+ }
- return (double)CollectionUtils.intersection(s1,s2).size()/(double)longer;
- }
+ //convert the set of keywords to codes
+ public Set toCodes(Set keywords, Map translationMap) {
+ return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet());
+ }
- //convert the set of keywords to codes
- public Set toCodes(Set keywords, Map translationMap) {
- return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet());
- }
+ public Set keywordsToCodes(Set keywords, Map translationMap) {
+ return toCodes(keywords, translationMap);
+ }
- public Set keywordsToCodes(Set keywords, Map translationMap) {
- return toCodes(keywords, translationMap);
- }
+ public Set citiesToCodes(Set keywords) {
+ return toCodes(keywords, cityMap);
+ }
- public Set citiesToCodes(Set keywords) {
- return toCodes(keywords, cityMap);
- }
+ protected String firstLC(final String s) {
+ return StringUtils.substring(s, 0, 1).toLowerCase();
+ }
- protected String firstLC(final String s) {
- return StringUtils.substring(s, 0, 1).toLowerCase();
- }
+ protected Iterable tokens(final String s, final int maxTokens) {
+ return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
+ }
- protected Iterable tokens(final String s, final int maxTokens) {
- return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
- }
+ public String normalizePid(String pid) {
+ return pid.toLowerCase().replaceAll(DOI_PREFIX, "");
+ }
- public String normalizePid(String pid) {
- return pid.toLowerCase().replaceAll(DOI_PREFIX, "");
- }
+ //get the list of keywords into the input string
+ public Set getKeywords(String s1, Map translationMap, int windowSize) {
- //get the list of keywords into the input string
- public Set getKeywords(String s1, Map translationMap, int windowSize){
+ String s = s1;
- String s = s1;
+ List tokens = Arrays.asList(s.toLowerCase().split(" "));
- List tokens = Arrays.asList(s.toLowerCase().split(" "));
+ Set codes = new HashSet<>();
- Set codes = new HashSet<>();
+ if (tokens.size() < windowSize)
+ windowSize = tokens.size();
- if (tokens.size() getCities(String s1, int windowSize) {
+ return getKeywords(s1, cityMap, windowSize);
+ }
- public Set getCities(String s1, int windowSize) {
- return getKeywords(s1, cityMap, windowSize);
- }
-
- public static String readFromClasspath(final String filename, final Class clazz) {
- final StringWriter sw = new StringWriter();
- try {
- IOUtils.copy(clazz.getResourceAsStream(filename), sw);
- return sw.toString();
- } catch (final IOException e) {
- throw new RuntimeException("cannot load resource from classpath: " + filename);
- }
- }
+ public static String readFromClasspath(final String filename, final Class clazz) {
+ final StringWriter sw = new StringWriter();
+ try {
+ IOUtils.copy(clazz.getResourceAsStream(filename), sw);
+ return sw.toString();
+ } catch (final IOException e) {
+ throw new RuntimeException("cannot load resource from classpath: " + filename);
+ }
+ }
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java
index 3cc5a38a2..78fc18a13 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java
@@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.util.PaceException;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.io.Serializable;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
index 12c578c11..f7831edaa 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
@@ -1,8 +1,9 @@
package eu.dnetlib.pace.model;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
-import com.google.gson.Gson;
import eu.dnetlib.pace.config.Type;
import java.io.Serializable;
@@ -103,7 +104,11 @@ public class FieldDef implements Serializable {
@Override
public String toString() {
- return new Gson().toJson(this);
+ try {
+     return new ObjectMapper().writeValueAsString(this); // JSON rendering of this definition
+ } catch (JsonProcessingException e) {
+     return getClass().getSimpleName() + "[unserializable: " + e.getMessage() + "]"; // toString() must never return null
+ }
}
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java
index d4a11c050..635178b83 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java
@@ -1,11 +1,11 @@
package eu.dnetlib.pace.model;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
-import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
-import com.google.gson.Gson;
import eu.dnetlib.pace.config.Type;
import java.util.Collection;
@@ -283,7 +283,12 @@ public class FieldListImpl extends AbstractField implements FieldList {
case String:
return Joiner.on(" ").join(stringList());
case JSON:
- final String json = new Gson().toJson(stringList());
+ final String json; // JSON array of the string values, same payload the removed Gson call produced
+ try {
+     json = new ObjectMapper().writeValueAsString(stringList()); // serialize the values, not the FieldListImpl itself
+ } catch (JsonProcessingException e) {
+     throw new IllegalStateException("cannot serialize field values to JSON", e); // keep the cause rather than returning null
+ }
return json;
default:
throw new IllegalArgumentException("Unknown type: " + getType().toString());
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
index bf861276e..0a72c07c6 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
@@ -2,12 +2,12 @@ package eu.dnetlib.pace.model;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import eu.dnetlib.pace.config.Type;
-import org.apache.commons.collections.iterators.SingletonIterator;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
* The Class FieldValueImpl.
@@ -124,7 +124,7 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
@Override
@SuppressWarnings("unchecked")
public Iterator iterator() {
- return new SingletonIterator(this);
+ return Collections.singleton((Field) this).iterator();
}
}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java
deleted file mode 100644
index 17bd49d84..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java
+++ /dev/null
@@ -1,129 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.Sets;
-import com.google.gson.Gson;
-
-public class Author implements Comparable {
-
- private String id;
- private String fullname;
- private String firstname;
- private String secondnames;
-
- private List matches = Lists.newArrayList();
- private Set coauthors = Sets.newHashSet();
- private SubjectsMap subjectsMap = new SubjectsMap();
-
- public Author() {
- super();
- }
-
- public Author(final Author a) {
- this.id = a.getId();
- this.fullname = a.getFullname();
- this.firstname = a.getFirstname();
- this.secondnames = a.getSecondnames();
-
- this.matches = a.getMatches();
- this.coauthors = a.getCoauthors();
- this.subjectsMap = a.getSubjectsMap();
- }
-
- public boolean hasMatches() {
- return (getMatches() != null) && !getMatches().isEmpty();
- }
-
- public boolean hasCoauthors() {
- return (getCoauthors() != null) && !getCoauthors().isEmpty();
- }
-
- public boolean isWellFormed() {
- return StringUtils.isNotBlank(getSecondnames()) && StringUtils.isNotBlank(getFirstname());
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public String getFullname() {
- return fullname;
- }
-
- public void setFullname(final String fullname) {
- this.fullname = fullname;
- }
-
- public String getFirstname() {
- return firstname;
- }
-
- public void setFirstname(final String firstname) {
- this.firstname = firstname;
- }
-
- public String getSecondnames() {
- return secondnames;
- }
-
- public void setSecondnames(final String secondnames) {
- this.secondnames = secondnames;
- }
-
- public List getMatches() {
- return matches;
- }
-
- public void setMatches(final List matches) {
- this.matches = matches;
- }
-
- public Set getCoauthors() {
- return coauthors;
- }
-
- public void setCoauthors(final Set coauthors) {
- this.coauthors = coauthors;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public int hashCode() {
- return getId().hashCode();
- }
-
- @Override
- public int compareTo(final Author o) {
- return ComparisonChain.start()
- .compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
- .result();
- }
-
- @Override
- public boolean equals(final Object o) {
- return (o instanceof Author) && getId().equals(((Author) o).getId());
- }
-
- public SubjectsMap getSubjectsMap() {
- return subjectsMap;
- }
-
- public void setSubjectsMap(final SubjectsMap subjectsMap) {
- this.subjectsMap = subjectsMap;
- }
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java
deleted file mode 100644
index c3f2576be..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import com.google.gson.Gson;
-
-public class AuthorSet {
-
- private String id;
- private Authors authors;
-
- public AuthorSet(final String id, final Authors authors) {
- super();
- this.id = id;
- this.authors = authors;
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public Authors getAuthors() {
- return authors;
- }
-
- public void setAuthors(final Authors authors) {
- this.authors = authors;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java
deleted file mode 100644
index e74c43816..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.Collection;
-import java.util.HashSet;
-
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.Sets;
-import com.google.gson.Gson;
-
-public class Authors extends HashSet implements Comparable {
-
- private static final long serialVersionUID = -6878376220805286142L;
-
- public Authors() {
- super();
- }
-
- public Authors(final Collection authors) {
- super(authors);
- }
-
- public Authors(final Author author) {
- super(Sets.newHashSet(author));
- }
-
- @Override
- public int compareTo(final Authors a) {
- return ComparisonChain.start()
- .compare(this.size(), a.size(), Ordering.natural().nullsLast())
- .result();
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public boolean equals(final Object o) {
- final boolean res = o instanceof Authors;
- return res && (Sets.intersection(this, (Authors) o).size() == this.size());
- }
-
- @Override
- public int hashCode() {
- int res = 0;
- for (final Author a : this) {
- res += a.hashCode();
- }
- return res;
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java
deleted file mode 100644
index d4ce32de5..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java
+++ /dev/null
@@ -1,50 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import com.google.gson.Gson;
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-public class CoAuthor extends Author {
-
- private static final Log log = LogFactory.getLog(CoAuthor.class);
- private String anchorId = null;
-
- public CoAuthor() {
- super();
- }
-
- public CoAuthor(final Author author) {
- super(author);
- }
-
- public boolean hasAnchorId() {
- return StringUtils.isNotBlank(getAnchorId());
- }
-
- public String getAnchorId() {
- return anchorId;
- }
-
- public void setAnchorId(final String anchorId) {
- this.anchorId = anchorId;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public int hashCode() {
- return getId() != null ? getId().hashCode() : getFullname().hashCode();
- }
-
- @Override
- public boolean equals(final Object o) {
- return (o instanceof CoAuthor) && StringUtils.isNotBlank(getId()) ?
- getId().equals(((CoAuthor) o).getId()) :
- getFullname().equals(((CoAuthor) o).getFullname());
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java
deleted file mode 100644
index 90898f624..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import com.google.gson.Gson;
-
-public class CoAuthorSet {
-
- private Author author;
- private Authors coAuthors;
-
- public CoAuthorSet(final Author author, final Authors coAuthors) {
- super();
- this.author = author;
- this.coAuthors = coAuthors;
- }
-
- public Author getAuthor() {
- return author;
- }
-
- public void setAuthor(final Author author) {
- this.author = author;
- }
-
- public Authors getCoAuthors() {
- return coAuthors;
- }
-
- public void setCoAuthors(final Authors coAuthors) {
- this.coAuthors = coAuthors;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java
deleted file mode 100644
index a48e2d8a4..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.Set;
-
-import com.google.gson.Gson;
-
-public class CoAuthorSetLite {
-
- private String id;
-
- private Set coAuthors;
-
- public CoAuthorSetLite(final String id, final Set coAuthors) {
- super();
- this.id = id;
- this.coAuthors = coAuthors;
- }
-
- public Set getCoAuthors() {
- return coAuthors;
- }
-
- public void setCoAuthors(final Set coAuthors) {
- this.coAuthors = coAuthors;
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java
deleted file mode 100644
index 8e7eca269..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.Collection;
-import java.util.HashSet;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.Sets;
-import com.google.gson.Gson;
-
-public class CoAuthors extends HashSet implements Comparable {
-
- private static final long serialVersionUID = 2525591524516562892L;
-
- private Function hashFunction;
-
- private static Function defaultHashFunction = new Function() {
-
- @Override
- public Integer apply(final CoAuthors input) {
- int res = 0;
- for (final CoAuthor a : input) {
- res += a.hashCode();
- }
- return res;
-
- }
- };
-
- public CoAuthors() {
- super();
- }
-
- public CoAuthors(final Collection coauthors) {
- super(coauthors);
- }
-
- public CoAuthors(final CoAuthor coauthor) {
- super(Sets.newHashSet(coauthor));
- }
-
- public Function getHashFunction() {
- return hashFunction;
- }
-
- public void setHashFunction(final Function hashFunction) {
- this.hashFunction = hashFunction;
- }
-
- @Override
- public int compareTo(final CoAuthors a) {
- return ComparisonChain.start()
- .compare(this.size(), a.size(), Ordering.natural().nullsLast())
- .result();
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public boolean equals(final Object o) {
- final boolean res = o instanceof CoAuthors;
- return res && (Sets.intersection(this, (CoAuthors) o).size() == this.size());
- }
-
- public String hashCodeString() {
- return String.valueOf(hashCode());
- }
-
- @Override
- public int hashCode() {
- return (getHashFunction() != null) ? getHashFunction().apply(this) : defaultHashFunction.apply(this);
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java
deleted file mode 100644
index c9d4797e3..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java
+++ /dev/null
@@ -1,196 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-
-import com.google.common.base.Function;
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Ordering;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-
-public class GTAuthor implements Comparable {
-
- private String id;
- private Author author;
- private Authors merged;
- private CoAuthors coAuthors;
- private boolean anchor;
-
- public GTAuthor() {}
-
- public GTAuthor(final String id, final Authors merged, final CoAuthors coAuthors, final boolean anchor) {
- super();
-
- if ((merged == null) || merged.isEmpty())
- throw new IllegalArgumentException("empty merged author set, id: " + id);
-
- this.author = pickAuthor(merged);
- this.id = id;
- this.merged = merged;
- this.coAuthors = coAuthors;
- this.anchor = anchor;
- }
-
- class AuthorFrequency extends Author {
-
- private Integer frequency = new Integer(1);
-
- public AuthorFrequency(final Author a) {
- super(a);
- }
-
- public void increment() {
- setFrequency(getFrequency() + 1);
- }
-
- public Integer getFrequency() {
- return frequency;
- }
-
- public void setFrequency(final Integer frequency) {
- this.frequency = frequency;
- }
- }
-
- private Author pickAuthor(final Authors merged) {
- final List freq = getFrequencies(merged);
- Collections.sort(freq, Collections.reverseOrder(new Comparator() {
-
- @Override
- public int compare(final AuthorFrequency o1, final AuthorFrequency o2) {
- return ComparisonChain.start().compare(o1.getFullname().length(), o2.getFullname().length()).compare(o1.getFrequency(), o2.getFrequency())
- .result();
- }
- }));
-
- return Iterables.getFirst(freq, null);
- }
-
- private List getFrequencies(final Authors merged) {
- final Map countMap = Maps.newHashMap();
- for (final Author a : merged) {
- final Integer count = countMap.get(a.getFullname());
- if (count == null) {
- countMap.put(a.getFullname(), new Integer(1));
- } else {
- countMap.put(a.getFullname(), count + 1);
- }
- }
-
- return Lists.newArrayList(Iterables.transform(merged, new Function() {
-
- @Override
- public AuthorFrequency apply(final Author a) {
- final AuthorFrequency af = new AuthorFrequency(a);
- final Integer freq = countMap.get(af.getFullname());
- af.setFrequency(freq);
- return af;
- }
- }));
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public Author getAuthor() {
- return author;
- }
-
- public void setAuthor(final Author author) {
- this.author = author;
- }
-
- public boolean hasMerged() {
- return (getMerged() != null) && !getMerged().isEmpty();
- }
-
- public Authors getMerged() {
- return merged;
- }
-
- public void setMerged(final Authors merged) {
- this.merged = merged;
- }
-
- public boolean hasCoAuthors() {
- return (getCoAuthors() != null) && !getCoAuthors().isEmpty();
- }
-
- public CoAuthors getCoAuthors() {
- return coAuthors;
- }
-
- public void setCoAuthors(final CoAuthors coAuthors) {
- this.coAuthors = coAuthors;
- }
-
- public boolean isAnchor() {
- return anchor;
- }
-
- public void setAnchor(final boolean anchor) {
- this.anchor = anchor;
- }
-
- public static GTAuthor fromJson(final String json) {
- final Gson gson = new Gson();
- return gson.fromJson(json, GTAuthor.class);
- }
-
- public static List fromOafJson(final List json) {
-
- final GsonBuilder gb = new GsonBuilder();
- gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
- final Gson gson = gb.create();
-
- return Lists.newArrayList(Iterables.transform(json, new Function() {
- @Override
- public GTAuthor apply(final String s) {
- return gson.fromJson(s, GTAuthor.class);
- }
- }));
- }
-
- public static GTAuthor fromOafJson(final String json) {
-
- final GsonBuilder gb = new GsonBuilder();
- gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
- final Gson gson = gb.create();
-
- return gson.fromJson(json, GTAuthor.class);
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public int hashCode() {
- return getId().hashCode();
- }
-
- @Override
- public int compareTo(final GTAuthor o) {
- return ComparisonChain.start()
- .compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
- .result();
- }
-
- @Override
- public boolean equals(final Object o) {
- return (o instanceof GTAuthor) && getId().equals(((GTAuthor) o).getId());
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java
deleted file mode 100644
index cb541b953..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java
+++ /dev/null
@@ -1,104 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.lang.reflect.Type;
-
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParseException;
-
-public class GTAuthorOafSerialiser implements JsonDeserializer {
-
- private static final String VALUE = "value";
- private static final String SECONDNAMES = "secondnames";
- private static final String FIRSTNAME = "firstname";
- private static final String FULLNAME = "fullname";
- private static final String ID = "id";
- private static final String MERGEDPERSON = "mergedperson";
- private static final String METADATA = "metadata";
- private static final String ANCHOR_ID = "anchorId";
- private static final String COAUTHOR = "coauthor";
-
- @Override
- public GTAuthor deserialize(final JsonElement json, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException {
- final GTAuthor gta = new GTAuthor();
-
- gta.setAuthor(getAuthor(json));
- gta.setMerged(getMerged(json));
-
- gta.setCoAuthors(getCoAuthors(json));
-
- return gta;
- }
-
- private CoAuthors getCoAuthors(final JsonElement json) {
- final JsonObject obj = json.getAsJsonObject();
- if (!obj.has(COAUTHOR)) return null;
- return new CoAuthors(Lists.newArrayList(Iterables.transform(obj.get(COAUTHOR).getAsJsonArray(),
- new Function() {
-
- @Override
- public CoAuthor apply(final JsonElement in) {
- final CoAuthor a = new CoAuthor(getAuthor(in));
- final JsonObject jsonObject = in.getAsJsonObject();
- if (jsonObject.has(ANCHOR_ID)) {
- a.setAnchorId(jsonObject.get(ANCHOR_ID).getAsString());
- }
- return a;
- }
- })));
- }
-
- private Author getAuthor(final JsonElement json) {
-
- final Author a = new Author();
- a.setCoauthors(null);
- a.setMatches(null);
-
- final JsonObject jso = json.getAsJsonObject();
-
- a.setId(jso.has(ID) ? jso.get(ID).getAsString() : null);
-
- final JsonObject jsonObject = json.getAsJsonObject();
- if (jsonObject.has(METADATA)) {
- final JsonObject m = jsonObject.get(METADATA).getAsJsonObject();
- a.setFullname(getValue(m, FULLNAME));
- a.setFirstname(getValue(m, FIRSTNAME));
- a.setSecondnames(getValues(m, SECONDNAMES));
- }
- return a;
- }
-
- private Authors getMerged(final JsonElement json) {
- final JsonObject obj = json.getAsJsonObject();
- if (!obj.has(MERGEDPERSON)) return null;
- return new Authors(Lists.newArrayList(Iterables.transform(obj.get(MERGEDPERSON).getAsJsonArray(),
- new Function() {
-
- @Override
- public Author apply(final JsonElement in) {
- return getAuthor(in);
- }
- })));
- }
-
- private String getValues(final JsonObject m, final String fieldName) {
- return m.has(fieldName) ? Joiner.on(" ").join(Iterables.transform(m.get(fieldName).getAsJsonArray(), new Function() {
-
- @Override
- public String apply(final JsonElement in) {
- return in.getAsJsonObject().get(VALUE).getAsString();
- }
- })) : null;
- }
-
- private String getValue(final JsonObject m, final String fieldName) {
- return m.has(fieldName) ? m.get(fieldName).getAsJsonObject().get(VALUE).getAsString() : null;
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java
deleted file mode 100644
index 86d93deb9..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.List;
-
-import com.google.gson.Gson;
-
-public class Group {
-
- private String id;
- private int size;
- private List results;
-
- public Group() {}
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public int getSize() {
- return size;
- }
-
- public void setSize(final int size) {
- this.size = size;
- }
-
- public List getResults() {
- return results;
- }
-
- public void setResults(final List results) {
- this.results = results;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java
deleted file mode 100644
index b9fa7f966..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.Collection;
-
-import com.google.gson.Gson;
-
-public class InvertedAuthor {
-
- private Author author;
- private Collection ids;
-
- public InvertedAuthor() {}
-
- public InvertedAuthor(final Author author, final Collection ids) {
- super();
- this.author = author;
- this.ids = ids;
- }
-
- public Author getAuthor() {
- return author;
- }
-
- public void setAuthor(final Author author) {
- this.author = author;
- }
-
- public Collection getIds() {
- return ids;
- }
-
- public void setIds(final Collection ids) {
- this.ids = ids;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java
deleted file mode 100644
index e919069c7..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-public class Match extends Author {
-
- private double score;
-
- public Match() {
- super();
- }
-
- public static Match from(final Author a) {
- final Match m = new Match();
- if (a.isWellFormed()) {
- m.setFirstname(a.getFirstname());
- m.setSecondnames(a.getSecondnames());
- }
- m.setFullname(a.getFullname());
- m.setId(a.getId());
-
- return m;
- }
-
- public double getScore() {
- return score;
- }
-
- public void setScore(final double score) {
- this.score = score;
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java
deleted file mode 100644
index d35c3bb2c..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.List;
-
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Ordering;
-import com.google.gson.Gson;
-
-public class Result implements Comparable {
-
- private String id;
- private String originalId;
- private String title;
- private List authors;
-
- private double meanDistance;
-
- public Result() {}
-
- public String getId() {
- return id;
- }
-
- public void setId(final String id) {
- this.id = id;
- }
-
- public String getOriginalId() {
- return originalId;
- }
-
- public void setOriginalId(final String originalId) {
- this.originalId = originalId;
- }
-
- public String getTitle() {
- return title;
- }
-
- public void setTitle(final String title) {
- this.title = title;
- }
-
- public List getAuthors() {
- return authors;
- }
-
- public void setAuthors(final List authors) {
- this.authors = authors;
- }
-
- @Override
- public String toString() {
- return new Gson().toJson(this);
- }
-
- @Override
- public int compareTo(final Result o) {
- return ComparisonChain.start()
- .compare(this.getAuthors().size(), o.getAuthors().size(), Ordering.natural().nullsLast())
- .result();
- }
-
- public double getMeanDistance() {
- return meanDistance;
- }
-
- public void setMeanDistance(final double meanDistance) {
- this.meanDistance = meanDistance;
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java
deleted file mode 100644
index fc2221aef..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.HashMap;
-
-/**
- * Created by claudio on 07/03/16.
- */
-public class Subjects extends HashMap {
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java
deleted file mode 100644
index 04ba4c6c1..000000000
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package eu.dnetlib.pace.model.gt;
-
-import java.util.HashMap;
-import java.util.Map.Entry;
-
-/**
- * Created by claudio on 07/03/16.
- */
-public class SubjectsMap extends HashMap {
-
- public SubjectsMap mergeFrom(SubjectsMap sm) {
-
- for(Entry e : sm.entrySet()) {
- if (!this.containsKey(e.getKey())) {
- Subjects sub = new Subjects();
-
- sub.putAll(e.getValue());
-
- this.put(e.getKey(), sub);
- } else {
- for (Entry es : e.getValue().entrySet()) {
- final Subjects subjects = this.get(e.getKey());
- if (subjects.containsKey(es.getKey())) {
- subjects.put(es.getKey(), es.getValue() + subjects.get(es.getKey()));
- } else {
- subjects.put(es.getKey(), new Integer(1));
- }
- }
- }
- }
-
- return this;
- }
-
-}
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
index b89cffaed..5f4615080 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
@@ -5,7 +5,7 @@ import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import eu.dnetlib.pace.config.Config;
-import org.apache.commons.collections.CollectionUtils;
+
import java.util.Map;
import java.util.Set;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java
index f76947930..0c4165bf6 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java
@@ -1,14 +1,14 @@
package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
-import org.apache.commons.lang.StringUtils;
+
import java.util.Map;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java
index 34bbab7bf..fc98fc187 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java
@@ -3,7 +3,7 @@ package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.ComparatorClass;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.net.MalformedURLException;
import java.net.URL;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java
index 64bd75b0c..5dda0e25f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java
@@ -4,7 +4,7 @@ import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import java.util.Map;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java
index abc685ec6..4828a5dd8 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java
@@ -1,14 +1,11 @@
package eu.dnetlib.pace.tree.support;
import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.*;
-import eu.dnetlib.pace.model.gt.Match;
+import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.PaceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import java.io.Serializable;
-import java.util.Map;
/**
* The compare between two documents is given by the weighted mean of the field distances
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java
index 34a6aa2cf..b73b28f19 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java
@@ -8,7 +8,7 @@ import eu.dnetlib.pace.tree.support.TreeProcessor;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
index 2d2510112..2de729045 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java
@@ -1,15 +1,15 @@
package eu.dnetlib.pace.util;
-import org.apache.commons.lang.WordUtils;
import com.google.common.base.Function;
+import org.apache.commons.lang3.text.WordUtils;
public class Capitalise implements Function {
- private final char[] DELIM = { ' ', '-' };
+ private final char[] DELIM = {' ', '-'};
- @Override
- public String apply(final String s) {
- return WordUtils.capitalize(s.toLowerCase(), DELIM);
- }
+ @Override
+ public String apply(final String s) {
+ return WordUtils.capitalize(s.toLowerCase(), DELIM);
+ }
};