diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml index 5c6466d..cb16ffe 100644 --- a/dnet-pace-core/pom.xml +++ b/dnet-pace-core/pom.xml @@ -27,17 +27,14 @@ gson - commons-lang - commons-lang + org.apache.commons + commons-lang3 commons-io commons-io - - commons-collections - commons-collections - + org.antlr stringtemplate diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java index 7fdcce4..01f1461 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java @@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering; import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.Collection; import java.util.HashSet; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java index 7ede4c2..60d9569 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java @@ -6,7 +6,7 @@ import java.util.Map; import com.google.common.base.Predicate; import eu.dnetlib.pace.model.Field; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index 769ecf5..b95d1c7 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering; import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.*; import java.util.stream.Collectors; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java index 6fe525f..309650f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java @@ -8,7 +8,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; @ClusteringClass("lowercase") public class LowercaseClustering extends AbstractClusteringFunction { diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java index aeb790f..2391685 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java @@ -2,7 +2,7 @@ package eu.dnetlib.pace.clustering; import java.util.Set; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import eu.dnetlib.pace.common.AbstractPaceFunctions; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java index 26b07f0..db8d90b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java @@ -5,7 +5,7 @@ import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Person; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.Collection; import java.util.List; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java index fd8e7a3..50cea4d 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java @@ -5,8 +5,8 @@ import java.util.List; import java.util.Map; import eu.dnetlib.pace.config.Config; -import org.apache.commons.lang.RandomStringUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Lists; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index a4901fd..b980018 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -9,9 +9,8 @@ import eu.dnetlib.pace.clustering.NGramUtils; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.FieldList; import eu.dnetlib.pace.model.FieldListImpl; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.io.StringWriter; @@ -25,295 +24,293 @@ import java.util.stream.Collectors; * Set of common functions for the framework * * @author claudio - * */ public abstract class AbstractPaceFunctions { - //city map to be used when translating the city names into codes - private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); + //city map to be used when translating the city names into codes + private static Map cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); - //list of stopwords in different languages - protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); - protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); - protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); - protected static Set stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt"); - protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt"); - protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt"); + //list of stopwords in different languages + protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); + protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); + protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); + protected static Set stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt"); + protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt"); + protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt"); - //blacklist of ngrams: to avoid generic keys - protected static Set ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt"); + //blacklist of ngrams: to avoid generic keys + protected static Set ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt"); - private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "; - private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń"; - private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn"; + private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "; + private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń"; + private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn"; - //doi prefix for normalization - public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)"; + //doi prefix for normalization + public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)"; - private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?"); + private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?"); - private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); + private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); - protected final static FieldList EMPTY_FIELD = new FieldListImpl(); + protected final static FieldList EMPTY_FIELD = new FieldListImpl(); - protected String concat(final List l) { - return Joiner.on(" ").skipNulls().join(l); - } + protected String concat(final List l) { + return Joiner.on(" ").skipNulls().join(l); + } - protected String cleanup(final String s) { - final String s0 = unicodeNormalization(s.toLowerCase()); - final String s1 = fixAliases(s0); - final String s2 = nfd(s1); - final String s3 = s2.replaceAll("–", " "); - final String s4 = s3.replaceAll("&", " "); - final String s5 = s4.replaceAll(""", " "); - final String s6 = s5.replaceAll("−", " "); - final String s7 = s6.replaceAll("([0-9]+)", " $1 "); - final String s8 = s7.replaceAll("[^\\p{ASCII}]", ""); - final String s9 = s8.replaceAll("[\\p{Punct}]", " "); - final String s10 = s9.replaceAll("\\n", " "); - final String s11 = s10.replaceAll("(?m)\\s+", " "); - final String s12 = s11.trim(); - return s12; - } + protected String cleanup(final String s) { + final String s0 = unicodeNormalization(s.toLowerCase()); + final String s1 = fixAliases(s0); + final String s2 = nfd(s1); + final String s3 = s2.replaceAll("–", " "); + final String s4 = s3.replaceAll("&", " "); + final String s5 = s4.replaceAll(""", " "); + final String s6 = s5.replaceAll("−", " "); + final String s7 = s6.replaceAll("([0-9]+)", " $1 "); + final String s8 = s7.replaceAll("[^\\p{ASCII}]", ""); + final String s9 = s8.replaceAll("[\\p{Punct}]", " "); + final String s10 = s9.replaceAll("\\n", " "); + final String s11 = s10.replaceAll("(?m)\\s+", " "); + final String s12 = s11.trim(); + return s12; + } - protected boolean checkNumbers(final String a, final String b) { - final String numbersA = getNumbers(a); - final String numbersB = getNumbers(b); - final String romansA = getRomans(a); - final String romansB = getRomans(b); - return !numbersA.equals(numbersB) || !romansA.equals(romansB); - } + protected boolean checkNumbers(final String a, final String b) { + final String numbersA = getNumbers(a); + final String numbersB = getNumbers(b); + final String romansA = getRomans(a); + final String romansB = getRomans(b); + return !numbersA.equals(numbersB) || !romansA.equals(romansB); + } - protected String getRomans(final String s) { - final StringBuilder sb = new StringBuilder(); - for (final String t : s.split(" ")) { - sb.append(isRoman(t) ? t : ""); - } - return sb.toString(); - } + protected String getRomans(final String s) { + final StringBuilder sb = new StringBuilder(); + for (final String t : s.split(" ")) { + sb.append(isRoman(t) ? t : ""); + } + return sb.toString(); + } - protected boolean isRoman(final String s) { - return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop"); - } + protected boolean isRoman(final String s) { + return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop"); + } - protected String getNumbers(final String s) { - final StringBuilder sb = new StringBuilder(); - for (final String t : s.split(" ")) { - sb.append(isNumber(t)? t : ""); - } - return sb.toString(); - } + protected String getNumbers(final String s) { + final StringBuilder sb = new StringBuilder(); + for (final String t : s.split(" ")) { + sb.append(isNumber(t) ? t : ""); + } + return sb.toString(); + } - public boolean isNumber(String strNum) { - if (strNum == null) { - return false; - } - return numberPattern.matcher(strNum).matches(); - } + public boolean isNumber(String strNum) { + if (strNum == null) { + return false; + } + return numberPattern.matcher(strNum).matches(); + } - protected static String fixAliases(final String s) { - final StringBuilder sb = new StringBuilder(); - for (final char ch : Lists.charactersOf(s)) { - final int i = StringUtils.indexOf(aliases_from, ch); - sb.append(i >= 0 ? aliases_to.charAt(i) : ch); - } - return sb.toString(); - } + protected static String fixAliases(final String s) { + final StringBuilder sb = new StringBuilder(); + for (final char ch : Lists.charactersOf(s)) { + final int i = StringUtils.indexOf(aliases_from, ch); + sb.append(i >= 0 ? aliases_to.charAt(i) : ch); + } + return sb.toString(); + } - protected String removeSymbols(final String s) { - final StringBuilder sb = new StringBuilder(); + protected String removeSymbols(final String s) { + final StringBuilder sb = new StringBuilder(); - for (final char ch : Lists.charactersOf(s)) { - sb.append(StringUtils.contains(alpha, ch) ? ch : " "); - } - return sb.toString().replaceAll("\\s+", " "); - } + for (final char ch : Lists.charactersOf(s)) { + sb.append(StringUtils.contains(alpha, ch) ? ch : " "); + } + return sb.toString().replaceAll("\\s+", " "); + } - protected String getFirstValue(final Field values) { - return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : ""; - } + protected String getFirstValue(final Field values) { + return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : ""; + } - protected boolean notNull(final String s) { - return s != null; - } + protected boolean notNull(final String s) { + return s != null; + } - protected String normalize(final String s) { - return nfd(unicodeNormalization(s)) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings - .replaceAll("[^ \\w]+", "") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } + protected String normalize(final String s) { + return nfd(unicodeNormalization(s)) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings + .replaceAll("[^ \\w]+", "") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } - public String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } + public String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } - public String unicodeNormalization(final String s) { + public String unicodeNormalization(final String s) { - Matcher m = hexUnicodePattern.matcher(s); - StringBuffer buf = new StringBuffer(s.length()); - while (m.find()) { - String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16)); - m.appendReplacement(buf, Matcher.quoteReplacement(ch)); - } - m.appendTail(buf); - return buf.toString(); - } + Matcher m = hexUnicodePattern.matcher(s); + StringBuffer buf = new StringBuffer(s.length()); + while (m.find()) { + String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16)); + m.appendReplacement(buf, Matcher.quoteReplacement(ch)); + } + m.appendTail(buf); + return buf.toString(); + } - protected String filterStopWords(final String s, final Set stopwords) { - final StringTokenizer st = new StringTokenizer(s); - final StringBuilder sb = new StringBuilder(); - while (st.hasMoreTokens()) { - final String token = st.nextToken(); - if (!stopwords.contains(token)) { - sb.append(token); - sb.append(" "); - } - } - return sb.toString().trim(); - } + protected String filterStopWords(final String s, final Set stopwords) { + final StringTokenizer st = new StringTokenizer(s); + final StringBuilder sb = new StringBuilder(); + while (st.hasMoreTokens()) { + final String token = st.nextToken(); + if (!stopwords.contains(token)) { + sb.append(token); + sb.append(" "); + } + } + return sb.toString().trim(); + } - public String filterAllStopWords(String s) { + public String filterAllStopWords(String s) { - s = filterStopWords(s, stopwords_en); - s = filterStopWords(s, stopwords_de); - s = filterStopWords(s, stopwords_it); - s = filterStopWords(s, stopwords_fr); - s = filterStopWords(s, stopwords_pt); - s = filterStopWords(s, stopwords_es); + s = filterStopWords(s, stopwords_en); + s = filterStopWords(s, stopwords_de); + s = filterStopWords(s, stopwords_it); + s = filterStopWords(s, stopwords_fr); + s = filterStopWords(s, stopwords_pt); + s = filterStopWords(s, stopwords_es); - return s; - } + return s; + } - protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) { - final Set newset = Sets.newLinkedHashSet(); - for (final String s : set) { - if (!ngramBlacklist.contains(s)) { - newset.add(s); - } - } - return newset; - } + protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) { + final Set newset = Sets.newLinkedHashSet(); + for (final String s : set) { + if (!ngramBlacklist.contains(s)) { + newset.add(s); + } + } + return newset; + } - public static Set loadFromClasspath(final String classpath) { - final Set h = Sets.newHashSet(); - try { - for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) { - h.add(s); - } - } catch (final Throwable e) { - return Sets.newHashSet(); - } - return h; - } + public static Set loadFromClasspath(final String classpath) { + final Set h = Sets.newHashSet(); + try { + for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) { + h.add(s); + } + } catch (final Throwable e) { + return Sets.newHashSet(); + } + return h; + } - public static Map loadMapFromClasspath(final String classpath) { - final Map m = new HashMap<>(); - try { - for (final String s: IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) { - //string is like this: code;word1;word2;word3 - String[] line = s.split(";"); - String value = line[0]; - for (int i=1; i(); - } - return m; - } + public static Map loadMapFromClasspath(final String classpath) { + final Map m = new HashMap<>(); + try { + for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) { + //string is like this: code;word1;word2;word3 + String[] line = s.split(";"); + String value = line[0]; + for (int i = 1; i < line.length; i++) { + m.put(line[i].toLowerCase(), value); + } + } + } catch (final Throwable e) { + return new HashMap<>(); + } + return m; + } - public String removeKeywords(String s, Set keywords) { + public String removeKeywords(String s, Set keywords) { - s = " " + s + " "; - for (String k: keywords ) { - s = s.replaceAll(k.toLowerCase(), ""); - } + s = " " + s + " "; + for (String k : keywords) { + s = s.replaceAll(k.toLowerCase(), ""); + } - return s.trim(); - } + return s.trim(); + } - public double commonElementsPercentage(Set s1, Set s2){ + public double commonElementsPercentage(Set s1, Set s2) { - int longer = (s1.size()>s2.size())?s1.size():s2.size(); + double longer = Math.max(s1.size(), s2.size()); + return (double) s1.stream().filter(s2::contains).count() / longer; + } - return (double)CollectionUtils.intersection(s1,s2).size()/(double)longer; - } + //convert the set of keywords to codes + public Set toCodes(Set keywords, Map translationMap) { + return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); + } - //convert the set of keywords to codes - public Set toCodes(Set keywords, Map translationMap) { - return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); - } + public Set keywordsToCodes(Set keywords, Map translationMap) { + return toCodes(keywords, translationMap); + } - public Set keywordsToCodes(Set keywords, Map translationMap) { - return toCodes(keywords, translationMap); - } + public Set citiesToCodes(Set keywords) { + return toCodes(keywords, cityMap); + } - public Set citiesToCodes(Set keywords) { - return toCodes(keywords, cityMap); - } + protected String firstLC(final String s) { + return StringUtils.substring(s, 0, 1).toLowerCase(); + } - protected String firstLC(final String s) { - return StringUtils.substring(s, 0, 1).toLowerCase(); - } + protected Iterable tokens(final String s, final int maxTokens) { + return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); + } - protected Iterable tokens(final String s, final int maxTokens) { - return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); - } + public String normalizePid(String pid) { + return pid.toLowerCase().replaceAll(DOI_PREFIX, ""); + } - public String normalizePid(String pid) { - return pid.toLowerCase().replaceAll(DOI_PREFIX, ""); - } + //get the list of keywords into the input string + public Set getKeywords(String s1, Map translationMap, int windowSize) { - //get the list of keywords into the input string - public Set getKeywords(String s1, Map translationMap, int windowSize){ + String s = s1; - String s = s1; + List tokens = Arrays.asList(s.toLowerCase().split(" ")); - List tokens = Arrays.asList(s.toLowerCase().split(" ")); + Set codes = new HashSet<>(); - Set codes = new HashSet<>(); + if (tokens.size() < windowSize) + windowSize = tokens.size(); - if (tokens.size() getCities(String s1, int windowSize) { + return getKeywords(s1, cityMap, windowSize); + } - public Set getCities(String s1, int windowSize) { - return getKeywords(s1, cityMap, windowSize); - } - - public static String readFromClasspath(final String filename, final Class clazz) { - final StringWriter sw = new StringWriter(); - try { - IOUtils.copy(clazz.getResourceAsStream(filename), sw); - return sw.toString(); - } catch (final IOException e) { - throw new RuntimeException("cannot load resource from classpath: " + filename); - } - } + public static String readFromClasspath(final String filename, final Class clazz) { + final StringWriter sw = new StringWriter(); + try { + IOUtils.copy(clazz.getResourceAsStream(filename), sw); + return sw.toString(); + } catch (final IOException e) { + throw new RuntimeException("cannot load resource from classpath: " + filename); + } + } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java index 3cc5a38..78fc18a 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java @@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.pace.util.PaceException; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.io.Serializable; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 12c578c..f7831ed 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -1,8 +1,9 @@ package eu.dnetlib.pace.model; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Splitter; import com.google.common.collect.Lists; -import com.google.gson.Gson; import eu.dnetlib.pace.config.Type; import java.io.Serializable; @@ -103,7 +104,11 @@ public class FieldDef implements Serializable { @Override public String toString() { - return new Gson().toJson(this); + try { + return new ObjectMapper().writeValueAsString(this); + } catch (JsonProcessingException e) { + return null; + } } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java index d4a11c0..635178b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java @@ -1,11 +1,11 @@ package eu.dnetlib.pace.model; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.base.Joiner; -import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.google.gson.Gson; import eu.dnetlib.pace.config.Type; import java.util.Collection; @@ -283,7 +283,12 @@ public class FieldListImpl extends AbstractField implements FieldList { case String: return Joiner.on(" ").join(stringList()); case JSON: - final String json = new Gson().toJson(stringList()); + String json; + try { + json = new ObjectMapper().writeValueAsString(this); + } catch (JsonProcessingException e) { + json = null; + } return json; default: throw new IllegalArgumentException("Unknown type: " + getType().toString()); diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java index bf86127..0a72c07 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java @@ -2,12 +2,12 @@ package eu.dnetlib.pace.model; import java.net.MalformedURLException; import java.net.URL; +import java.util.Collections; import java.util.Iterator; import java.util.List; import eu.dnetlib.pace.config.Type; -import org.apache.commons.collections.iterators.SingletonIterator; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * The Class FieldValueImpl. @@ -124,7 +124,7 @@ public class FieldValueImpl extends AbstractField implements FieldValue { @Override @SuppressWarnings("unchecked") public Iterator iterator() { - return new SingletonIterator(this); + return Collections.singleton((Field) this).iterator(); } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java deleted file mode 100644 index 17bd49d..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Author.java +++ /dev/null @@ -1,129 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.List; -import java.util.Set; - -import org.apache.commons.lang.StringUtils; - -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Lists; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; -import com.google.gson.Gson; - -public class Author implements Comparable { - - private String id; - private String fullname; - private String firstname; - private String secondnames; - - private List matches = Lists.newArrayList(); - private Set coauthors = Sets.newHashSet(); - private SubjectsMap subjectsMap = new SubjectsMap(); - - public Author() { - super(); - } - - public Author(final Author a) { - this.id = a.getId(); - this.fullname = a.getFullname(); - this.firstname = a.getFirstname(); - this.secondnames = a.getSecondnames(); - - this.matches = a.getMatches(); - this.coauthors = a.getCoauthors(); - this.subjectsMap = a.getSubjectsMap(); - } - - public boolean hasMatches() { - return (getMatches() != null) && !getMatches().isEmpty(); - } - - public boolean hasCoauthors() { - return (getCoauthors() != null) && !getCoauthors().isEmpty(); - } - - public boolean isWellFormed() { - return StringUtils.isNotBlank(getSecondnames()) && StringUtils.isNotBlank(getFirstname()); - } - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getFullname() { - return fullname; - } - - public void setFullname(final String fullname) { - this.fullname = fullname; - } - - public String getFirstname() { - return firstname; - } - - public void setFirstname(final String firstname) { - this.firstname = firstname; - } - - public String getSecondnames() { - return secondnames; - } - - public void setSecondnames(final String secondnames) { - this.secondnames = secondnames; - } - - public List getMatches() { - return matches; - } - - public void setMatches(final List matches) { - this.matches = matches; - } - - public Set getCoauthors() { - return coauthors; - } - - public void setCoauthors(final Set coauthors) { - this.coauthors = coauthors; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public int hashCode() { - return getId().hashCode(); - } - - @Override - public int compareTo(final Author o) { - return ComparisonChain.start() - .compare(this.getId(), o.getId(), Ordering.natural().nullsLast()) - .result(); - } - - @Override - public boolean equals(final Object o) { - return (o instanceof Author) && getId().equals(((Author) o).getId()); - } - - public SubjectsMap getSubjectsMap() { - return subjectsMap; - } - - public void setSubjectsMap(final SubjectsMap subjectsMap) { - this.subjectsMap = subjectsMap; - } -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java deleted file mode 100644 index c3f2576..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/AuthorSet.java +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import com.google.gson.Gson; - -public class AuthorSet { - - private String id; - private Authors authors; - - public AuthorSet(final String id, final Authors authors) { - super(); - this.id = id; - this.authors = authors; - } - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public Authors getAuthors() { - return authors; - } - - public void setAuthors(final Authors authors) { - this.authors = authors; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java deleted file mode 100644 index e74c438..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Authors.java +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.Collection; -import java.util.HashSet; - -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; -import com.google.gson.Gson; - -public class Authors extends HashSet implements Comparable { - - private static final long serialVersionUID = -6878376220805286142L; - - public Authors() { - super(); - } - - public Authors(final Collection authors) { - super(authors); - } - - public Authors(final Author author) { - super(Sets.newHashSet(author)); - } - - @Override - public int compareTo(final Authors a) { - return ComparisonChain.start() - .compare(this.size(), a.size(), Ordering.natural().nullsLast()) - .result(); - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public boolean equals(final Object o) { - final boolean res = o instanceof Authors; - return res && (Sets.intersection(this, (Authors) o).size() == this.size()); - } - - @Override - public int hashCode() { - int res = 0; - for (final Author a : this) { - res += a.hashCode(); - } - return res; - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java deleted file mode 100644 index d4ce32d..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthor.java +++ /dev/null @@ -1,50 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import com.google.gson.Gson; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -public class CoAuthor extends Author { - - private static final Log log = LogFactory.getLog(CoAuthor.class); - private String anchorId = null; - - public CoAuthor() { - super(); - } - - public CoAuthor(final Author author) { - super(author); - } - - public boolean hasAnchorId() { - return StringUtils.isNotBlank(getAnchorId()); - } - - public String getAnchorId() { - return anchorId; - } - - public void setAnchorId(final String anchorId) { - this.anchorId = anchorId; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public int hashCode() { - return getId() != null ? getId().hashCode() : getFullname().hashCode(); - } - - @Override - public boolean equals(final Object o) { - return (o instanceof CoAuthor) && StringUtils.isNotBlank(getId()) ? - getId().equals(((CoAuthor) o).getId()) : - getFullname().equals(((CoAuthor) o).getFullname()); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java deleted file mode 100644 index 90898f6..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSet.java +++ /dev/null @@ -1,36 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import com.google.gson.Gson; - -public class CoAuthorSet { - - private Author author; - private Authors coAuthors; - - public CoAuthorSet(final Author author, final Authors coAuthors) { - super(); - this.author = author; - this.coAuthors = coAuthors; - } - - public Author getAuthor() { - return author; - } - - public void setAuthor(final Author author) { - this.author = author; - } - - public Authors getCoAuthors() { - return coAuthors; - } - - public void setCoAuthors(final Authors coAuthors) { - this.coAuthors = coAuthors; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java deleted file mode 100644 index a48e2d8..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthorSetLite.java +++ /dev/null @@ -1,40 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.Set; - -import com.google.gson.Gson; - -public class CoAuthorSetLite { - - private String id; - - private Set coAuthors; - - public CoAuthorSetLite(final String id, final Set coAuthors) { - super(); - this.id = id; - this.coAuthors = coAuthors; - } - - public Set getCoAuthors() { - return coAuthors; - } - - public void setCoAuthors(final Set coAuthors) { - this.coAuthors = coAuthors; - } - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java deleted file mode 100644 index 8e7eca2..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/CoAuthors.java +++ /dev/null @@ -1,78 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.Collection; -import java.util.HashSet; - -import com.google.common.base.Function; -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Ordering; -import com.google.common.collect.Sets; -import com.google.gson.Gson; - -public class CoAuthors extends HashSet implements Comparable { - - private static final long serialVersionUID = 2525591524516562892L; - - private Function hashFunction; - - private static Function defaultHashFunction = new Function() { - - @Override - public Integer apply(final CoAuthors input) { - int res = 0; - for (final CoAuthor a : input) { - res += a.hashCode(); - } - return res; - - } - }; - - public CoAuthors() { - super(); - } - - public CoAuthors(final Collection coauthors) { - super(coauthors); - } - - public CoAuthors(final CoAuthor coauthor) { - super(Sets.newHashSet(coauthor)); - } - - public Function getHashFunction() { - return hashFunction; - } - - public void setHashFunction(final Function hashFunction) { - this.hashFunction = hashFunction; - } - - @Override - public int compareTo(final CoAuthors a) { - return ComparisonChain.start() - .compare(this.size(), a.size(), Ordering.natural().nullsLast()) - .result(); - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public boolean equals(final Object o) { - final boolean res = o instanceof CoAuthors; - return res && (Sets.intersection(this, (CoAuthors) o).size() == this.size()); - } - - public String hashCodeString() { - return String.valueOf(hashCode()); - } - - @Override - public int hashCode() { - return (getHashFunction() != null) ? getHashFunction().apply(this) : defaultHashFunction.apply(this); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java deleted file mode 100644 index c9d4797..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthor.java +++ /dev/null @@ -1,196 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; - -import com.google.common.base.Function; -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Ordering; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; - -public class GTAuthor implements Comparable { - - private String id; - private Author author; - private Authors merged; - private CoAuthors coAuthors; - private boolean anchor; - - public GTAuthor() {} - - public GTAuthor(final String id, final Authors merged, final CoAuthors coAuthors, final boolean anchor) { - super(); - - if ((merged == null) || merged.isEmpty()) - throw new IllegalArgumentException("empty merged author set, id: " + id); - - this.author = pickAuthor(merged); - this.id = id; - this.merged = merged; - this.coAuthors = coAuthors; - this.anchor = anchor; - } - - class AuthorFrequency extends Author { - - private Integer frequency = new Integer(1); - - public AuthorFrequency(final Author a) { - super(a); - } - - public void increment() { - setFrequency(getFrequency() + 1); - } - - public Integer getFrequency() { - return frequency; - } - - public void setFrequency(final Integer frequency) { - this.frequency = frequency; - } - } - - private Author pickAuthor(final Authors merged) { - final List freq = getFrequencies(merged); - Collections.sort(freq, Collections.reverseOrder(new Comparator() { - - @Override - public int compare(final AuthorFrequency o1, final AuthorFrequency o2) { - return ComparisonChain.start().compare(o1.getFullname().length(), o2.getFullname().length()).compare(o1.getFrequency(), o2.getFrequency()) - .result(); - } - })); - - return Iterables.getFirst(freq, null); - } - - private List getFrequencies(final Authors merged) { - final Map countMap = Maps.newHashMap(); - for (final Author a : merged) { - final Integer count = countMap.get(a.getFullname()); - if (count == null) { - countMap.put(a.getFullname(), new Integer(1)); - } else { - countMap.put(a.getFullname(), count + 1); - } - } - - return Lists.newArrayList(Iterables.transform(merged, new Function() { - - @Override - public AuthorFrequency apply(final Author a) { - final AuthorFrequency af = new AuthorFrequency(a); - final Integer freq = countMap.get(af.getFullname()); - af.setFrequency(freq); - return af; - } - })); - } - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public Author getAuthor() { - return author; - } - - public void setAuthor(final Author author) { - this.author = author; - } - - public boolean hasMerged() { - return (getMerged() != null) && !getMerged().isEmpty(); - } - - public Authors getMerged() { - return merged; - } - - public void setMerged(final Authors merged) { - this.merged = merged; - } - - public boolean hasCoAuthors() { - return (getCoAuthors() != null) && !getCoAuthors().isEmpty(); - } - - public CoAuthors getCoAuthors() { - return coAuthors; - } - - public void setCoAuthors(final CoAuthors coAuthors) { - this.coAuthors = coAuthors; - } - - public boolean isAnchor() { - return anchor; - } - - public void setAnchor(final boolean anchor) { - this.anchor = anchor; - } - - public static GTAuthor fromJson(final String json) { - final Gson gson = new Gson(); - return gson.fromJson(json, GTAuthor.class); - } - - public static List fromOafJson(final List json) { - - final GsonBuilder gb = new GsonBuilder(); - gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser()); - final Gson gson = gb.create(); - - return Lists.newArrayList(Iterables.transform(json, new Function() { - @Override - public GTAuthor apply(final String s) { - return gson.fromJson(s, GTAuthor.class); - } - })); - } - - public static GTAuthor fromOafJson(final String json) { - - final GsonBuilder gb = new GsonBuilder(); - gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser()); - final Gson gson = gb.create(); - - return gson.fromJson(json, GTAuthor.class); - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public int hashCode() { - return getId().hashCode(); - } - - @Override - public int compareTo(final GTAuthor o) { - return ComparisonChain.start() - .compare(this.getId(), o.getId(), Ordering.natural().nullsLast()) - .result(); - } - - @Override - public boolean equals(final Object o) { - return (o instanceof GTAuthor) && getId().equals(((GTAuthor) o).getId()); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java deleted file mode 100644 index cb541b9..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorOafSerialiser.java +++ /dev/null @@ -1,104 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.lang.reflect.Type; - -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; - -public class GTAuthorOafSerialiser implements JsonDeserializer { - - private static final String VALUE = "value"; - private static final String SECONDNAMES = "secondnames"; - private static final String FIRSTNAME = "firstname"; - private static final String FULLNAME = "fullname"; - private static final String ID = "id"; - private static final String MERGEDPERSON = "mergedperson"; - private static final String METADATA = "metadata"; - private static final String ANCHOR_ID = "anchorId"; - private static final String COAUTHOR = "coauthor"; - - @Override - public GTAuthor deserialize(final JsonElement json, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException { - final GTAuthor gta = new GTAuthor(); - - gta.setAuthor(getAuthor(json)); - gta.setMerged(getMerged(json)); - - gta.setCoAuthors(getCoAuthors(json)); - - return gta; - } - - private CoAuthors getCoAuthors(final JsonElement json) { - final JsonObject obj = json.getAsJsonObject(); - if (!obj.has(COAUTHOR)) return null; - return new CoAuthors(Lists.newArrayList(Iterables.transform(obj.get(COAUTHOR).getAsJsonArray(), - new Function() { - - @Override - public CoAuthor apply(final JsonElement in) { - final CoAuthor a = new CoAuthor(getAuthor(in)); - final JsonObject jsonObject = in.getAsJsonObject(); - if (jsonObject.has(ANCHOR_ID)) { - a.setAnchorId(jsonObject.get(ANCHOR_ID).getAsString()); - } - return a; - } - }))); - } - - private Author getAuthor(final JsonElement json) { - - final Author a = new Author(); - a.setCoauthors(null); - a.setMatches(null); - - final JsonObject jso = json.getAsJsonObject(); - - a.setId(jso.has(ID) ? jso.get(ID).getAsString() : null); - - final JsonObject jsonObject = json.getAsJsonObject(); - if (jsonObject.has(METADATA)) { - final JsonObject m = jsonObject.get(METADATA).getAsJsonObject(); - a.setFullname(getValue(m, FULLNAME)); - a.setFirstname(getValue(m, FIRSTNAME)); - a.setSecondnames(getValues(m, SECONDNAMES)); - } - return a; - } - - private Authors getMerged(final JsonElement json) { - final JsonObject obj = json.getAsJsonObject(); - if (!obj.has(MERGEDPERSON)) return null; - return new Authors(Lists.newArrayList(Iterables.transform(obj.get(MERGEDPERSON).getAsJsonArray(), - new Function() { - - @Override - public Author apply(final JsonElement in) { - return getAuthor(in); - } - }))); - } - - private String getValues(final JsonObject m, final String fieldName) { - return m.has(fieldName) ? Joiner.on(" ").join(Iterables.transform(m.get(fieldName).getAsJsonArray(), new Function() { - - @Override - public String apply(final JsonElement in) { - return in.getAsJsonObject().get(VALUE).getAsString(); - } - })) : null; - } - - private String getValue(final JsonObject m, final String fieldName) { - return m.has(fieldName) ? m.get(fieldName).getAsJsonObject().get(VALUE).getAsString() : null; - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java deleted file mode 100644 index 86d93de..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Group.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.List; - -import com.google.gson.Gson; - -public class Group { - - private String id; - private int size; - private List results; - - public Group() {} - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public int getSize() { - return size; - } - - public void setSize(final int size) { - this.size = size; - } - - public List getResults() { - return results; - } - - public void setResults(final List results) { - this.results = results; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java deleted file mode 100644 index b9fa7f9..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/InvertedAuthor.java +++ /dev/null @@ -1,41 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.Collection; - -import com.google.gson.Gson; - -public class InvertedAuthor { - - private Author author; - private Collection ids; - - public InvertedAuthor() {} - - public InvertedAuthor(final Author author, final Collection ids) { - super(); - this.author = author; - this.ids = ids; - } - - public Author getAuthor() { - return author; - } - - public void setAuthor(final Author author) { - this.author = author; - } - - public Collection getIds() { - return ids; - } - - public void setIds(final Collection ids) { - this.ids = ids; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java deleted file mode 100644 index e919069..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Match.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -public class Match extends Author { - - private double score; - - public Match() { - super(); - } - - public static Match from(final Author a) { - final Match m = new Match(); - if (a.isWellFormed()) { - m.setFirstname(a.getFirstname()); - m.setSecondnames(a.getSecondnames()); - } - m.setFullname(a.getFullname()); - m.setId(a.getId()); - - return m; - } - - public double getScore() { - return score; - } - - public void setScore(final double score) { - this.score = score; - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java deleted file mode 100644 index d35c3bb..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Result.java +++ /dev/null @@ -1,72 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.List; - -import com.google.common.collect.ComparisonChain; -import com.google.common.collect.Ordering; -import com.google.gson.Gson; - -public class Result implements Comparable { - - private String id; - private String originalId; - private String title; - private List authors; - - private double meanDistance; - - public Result() {} - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getOriginalId() { - return originalId; - } - - public void setOriginalId(final String originalId) { - this.originalId = originalId; - } - - public String getTitle() { - return title; - } - - public void setTitle(final String title) { - this.title = title; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(final List authors) { - this.authors = authors; - } - - @Override - public String toString() { - return new Gson().toJson(this); - } - - @Override - public int compareTo(final Result o) { - return ComparisonChain.start() - .compare(this.getAuthors().size(), o.getAuthors().size(), Ordering.natural().nullsLast()) - .result(); - } - - public double getMeanDistance() { - return meanDistance; - } - - public void setMeanDistance(final double meanDistance) { - this.meanDistance = meanDistance; - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java deleted file mode 100644 index fc2221a..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/Subjects.java +++ /dev/null @@ -1,10 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.HashMap; - -/** - * Created by claudio on 07/03/16. - */ -public class Subjects extends HashMap { - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java deleted file mode 100644 index 04ba4c6..0000000 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/gt/SubjectsMap.java +++ /dev/null @@ -1,35 +0,0 @@ -package eu.dnetlib.pace.model.gt; - -import java.util.HashMap; -import java.util.Map.Entry; - -/** - * Created by claudio on 07/03/16. - */ -public class SubjectsMap extends HashMap { - - public SubjectsMap mergeFrom(SubjectsMap sm) { - - for(Entry e : sm.entrySet()) { - if (!this.containsKey(e.getKey())) { - Subjects sub = new Subjects(); - - sub.putAll(e.getValue()); - - this.put(e.getKey(), sub); - } else { - for (Entry es : e.getValue().entrySet()) { - final Subjects subjects = this.get(e.getKey()); - if (subjects.containsKey(es.getKey())) { - subjects.put(es.getKey(), es.getValue() + subjects.get(es.getKey())); - } else { - subjects.put(es.getKey(), new Integer(1)); - } - } - } - } - - return this; - } - -} diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java index b89cffa..5f46150 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java @@ -5,7 +5,7 @@ import eu.dnetlib.pace.tree.support.AbstractComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; import eu.dnetlib.pace.config.Config; -import org.apache.commons.collections.CollectionUtils; + import java.util.Map; import java.util.Set; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java index f769479..0c4165b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/SubStringLevenstein.java @@ -1,14 +1,14 @@ package eu.dnetlib.pace.tree; import eu.dnetlib.pace.config.Config; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import com.wcohen.ss.AbstractStringDistance; import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.tree.support.AbstractComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; -import org.apache.commons.lang.StringUtils; + import java.util.Map; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java index 34bbab7..fc98fc1 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/UrlMatcher.java @@ -3,7 +3,7 @@ package eu.dnetlib.pace.tree; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.tree.support.ComparatorClass; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.net.MalformedURLException; import java.net.URL; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java index 64bd75b..5dda0e2 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/YearMatch.java @@ -4,7 +4,7 @@ import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.tree.support.AbstractComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.Map; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java index abc685e..4828a5d 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java @@ -1,14 +1,11 @@ package eu.dnetlib.pace.tree.support; import eu.dnetlib.pace.config.Config; -import eu.dnetlib.pace.model.*; -import eu.dnetlib.pace.model.gt.Match; +import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.PaceException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import java.io.Serializable; -import java.util.Map; /** * The compare between two documents is given by the weighted mean of the field distances diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index 34a6aa2..b73b28f 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -8,7 +8,7 @@ import eu.dnetlib.pace.tree.support.TreeProcessor; import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.model.MapDocumentComparator; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java index 2d25101..2de7290 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/Capitalise.java @@ -1,15 +1,15 @@ package eu.dnetlib.pace.util; -import org.apache.commons.lang.WordUtils; import com.google.common.base.Function; +import org.apache.commons.lang3.text.WordUtils; public class Capitalise implements Function { - private final char[] DELIM = { ' ', '-' }; + private final char[] DELIM = {' ', '-'}; - @Override - public String apply(final String s) { - return WordUtils.capitalize(s.toLowerCase(), DELIM); - } + @Override + public String apply(final String s) { + return WordUtils.capitalize(s.toLowerCase(), DELIM); + } }; diff --git a/pom.xml b/pom.xml index bf61fdd..5aeb662 100644 --- a/pom.xml +++ b/pom.xml @@ -83,19 +83,7 @@ false - - - - central - Central Repository - http://repo.maven.apache.org/maven2 - - true - - - - target target/classes @@ -198,50 +186,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -292,11 +236,13 @@ gson ${google.gson.version} + - commons-lang - commons-lang + org.apache.commons + commons-lang3 ${commons.lang.version} + commons-io commons-io @@ -374,7 +320,7 @@ 2.2.0 2.6.6 - 2.6 + 3.5 2.4 3.2.1 1.1.3