upgraded maven version of commons-lang

This commit is contained in:
Sandro La Bruzzo 2020-02-10 12:38:40 +01:00
parent 5c8f6febee
commit 46727f5c76
35 changed files with 277 additions and 1233 deletions

View File

@ -27,17 +27,14 @@
<artifactId>gson</artifactId> <artifactId>gson</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-lang</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-lang</artifactId> <artifactId>commons-lang3</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-io</groupId> <groupId>commons-io</groupId>
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
</dependency> </dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.antlr</groupId> <groupId>org.antlr</groupId>
<artifactId>stringtemplate</artifactId> <artifactId>stringtemplate</artifactId>

View File

@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;

View File

@ -6,7 +6,7 @@ import java.util.Map;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.pace.clustering;
import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;

View File

@ -8,7 +8,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
@ClusteringClass("lowercase") @ClusteringClass("lowercase")
public class LowercaseClustering extends AbstractClusteringFunction { public class LowercaseClustering extends AbstractClusteringFunction {

View File

@ -2,7 +2,7 @@ package eu.dnetlib.pace.clustering;
import java.util.Set; import java.util.Set;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.common.AbstractPaceFunctions;

View File

@ -5,7 +5,7 @@ import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.Person; import eu.dnetlib.pace.model.Person;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;

View File

@ -5,8 +5,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;

View File

@ -9,9 +9,8 @@ import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList; import eu.dnetlib.pace.model.FieldList;
import eu.dnetlib.pace.model.FieldListImpl; import eu.dnetlib.pace.model.FieldListImpl;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.io.IOException; import java.io.IOException;
import java.io.StringWriter; import java.io.StringWriter;
@ -25,295 +24,293 @@ import java.util.stream.Collectors;
* Set of common functions for the framework * Set of common functions for the framework
* *
* @author claudio * @author claudio
*
*/ */
public abstract class AbstractPaceFunctions { public abstract class AbstractPaceFunctions {
//city map to be used when translating the city names into codes //city map to be used when translating the city names into codes
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); private static Map<String, String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
//list of stopwords in different languages //list of stopwords in different languages
protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
protected static Set<String> stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt"); protected static Set<String> stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
protected static Set<String> stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt"); protected static Set<String> stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
protected static Set<String> stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt"); protected static Set<String> stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt");
protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt"); protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt"); protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
//blacklist of ngrams: to avoid generic keys //blacklist of ngrams: to avoid generic keys
protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt"); protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");
private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "; private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń"; private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn"; private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
//doi prefix for normalization //doi prefix for normalization
public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)"; public final String DOI_PREFIX = "(https?:\\/\\/dx\\.doi\\.org\\/)|(doi:)";
private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?"); private Pattern numberPattern = Pattern.compile("-?\\d+(\\.\\d+)?");
private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); private Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");
protected final static FieldList EMPTY_FIELD = new FieldListImpl(); protected final static FieldList EMPTY_FIELD = new FieldListImpl();
protected String concat(final List<String> l) { protected String concat(final List<String> l) {
return Joiner.on(" ").skipNulls().join(l); return Joiner.on(" ").skipNulls().join(l);
} }
protected String cleanup(final String s) { protected String cleanup(final String s) {
final String s0 = unicodeNormalization(s.toLowerCase()); final String s0 = unicodeNormalization(s.toLowerCase());
final String s1 = fixAliases(s0); final String s1 = fixAliases(s0);
final String s2 = nfd(s1); final String s2 = nfd(s1);
final String s3 = s2.replaceAll("&ndash;", " "); final String s3 = s2.replaceAll("&ndash;", " ");
final String s4 = s3.replaceAll("&amp;", " "); final String s4 = s3.replaceAll("&amp;", " ");
final String s5 = s4.replaceAll("&quot;", " "); final String s5 = s4.replaceAll("&quot;", " ");
final String s6 = s5.replaceAll("&minus;", " "); final String s6 = s5.replaceAll("&minus;", " ");
final String s7 = s6.replaceAll("([0-9]+)", " $1 "); final String s7 = s6.replaceAll("([0-9]+)", " $1 ");
final String s8 = s7.replaceAll("[^\\p{ASCII}]", ""); final String s8 = s7.replaceAll("[^\\p{ASCII}]", "");
final String s9 = s8.replaceAll("[\\p{Punct}]", " "); final String s9 = s8.replaceAll("[\\p{Punct}]", " ");
final String s10 = s9.replaceAll("\\n", " "); final String s10 = s9.replaceAll("\\n", " ");
final String s11 = s10.replaceAll("(?m)\\s+", " "); final String s11 = s10.replaceAll("(?m)\\s+", " ");
final String s12 = s11.trim(); final String s12 = s11.trim();
return s12; return s12;
} }
protected boolean checkNumbers(final String a, final String b) { protected boolean checkNumbers(final String a, final String b) {
final String numbersA = getNumbers(a); final String numbersA = getNumbers(a);
final String numbersB = getNumbers(b); final String numbersB = getNumbers(b);
final String romansA = getRomans(a); final String romansA = getRomans(a);
final String romansB = getRomans(b); final String romansB = getRomans(b);
return !numbersA.equals(numbersB) || !romansA.equals(romansB); return !numbersA.equals(numbersB) || !romansA.equals(romansB);
} }
protected String getRomans(final String s) { protected String getRomans(final String s) {
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
for (final String t : s.split(" ")) { for (final String t : s.split(" ")) {
sb.append(isRoman(t) ? t : ""); sb.append(isRoman(t) ? t : "");
} }
return sb.toString(); return sb.toString();
} }
protected boolean isRoman(final String s) { protected boolean isRoman(final String s) {
return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop"); return s.replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop").equals("qwertyuiop");
} }
protected String getNumbers(final String s) { protected String getNumbers(final String s) {
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
for (final String t : s.split(" ")) { for (final String t : s.split(" ")) {
sb.append(isNumber(t)? t : ""); sb.append(isNumber(t) ? t : "");
} }
return sb.toString(); return sb.toString();
} }
public boolean isNumber(String strNum) { public boolean isNumber(String strNum) {
if (strNum == null) { if (strNum == null) {
return false; return false;
} }
return numberPattern.matcher(strNum).matches(); return numberPattern.matcher(strNum).matches();
} }
protected static String fixAliases(final String s) { protected static String fixAliases(final String s) {
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
for (final char ch : Lists.charactersOf(s)) { for (final char ch : Lists.charactersOf(s)) {
final int i = StringUtils.indexOf(aliases_from, ch); final int i = StringUtils.indexOf(aliases_from, ch);
sb.append(i >= 0 ? aliases_to.charAt(i) : ch); sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
} }
return sb.toString(); return sb.toString();
} }
protected String removeSymbols(final String s) { protected String removeSymbols(final String s) {
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
for (final char ch : Lists.charactersOf(s)) { for (final char ch : Lists.charactersOf(s)) {
sb.append(StringUtils.contains(alpha, ch) ? ch : " "); sb.append(StringUtils.contains(alpha, ch) ? ch : " ");
} }
return sb.toString().replaceAll("\\s+", " "); return sb.toString().replaceAll("\\s+", " ");
} }
protected String getFirstValue(final Field values) { protected String getFirstValue(final Field values) {
return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : ""; return (values != null) && !Iterables.isEmpty(values) ? Iterables.getFirst(values, EMPTY_FIELD).stringValue() : "";
} }
protected boolean notNull(final String s) { protected boolean notNull(final String s) {
return s != null; return s != null;
} }
protected String normalize(final String s) { protected String normalize(final String s) {
return nfd(unicodeNormalization(s)) return nfd(unicodeNormalization(s))
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
.replaceAll("[^ \\w]+", "") .replaceAll("[^ \\w]+", "")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "") .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
.replaceAll("(\\p{Punct})+", " ") .replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ") .replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ") .replaceAll("(\\n)+", " ")
.trim(); .trim();
} }
public String nfd(final String s) { public String nfd(final String s) {
return Normalizer.normalize(s, Normalizer.Form.NFD); return Normalizer.normalize(s, Normalizer.Form.NFD);
} }
public String unicodeNormalization(final String s) { public String unicodeNormalization(final String s) {
Matcher m = hexUnicodePattern.matcher(s); Matcher m = hexUnicodePattern.matcher(s);
StringBuffer buf = new StringBuffer(s.length()); StringBuffer buf = new StringBuffer(s.length());
while (m.find()) { while (m.find()) {
String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16)); String ch = String.valueOf((char) Integer.parseInt(m.group(1), 16));
m.appendReplacement(buf, Matcher.quoteReplacement(ch)); m.appendReplacement(buf, Matcher.quoteReplacement(ch));
} }
m.appendTail(buf); m.appendTail(buf);
return buf.toString(); return buf.toString();
} }
protected String filterStopWords(final String s, final Set<String> stopwords) { protected String filterStopWords(final String s, final Set<String> stopwords) {
final StringTokenizer st = new StringTokenizer(s); final StringTokenizer st = new StringTokenizer(s);
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
while (st.hasMoreTokens()) { while (st.hasMoreTokens()) {
final String token = st.nextToken(); final String token = st.nextToken();
if (!stopwords.contains(token)) { if (!stopwords.contains(token)) {
sb.append(token); sb.append(token);
sb.append(" "); sb.append(" ");
} }
} }
return sb.toString().trim(); return sb.toString().trim();
} }
public String filterAllStopWords(String s) { public String filterAllStopWords(String s) {
s = filterStopWords(s, stopwords_en); s = filterStopWords(s, stopwords_en);
s = filterStopWords(s, stopwords_de); s = filterStopWords(s, stopwords_de);
s = filterStopWords(s, stopwords_it); s = filterStopWords(s, stopwords_it);
s = filterStopWords(s, stopwords_fr); s = filterStopWords(s, stopwords_fr);
s = filterStopWords(s, stopwords_pt); s = filterStopWords(s, stopwords_pt);
s = filterStopWords(s, stopwords_es); s = filterStopWords(s, stopwords_es);
return s; return s;
} }
protected Collection<String> filterBlacklisted(final Collection<String> set, final Set<String> ngramBlacklist) { protected Collection<String> filterBlacklisted(final Collection<String> set, final Set<String> ngramBlacklist) {
final Set<String> newset = Sets.newLinkedHashSet(); final Set<String> newset = Sets.newLinkedHashSet();
for (final String s : set) { for (final String s : set) {
if (!ngramBlacklist.contains(s)) { if (!ngramBlacklist.contains(s)) {
newset.add(s); newset.add(s);
} }
} }
return newset; return newset;
} }
public static Set<String> loadFromClasspath(final String classpath) { public static Set<String> loadFromClasspath(final String classpath) {
final Set<String> h = Sets.newHashSet(); final Set<String> h = Sets.newHashSet();
try { try {
for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) { for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) {
h.add(s); h.add(s);
} }
} catch (final Throwable e) { } catch (final Throwable e) {
return Sets.newHashSet(); return Sets.newHashSet();
} }
return h; return h;
} }
public static Map<String, String> loadMapFromClasspath(final String classpath) { public static Map<String, String> loadMapFromClasspath(final String classpath) {
final Map<String, String> m = new HashMap<>(); final Map<String, String> m = new HashMap<>();
try { try {
for (final String s: IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) { for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) {
//string is like this: code;word1;word2;word3 //string is like this: code;word1;word2;word3
String[] line = s.split(";"); String[] line = s.split(";");
String value = line[0]; String value = line[0];
for (int i=1; i<line.length;i++){ for (int i = 1; i < line.length; i++) {
m.put(line[i].toLowerCase(),value); m.put(line[i].toLowerCase(), value);
} }
} }
} catch (final Throwable e){ } catch (final Throwable e) {
return new HashMap<>(); return new HashMap<>();
} }
return m; return m;
} }
public String removeKeywords(String s, Set<String> keywords) { public String removeKeywords(String s, Set<String> keywords) {
s = " " + s + " "; s = " " + s + " ";
for (String k: keywords ) { for (String k : keywords) {
s = s.replaceAll(k.toLowerCase(), ""); s = s.replaceAll(k.toLowerCase(), "");
} }
return s.trim(); return s.trim();
} }
public double commonElementsPercentage(Set<String> s1, Set<String> s2){ public double commonElementsPercentage(Set<String> s1, Set<String> s2) {
int longer = (s1.size()>s2.size())?s1.size():s2.size(); double longer = Math.max(s1.size(), s2.size());
return (double) s1.stream().filter(s2::contains).count() / longer;
}
return (double)CollectionUtils.intersection(s1,s2).size()/(double)longer; //convert the set of keywords to codes
} public Set<String> toCodes(Set<String> keywords, Map<String, String> translationMap) {
return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet());
}
//convert the set of keywords to codes public Set<String> keywordsToCodes(Set<String> keywords, Map<String, String> translationMap) {
public Set<String> toCodes(Set<String> keywords, Map<String, String> translationMap) { return toCodes(keywords, translationMap);
return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); }
}
public Set<String> keywordsToCodes(Set<String> keywords, Map<String, String> translationMap) { public Set<String> citiesToCodes(Set<String> keywords) {
return toCodes(keywords, translationMap); return toCodes(keywords, cityMap);
} }
public Set<String> citiesToCodes(Set<String> keywords) { protected String firstLC(final String s) {
return toCodes(keywords, cityMap); return StringUtils.substring(s, 0, 1).toLowerCase();
} }
protected String firstLC(final String s) { protected Iterable<String> tokens(final String s, final int maxTokens) {
return StringUtils.substring(s, 0, 1).toLowerCase(); return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens);
} }
protected Iterable<String> tokens(final String s, final int maxTokens) { public String normalizePid(String pid) {
return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); return pid.toLowerCase().replaceAll(DOI_PREFIX, "");
} }
public String normalizePid(String pid) { //get the list of keywords into the input string
return pid.toLowerCase().replaceAll(DOI_PREFIX, ""); public Set<String> getKeywords(String s1, Map<String, String> translationMap, int windowSize) {
}
//get the list of keywords into the input string String s = s1;
public Set<String> getKeywords(String s1, Map<String, String> translationMap, int windowSize){
String s = s1; List<String> tokens = Arrays.asList(s.toLowerCase().split(" "));
List<String> tokens = Arrays.asList(s.toLowerCase().split(" ")); Set<String> codes = new HashSet<>();
Set<String> codes = new HashSet<>(); if (tokens.size() < windowSize)
windowSize = tokens.size();
if (tokens.size()<windowSize) int length = windowSize;
windowSize = tokens.size();
int length = windowSize; while (length != 0) {
while (length != 0) { for (int i = 0; i <= tokens.size() - length; i++) {
String candidate = concat(tokens.subList(i, i + length));
if (translationMap.containsKey(candidate)) {
codes.add(candidate);
s = s.replace(candidate, "").trim();
}
}
for (int i = 0; i<=tokens.size()-length; i++){ tokens = Arrays.asList(s.split(" "));
String candidate = concat(tokens.subList(i, i + length)); length -= 1;
if (translationMap.containsKey(candidate)) { }
codes.add(candidate);
s = s.replace(candidate, "").trim();
}
}
tokens = Arrays.asList(s.split(" ")); return codes;
length-=1; }
}
return codes; public Set<String> getCities(String s1, int windowSize) {
} return getKeywords(s1, cityMap, windowSize);
}
public Set<String> getCities(String s1, int windowSize) { public static <T> String readFromClasspath(final String filename, final Class<T> clazz) {
return getKeywords(s1, cityMap, windowSize); final StringWriter sw = new StringWriter();
} try {
IOUtils.copy(clazz.getResourceAsStream(filename), sw);
public static <T> String readFromClasspath(final String filename, final Class<T> clazz) { return sw.toString();
final StringWriter sw = new StringWriter(); } catch (final IOException e) {
try { throw new RuntimeException("cannot load resource from classpath: " + filename);
IOUtils.copy(clazz.getResourceAsStream(filename), sw); }
return sw.toString(); }
} catch (final IOException e) {
throw new RuntimeException("cannot load resource from classpath: " + filename);
}
}
} }

View File

@ -4,7 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.pace.util.PaceException; import eu.dnetlib.pace.util.PaceException;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,8 +1,9 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.gson.Gson;
import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.config.Type;
import java.io.Serializable; import java.io.Serializable;
@ -103,7 +104,11 @@ public class FieldDef implements Serializable {
@Override @Override
public String toString() { public String toString() {
return new Gson().toJson(this); try {
return new ObjectMapper().writeValueAsString(this);
} catch (JsonProcessingException e) {
return null;
}
} }
} }

View File

@ -1,11 +1,11 @@
package eu.dnetlib.pace.model; package eu.dnetlib.pace.model;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function; import com.google.common.base.Function;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.gson.Gson;
import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.config.Type;
import java.util.Collection; import java.util.Collection;
@ -283,7 +283,12 @@ public class FieldListImpl extends AbstractField implements FieldList {
case String: case String:
return Joiner.on(" ").join(stringList()); return Joiner.on(" ").join(stringList());
case JSON: case JSON:
final String json = new Gson().toJson(stringList()); String json;
try {
json = new ObjectMapper().writeValueAsString(this);
} catch (JsonProcessingException e) {
json = null;
}
return json; return json;
default: default:
throw new IllegalArgumentException("Unknown type: " + getType().toString()); throw new IllegalArgumentException("Unknown type: " + getType().toString());

View File

@ -2,12 +2,12 @@ package eu.dnetlib.pace.model;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.config.Type;
import org.apache.commons.collections.iterators.SingletonIterator; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang.StringUtils;
/** /**
* The Class FieldValueImpl. * The Class FieldValueImpl.
@ -124,7 +124,7 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Iterator<Field> iterator() { public Iterator<Field> iterator() {
return new SingletonIterator(this); return Collections.singleton((Field) this).iterator();
} }
} }

View File

@ -1,129 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
/**
 * Bean describing an author: an identifier, the name parts, the matches found for the
 * author, the set of co-authors and a subjects map.
 *
 * <p>Identity is defined by {@code id} only: {@link #equals(Object)}, {@link #hashCode()} and
 * {@link #compareTo(Author)} all look exclusively at the identifier. A {@code null} id is
 * tolerated by all three (two authors without id are considered equal, and sort last).
 */
public class Author implements Comparable<Author> {

    // NOTE: Gson serialises fields in declaration order, keep the order stable.
    private String id;
    private String fullname;
    private String firstname;
    private String secondnames;
    private List<Match> matches = Lists.newArrayList();
    private Set<Author> coauthors = Sets.newHashSet();
    private SubjectsMap subjectsMap = new SubjectsMap();

    /** Creates an empty author with no matches, no co-authors and an empty subjects map. */
    public Author() {
        super();
    }

    /**
     * Copy constructor. The copy is shallow: the matches list, the co-authors set and the
     * subjects map are shared with {@code a}, not cloned.
     *
     * @param a the author to copy from
     */
    public Author(final Author a) {
        this.id = a.getId();
        this.fullname = a.getFullname();
        this.firstname = a.getFirstname();
        this.secondnames = a.getSecondnames();
        this.matches = a.getMatches();
        this.coauthors = a.getCoauthors();
        this.subjectsMap = a.getSubjectsMap();
    }

    /** @return true when the matches list is set and not empty */
    public boolean hasMatches() {
        return (getMatches() != null) && !getMatches().isEmpty();
    }

    /** @return true when the co-authors set is set and not empty */
    public boolean hasCoauthors() {
        return (getCoauthors() != null) && !getCoauthors().isEmpty();
    }

    /** @return true when both the second names and the first name are non blank */
    public boolean isWellFormed() {
        return StringUtils.isNotBlank(getSecondnames()) && StringUtils.isNotBlank(getFirstname());
    }

    public String getId() {
        return id;
    }

    public void setId(final String id) {
        this.id = id;
    }

    public String getFullname() {
        return fullname;
    }

    public void setFullname(final String fullname) {
        this.fullname = fullname;
    }

    public String getFirstname() {
        return firstname;
    }

    public void setFirstname(final String firstname) {
        this.firstname = firstname;
    }

    public String getSecondnames() {
        return secondnames;
    }

    public void setSecondnames(final String secondnames) {
        this.secondnames = secondnames;
    }

    public List<Match> getMatches() {
        return matches;
    }

    public void setMatches(final List<Match> matches) {
        this.matches = matches;
    }

    public Set<Author> getCoauthors() {
        return coauthors;
    }

    public void setCoauthors(final Set<Author> coauthors) {
        this.coauthors = coauthors;
    }

    /**
     * @return the JSON serialisation of this author produced by Gson
     */
    @Override
    public String toString() {
        // NOTE(review): the co-authors are Author instances themselves; presumably a cyclic
        // co-author graph would make the serialisation recurse without end - confirm.
        return new Gson().toJson(this);
    }

    /**
     * Hash of the identifier, or 0 when the identifier is not set (it used to throw a
     * NullPointerException in that case).
     */
    @Override
    public int hashCode() {
        final String key = getId();
        return key != null ? key.hashCode() : 0;
    }

    /**
     * Orders the authors by identifier, authors without identifier come last.
     */
    @Override
    public int compareTo(final Author o) {
        return ComparisonChain.start()
                .compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
                .result();
    }

    /**
     * Two authors are equal when their identifiers are equal. Null identifiers are handled
     * consistently with {@link #compareTo(Author)}: two authors without identifier are equal.
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Author)) {
            return false;
        }
        final String mine = getId();
        final String theirs = ((Author) o).getId();
        return mine != null ? mine.equals(theirs) : theirs == null;
    }

    public SubjectsMap getSubjectsMap() {
        return subjectsMap;
    }

    public void setSubjectsMap(final SubjectsMap subjectsMap) {
        this.subjectsMap = subjectsMap;
    }
}

View File

@ -1,37 +0,0 @@
package eu.dnetlib.pace.model.gt;
import com.google.gson.Gson;
/**
 * Pairs an identifier with a group of {@link Authors}.
 */
public class AuthorSet {

    // Field order is significant: Gson emits the properties in declaration order.
    private String id;
    private Authors authors;

    /**
     * @param id      the identifier of the set
     * @param authors the authors belonging to the set
     */
    public AuthorSet(final String id, final Authors authors) {
        this.id = id;
        this.authors = authors;
    }

    /** @return the JSON representation of this set, as produced by Gson */
    @Override
    public String toString() {
        final Gson serializer = new Gson();
        return serializer.toJson(this);
    }

    /** @return the identifier of the set */
    public String getId() {
        return this.id;
    }

    /** @param id the new identifier of the set */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the authors belonging to the set */
    public Authors getAuthors() {
        return this.authors;
    }

    /** @param authors the new authors of the set */
    public void setAuthors(final Authors authors) {
        this.authors = authors;
    }
}

View File

@ -1,54 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.Collection;
import java.util.HashSet;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
/**
 * A set of {@link Author}s that can be ordered by size and rendered as JSON.
 *
 * <p>Ordering ({@link #compareTo(Authors)}) only looks at the number of elements, hence it is
 * not consistent with {@link #equals(Object)}.
 */
public class Authors extends HashSet<Author> implements Comparable<Authors> {

    private static final long serialVersionUID = -6878376220805286142L;

    /** Creates an empty set of authors. */
    public Authors() {
        super();
    }

    /**
     * @param authors the initial content of the set
     */
    public Authors(final Collection<Author> authors) {
        super(authors);
    }

    /**
     * @param author the single initial element of the set
     */
    public Authors(final Author author) {
        super(Sets.newHashSet(author));
    }

    /**
     * Compares the two sets by size only.
     *
     * @return -1, 0 or 1 when this set is smaller than, as large as, or larger than {@code a}
     */
    @Override
    public int compareTo(final Authors a) {
        // plain int comparison: no need to box the sizes and go through a null-safe Ordering
        return Integer.compare(this.size(), a.size());
    }

    /** @return the JSON representation of this set, as produced by Gson */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

    /**
     * Two {@code Authors} are equal when they contain the same elements.
     *
     * <p>The previous implementation only verified that this set was contained in the other
     * one, so that {@code {a}.equals({a, b})} was true while {@code {a, b}.equals({a})} was
     * false, breaking the symmetry required by {@link Object#equals(Object)}. The size check
     * turns the containment test into a real equality test.
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Authors)) {
            return false;
        }
        final Authors other = (Authors) o;
        return (this.size() == other.size()) && other.containsAll(this);
    }

    /**
     * @return the sum of the hash codes of the contained authors
     */
    @Override
    public int hashCode() {
        int res = 0;
        for (final Author a : this) {
            res += a.hashCode();
        }
        return res;
    }
}

View File

@ -1,50 +0,0 @@
package eu.dnetlib.pace.model.gt;
import com.google.gson.Gson;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * An {@link Author} seen as co-author of somebody else, optionally carrying an anchor id.
 *
 * <p>Identity: a co-author is identified by its id when the id is not blank, otherwise by its
 * full name. {@link #equals(Object)} and {@link #hashCode()} apply the very same rule.
 */
public class CoAuthor extends Author {

    private static final Log log = LogFactory.getLog(CoAuthor.class);

    private String anchorId = null;

    /** Creates an empty co-author. */
    public CoAuthor() {
        super();
    }

    /**
     * @param author the author whose properties are copied into this co-author
     */
    public CoAuthor(final Author author) {
        super(author);
    }

    /** @return true when the anchor id is not blank */
    public boolean hasAnchorId() {
        return StringUtils.isNotBlank(getAnchorId());
    }

    public String getAnchorId() {
        return anchorId;
    }

    public void setAnchorId(final String anchorId) {
        this.anchorId = anchorId;
    }

    /** @return the JSON representation of this co-author, as produced by Gson */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

    /**
     * Hash of the id when it is not blank, hash of the full name otherwise (0 when the full
     * name is missing too). The "not blank" test mirrors the one used by
     * {@link #equals(Object)}, so that equal co-authors always share the same hash code.
     */
    @Override
    public int hashCode() {
        if (StringUtils.isNotBlank(getId())) {
            return getId().hashCode();
        }
        final String name = getFullname();
        return name != null ? name.hashCode() : 0;
    }

    /**
     * Two co-authors are equal when they have the same (non blank) id or, when neither of
     * them has an id, the same full name.
     *
     * <p>The former one-line ternary was evaluated as
     * {@code (instanceof && notBlank) ? ids : fullnames}: any argument that was not a
     * {@code CoAuthor} (or was null) ended up in the full name branch and failed with a
     * ClassCastException / NullPointerException. It was also asymmetric when only one of the
     * two co-authors had an id.
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof CoAuthor)) {
            return false;
        }
        final CoAuthor other = (CoAuthor) o;

        final boolean hasId = StringUtils.isNotBlank(getId());
        final boolean otherHasId = StringUtils.isNotBlank(other.getId());
        if (hasId || otherHasId) {
            // as soon as one side has an id, the ids decide
            return hasId && getId().equals(other.getId());
        }

        final String name = getFullname();
        return name != null ? name.equals(other.getFullname()) : other.getFullname() == null;
    }
}

View File

@ -1,36 +0,0 @@
package eu.dnetlib.pace.model.gt;
import com.google.gson.Gson;
/**
 * Pairs an {@link Author} with the set of its co-authors.
 */
public class CoAuthorSet {

    private Author author;

    private Authors coAuthors;

    /**
     * @param author    the author the co-authors refer to
     * @param coAuthors the co-authors of {@code author}
     */
    public CoAuthorSet(final Author author, final Authors coAuthors) {
        this.author = author;
        this.coAuthors = coAuthors;
    }

    /** @return the author. */
    public Author getAuthor() {
        return this.author;
    }

    /** @param author the author to set */
    public void setAuthor(final Author author) {
        this.author = author;
    }

    /** @return the co-authors. */
    public Authors getCoAuthors() {
        return this.coAuthors;
    }

    /** @param coAuthors the co-authors to set */
    public void setCoAuthors(final Authors coAuthors) {
        this.coAuthors = coAuthors;
    }

    /** @return this object rendered as JSON by Gson. */
    @Override
    public String toString() {
        final Gson gson = new Gson();
        return gson.toJson(this);
    }
}

View File

@ -1,40 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.Set;
import com.google.gson.Gson;
/**
 * Lightweight variant of a co-author set: an identifier plus a set of strings
 * describing the co-authors.
 */
public class CoAuthorSetLite {

    private String id;

    private Set<String> coAuthors;

    /**
     * @param id        the identifier
     * @param coAuthors the co-author values
     */
    public CoAuthorSetLite(final String id, final Set<String> coAuthors) {
        this.id = id;
        this.coAuthors = coAuthors;
    }

    /** @return the co-author values. */
    public Set<String> getCoAuthors() {
        return this.coAuthors;
    }

    /** @param coAuthors the co-author values to set */
    public void setCoAuthors(final Set<String> coAuthors) {
        this.coAuthors = coAuthors;
    }

    /** @return the identifier. */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to set */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return this object rendered as JSON by Gson. */
    @Override
    public String toString() {
        final Gson gson = new Gson();
        return gson.toJson(this);
    }
}

View File

@ -1,78 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.Collection;
import java.util.HashSet;
import com.google.common.base.Function;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.gson.Gson;
/**
 * A set of {@link CoAuthor}s that can be ordered by size and whose hash code can be
 * computed by a pluggable function.
 *
 * <p>Note: {@link #compareTo(CoAuthors)} orders by number of elements only, so it is
 * not consistent with {@link #equals(Object)}. A custom hash function must return the
 * same value for sets holding the same elements, otherwise {@link #hashCode()} stops
 * agreeing with {@link #equals(Object)}.
 */
public class CoAuthors extends HashSet<CoAuthor> implements Comparable<CoAuthors> {

    private static final long serialVersionUID = 2525591524516562892L;

    /** Optional replacement for the default hash; {@code null} means "use the default". */
    private Function<CoAuthors, Integer> hashFunction;

    /** Default hash: the sum of the hash codes of the elements. Shared, hence immutable. */
    private static final Function<CoAuthors, Integer> defaultHashFunction = new Function<CoAuthors, Integer>() {

        @Override
        public Integer apply(final CoAuthors input) {
            int res = 0;
            for (final CoAuthor a : input) {
                res += a.hashCode();
            }
            return res;
        }
    };

    /** Creates an empty set. */
    public CoAuthors() {
        super();
    }

    /**
     * Creates a set holding the given co-authors.
     *
     * @param coauthors the initial elements
     */
    public CoAuthors(final Collection<CoAuthor> coauthors) {
        super(coauthors);
    }

    /**
     * Creates a set holding a single co-author.
     *
     * @param coauthor the only element
     */
    public CoAuthors(final CoAuthor coauthor) {
        super(Sets.newHashSet(coauthor));
    }

    /** @return the custom hash function, or {@code null} when the default is in use. */
    public Function<CoAuthors, Integer> getHashFunction() {
        return hashFunction;
    }

    /** @param hashFunction the function used by {@link #hashCode()}; {@code null} restores the default */
    public void setHashFunction(final Function<CoAuthors, Integer> hashFunction) {
        this.hashFunction = hashFunction;
    }

    /**
     * Orders co-author sets by their size.
     *
     * @param a the set to compare with
     * @return negative, zero or positive when this set is smaller than, as large as, or larger than {@code a}
     */
    @Override
    public int compareTo(final CoAuthors a) {
        return ComparisonChain.start()
                .compare(this.size(), a.size(), Ordering.natural().nullsLast())
                .result();
    }

    /** @return the JSON representation produced by Gson. */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

    /**
     * Two co-author sets are equal when they contain exactly the same elements.
     *
     * <p>Sizes are compared as well as membership: a subset-only check would make a set
     * equal to its supersets but not the other way round, breaking symmetry.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is a {@code CoAuthors} with the same elements
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof CoAuthors)) {
            return false;
        }
        final CoAuthors other = (CoAuthors) o;
        return (other.size() == this.size()) && other.containsAll(this);
    }

    /** @return {@link #hashCode()} rendered as a decimal string. */
    public String hashCodeString() {
        return String.valueOf(hashCode());
    }

    /** @return the value of the custom hash function when one is set, of the default one otherwise. */
    @Override
    public int hashCode() {
        return (getHashFunction() != null) ? getHashFunction().apply(this) : defaultHashFunction.apply(this);
    }
}

View File

@ -1,196 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import com.google.common.base.Function;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
/**
 * Ground-truth author: an identifier, a representative {@link Author}, the set of
 * authors merged into it, its co-authors and an "anchor" flag.
 *
 * <p>Identity ({@link #equals(Object)}, {@link #hashCode()}, {@link #compareTo(GTAuthor)})
 * is based on the id only; a missing id is tolerated by all three.
 */
public class GTAuthor implements Comparable<GTAuthor> {

    private String id;
    private Author author;
    private Authors merged;
    private CoAuthors coAuthors;
    private boolean anchor;

    /** Creates an empty instance; every field, the id included, is left unset. */
    public GTAuthor() {}

    /**
     * Creates an instance whose representative author is picked among {@code merged}.
     *
     * @param id        the identifier
     * @param merged    the merged authors, must not be null or empty
     * @param coAuthors the co-authors
     * @param anchor    the anchor flag
     * @throws IllegalArgumentException if {@code merged} is null or empty
     */
    public GTAuthor(final String id, final Authors merged, final CoAuthors coAuthors, final boolean anchor) {
        super();

        if ((merged == null) || merged.isEmpty())
            throw new IllegalArgumentException("empty merged author set, id: " + id);

        this.author = pickAuthor(merged);
        this.id = id;
        this.merged = merged;
        this.coAuthors = coAuthors;
        this.anchor = anchor;
    }

    /** An author decorated with the number of merged authors sharing its full name. */
    class AuthorFrequency extends Author {

        private Integer frequency = Integer.valueOf(1);

        public AuthorFrequency(final Author a) {
            super(a);
        }

        public void increment() {
            setFrequency(getFrequency() + 1);
        }

        public Integer getFrequency() {
            return frequency;
        }

        public void setFrequency(final Integer frequency) {
            this.frequency = frequency;
        }
    }

    /**
     * Picks the representative author: the one with the longest full name, ties being
     * broken in favour of the most frequent full name.
     */
    private Author pickAuthor(final Authors merged) {
        final List<AuthorFrequency> freq = getFrequencies(merged);
        // Reverse order puts the longest / most frequent candidate first.
        Collections.sort(freq, Collections.reverseOrder(new Comparator<AuthorFrequency>() {

            @Override
            public int compare(final AuthorFrequency o1, final AuthorFrequency o2) {
                return ComparisonChain.start()
                        .compare(o1.getFullname().length(), o2.getFullname().length())
                        .compare(o1.getFrequency(), o2.getFrequency())
                        .result();
            }
        }));

        return Iterables.getFirst(freq, null);
    }

    /** Wraps every merged author together with the number of occurrences of its full name. */
    private List<AuthorFrequency> getFrequencies(final Authors merged) {
        final Map<String, Integer> countMap = Maps.newHashMap();
        for (final Author a : merged) {
            final Integer count = countMap.get(a.getFullname());
            countMap.put(a.getFullname(), (count == null) ? 1 : count + 1);
        }

        return Lists.newArrayList(Iterables.transform(merged, new Function<Author, AuthorFrequency>() {

            @Override
            public AuthorFrequency apply(final Author a) {
                final AuthorFrequency af = new AuthorFrequency(a);
                final Integer freq = countMap.get(af.getFullname());
                af.setFrequency(freq);
                return af;
            }
        }));
    }

    public String getId() {
        return id;
    }

    public void setId(final String id) {
        this.id = id;
    }

    public Author getAuthor() {
        return author;
    }

    public void setAuthor(final Author author) {
        this.author = author;
    }

    /** @return {@code true} when the merged set is neither null nor empty. */
    public boolean hasMerged() {
        return (getMerged() != null) && !getMerged().isEmpty();
    }

    public Authors getMerged() {
        return merged;
    }

    public void setMerged(final Authors merged) {
        this.merged = merged;
    }

    /** @return {@code true} when the co-author set is neither null nor empty. */
    public boolean hasCoAuthors() {
        return (getCoAuthors() != null) && !getCoAuthors().isEmpty();
    }

    public CoAuthors getCoAuthors() {
        return coAuthors;
    }

    public void setCoAuthors(final CoAuthors coAuthors) {
        this.coAuthors = coAuthors;
    }

    public boolean isAnchor() {
        return anchor;
    }

    public void setAnchor(final boolean anchor) {
        this.anchor = anchor;
    }

    /**
     * Parses an instance from JSON using Gson's default field mapping.
     *
     * @param json the JSON text
     * @return the parsed instance
     */
    public static GTAuthor fromJson(final String json) {
        final Gson gson = new Gson();
        return gson.fromJson(json, GTAuthor.class);
    }

    /**
     * Parses a list of JSON texts using {@link GTAuthorOafSerialiser}.
     *
     * @param json the JSON texts
     * @return the parsed instances, in the same order
     */
    public static List<GTAuthor> fromOafJson(final List<String> json) {

        final GsonBuilder gb = new GsonBuilder();
        gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
        final Gson gson = gb.create();

        return Lists.newArrayList(Iterables.transform(json, new Function<String, GTAuthor>() {

            @Override
            public GTAuthor apply(final String s) {
                return gson.fromJson(s, GTAuthor.class);
            }
        }));
    }

    /**
     * Parses a single JSON text using {@link GTAuthorOafSerialiser}.
     *
     * @param json the JSON text
     * @return the parsed instance
     */
    public static GTAuthor fromOafJson(final String json) {

        final GsonBuilder gb = new GsonBuilder();
        gb.registerTypeAdapter(GTAuthor.class, new GTAuthorOafSerialiser());
        final Gson gson = gb.create();

        return gson.fromJson(json, GTAuthor.class);
    }

    /** @return the JSON representation produced by Gson. */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

    /** @return the hash code of the id, or 0 when the id is not set. */
    @Override
    public int hashCode() {
        return (getId() == null) ? 0 : getId().hashCode();
    }

    /**
     * Orders by id, instances without an id last.
     *
     * @param o the instance to compare with
     * @return the result of comparing the two ids
     */
    @Override
    public int compareTo(final GTAuthor o) {
        return ComparisonChain.start()
                .compare(this.getId(), o.getId(), Ordering.natural().nullsLast())
                .result();
    }

    /**
     * Two instances are equal when their ids are equal; two missing ids are considered
     * equal, in line with {@link #compareTo(GTAuthor)}.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is a {@code GTAuthor} with the same id
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof GTAuthor)) {
            return false;
        }
        final String otherId = ((GTAuthor) o).getId();
        return (getId() == null) ? (otherId == null) : getId().equals(otherId);
    }
}

View File

@ -1,104 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.lang.reflect.Type;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
/**
 * Gson deserialiser building a {@link GTAuthor} out of a JSON object that may carry an
 * "id", a "metadata" object, a "mergedperson" array and a "coauthor" array.
 */
public class GTAuthorOafSerialiser implements JsonDeserializer<GTAuthor> {

    private static final String VALUE = "value";
    private static final String SECONDNAMES = "secondnames";
    private static final String FIRSTNAME = "firstname";
    private static final String FULLNAME = "fullname";
    private static final String ID = "id";
    private static final String MERGEDPERSON = "mergedperson";
    private static final String METADATA = "metadata";
    private static final String ANCHOR_ID = "anchorId";
    private static final String COAUTHOR = "coauthor";

    /**
     * Fills author, merged authors and co-authors of a new {@link GTAuthor} from {@code json}.
     */
    @Override
    public GTAuthor deserialize(final JsonElement json, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException {
        final GTAuthor result = new GTAuthor();

        result.setAuthor(getAuthor(json));
        result.setMerged(getMerged(json));
        result.setCoAuthors(getCoAuthors(json));

        return result;
    }

    /** Reads the "coauthor" array; returns null when the property is missing. */
    private CoAuthors getCoAuthors(final JsonElement json) {
        final JsonObject root = json.getAsJsonObject();
        if (!root.has(COAUTHOR)) {
            return null;
        }
        final Function<JsonElement, CoAuthor> converter = new Function<JsonElement, CoAuthor>() {

            @Override
            public CoAuthor apply(final JsonElement in) {
                return toCoAuthor(in);
            }
        };
        return new CoAuthors(Lists.newArrayList(Iterables.transform(root.get(COAUTHOR).getAsJsonArray(), converter)));
    }

    /** Builds one co-author, copying the "anchorId" property when it is there. */
    private CoAuthor toCoAuthor(final JsonElement element) {
        final CoAuthor coAuthor = new CoAuthor(getAuthor(element));
        final JsonObject fields = element.getAsJsonObject();
        if (fields.has(ANCHOR_ID)) {
            coAuthor.setAnchorId(fields.get(ANCHOR_ID).getAsString());
        }
        return coAuthor;
    }

    /** Builds an author from the "id" property and from the "metadata" object, when present. */
    private Author getAuthor(final JsonElement json) {

        final Author author = new Author();
        author.setCoauthors(null);
        author.setMatches(null);

        final JsonObject fields = json.getAsJsonObject();

        if (fields.has(ID)) {
            author.setId(fields.get(ID).getAsString());
        } else {
            author.setId(null);
        }

        if (fields.has(METADATA)) {
            final JsonObject metadata = fields.get(METADATA).getAsJsonObject();
            author.setFullname(getValue(metadata, FULLNAME));
            author.setFirstname(getValue(metadata, FIRSTNAME));
            author.setSecondnames(getValues(metadata, SECONDNAMES));
        }

        return author;
    }

    /** Reads the "mergedperson" array; returns null when the property is missing. */
    private Authors getMerged(final JsonElement json) {
        final JsonObject root = json.getAsJsonObject();
        if (!root.has(MERGEDPERSON)) {
            return null;
        }
        final Function<JsonElement, Author> converter = new Function<JsonElement, Author>() {

            @Override
            public Author apply(final JsonElement in) {
                return getAuthor(in);
            }
        };
        return new Authors(Lists.newArrayList(Iterables.transform(root.get(MERGEDPERSON).getAsJsonArray(), converter)));
    }

    /** Joins with a blank the "value" of each element of the array property; null when missing. */
    private String getValues(final JsonObject m, final String fieldName) {
        if (!m.has(fieldName)) {
            return null;
        }
        final Function<JsonElement, String> valueOf = new Function<JsonElement, String>() {

            @Override
            public String apply(final JsonElement in) {
                return in.getAsJsonObject().get(VALUE).getAsString();
            }
        };
        return Joiner.on(" ").join(Iterables.transform(m.get(fieldName).getAsJsonArray(), valueOf));
    }

    /** Returns the "value" of the object property; null when the property is missing. */
    private String getValue(final JsonObject m, final String fieldName) {
        if (!m.has(fieldName)) {
            return null;
        }
        return m.get(fieldName).getAsJsonObject().get(VALUE).getAsString();
    }
}

View File

@ -1,44 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.List;
import com.google.gson.Gson;
/**
 * A group of {@link Result}s identified by an id and carrying an explicit size.
 */
public class Group {

    private String id;

    private int size;

    private List<Result> results;

    /** Creates an empty group. */
    public Group() {}

    /** @return the identifier. */
    public String getId() {
        return this.id;
    }

    /** @param id the identifier to set */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the stored size. */
    public int getSize() {
        return this.size;
    }

    /** @param size the size to store */
    public void setSize(final int size) {
        this.size = size;
    }

    /** @return the results of the group. */
    public List<Result> getResults() {
        return this.results;
    }

    /** @param results the results to set */
    public void setResults(final List<Result> results) {
        this.results = results;
    }

    /** @return this object rendered as JSON by Gson. */
    @Override
    public String toString() {
        final Gson gson = new Gson();
        return gson.toJson(this);
    }
}

View File

@ -1,41 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.Collection;
import com.google.gson.Gson;
/**
 * Associates an {@link Author} with a collection of identifiers.
 */
public class InvertedAuthor {

    private Author author;

    private Collection<String> ids;

    /** Creates an empty instance. */
    public InvertedAuthor() {}

    /**
     * @param author the author
     * @param ids    the identifiers associated with {@code author}
     */
    public InvertedAuthor(final Author author, final Collection<String> ids) {
        this.author = author;
        this.ids = ids;
    }

    /** @return the author. */
    public Author getAuthor() {
        return this.author;
    }

    /** @param author the author to set */
    public void setAuthor(final Author author) {
        this.author = author;
    }

    /** @return the identifiers. */
    public Collection<String> getIds() {
        return this.ids;
    }

    /** @param ids the identifiers to set */
    public void setIds(final Collection<String> ids) {
        this.ids = ids;
    }

    /** @return this object rendered as JSON by Gson. */
    @Override
    public String toString() {
        final Gson gson = new Gson();
        return gson.toJson(this);
    }
}

View File

@ -1,31 +0,0 @@
package eu.dnetlib.pace.model.gt;
/**
 * An {@link Author} decorated with a score.
 */
public class Match extends Author {

    private double score;

    /** Creates an empty match. */
    public Match() {
        super();
    }

    /**
     * Creates a match out of an author: full name and id are always copied, first name
     * and second names only when {@code a.isWellFormed()} holds.
     *
     * @param a the author to copy from
     * @return the new match
     */
    public static Match from(final Author a) {
        final Match match = new Match();
        final boolean wellFormed = a.isWellFormed();
        if (wellFormed) {
            match.setFirstname(a.getFirstname());
            match.setSecondnames(a.getSecondnames());
        }
        match.setFullname(a.getFullname());
        match.setId(a.getId());
        return match;
    }

    /** @return the score. */
    public double getScore() {
        return this.score;
    }

    /** @param score the score to set */
    public void setScore(final double score) {
        this.score = score;
    }
}

View File

@ -1,72 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.List;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Ordering;
import com.google.gson.Gson;
/**
 * A result described by its ids, title, authors and a mean distance value.
 *
 * <p>Results are ordered by number of authors; results without an author list sort last.
 */
public class Result implements Comparable<Result> {

    private String id;
    private String originalId;
    private String title;
    private List<Author> authors;

    private double meanDistance;

    /** Creates an empty result; the author list is left unset. */
    public Result() {}

    public String getId() {
        return id;
    }

    public void setId(final String id) {
        this.id = id;
    }

    public String getOriginalId() {
        return originalId;
    }

    public void setOriginalId(final String originalId) {
        this.originalId = originalId;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(final String title) {
        this.title = title;
    }

    public List<Author> getAuthors() {
        return authors;
    }

    public void setAuthors(final List<Author> authors) {
        this.authors = authors;
    }

    /** @return the JSON representation produced by Gson. */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

    /**
     * Orders results by number of authors.
     *
     * <p>The sizes are kept as nullable {@code Integer}s so that the nulls-last ordering
     * actually applies: a result whose author list is not set sorts after the others
     * instead of failing with a {@code NullPointerException}.
     *
     * @param o the result to compare with
     * @return the result of comparing the two author counts
     */
    @Override
    public int compareTo(final Result o) {
        final Integer mine = authorCount(this);
        final Integer theirs = authorCount(o);
        final Ordering<Integer> nullsLast = Ordering.<Integer>natural().nullsLast();
        return ComparisonChain.start()
                .compare(mine, theirs, nullsLast)
                .result();
    }

    /** Returns the number of authors of {@code r}, or null when its author list is not set. */
    private static Integer authorCount(final Result r) {
        final List<Author> list = r.getAuthors();
        return (list == null) ? null : Integer.valueOf(list.size());
    }

    public double getMeanDistance() {
        return meanDistance;
    }

    public void setMeanDistance(final double meanDistance) {
        this.meanDistance = meanDistance;
    }
}

View File

@ -1,10 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.HashMap;
/**
 * A {@link HashMap} from a string key to an integer value, given a dedicated type name.
 * It adds no behaviour of its own.
 *
 * Created by claudio on 07/03/16.
 */
public class Subjects extends HashMap<String, Integer> {
}

View File

@ -1,35 +0,0 @@
package eu.dnetlib.pace.model.gt;
import java.util.HashMap;
import java.util.Map.Entry;
/**
 * A map from a string key to its {@link Subjects} counters, with support for merging
 * the counters of another map into this one.
 *
 * Created by claudio on 07/03/16.
 */
public class SubjectsMap extends HashMap<String, Subjects> {

    /**
     * Merges the content of {@code sm} into this map.
     *
     * <p>Keys that are new to this map get a copy of the incoming {@link Subjects}.
     * For keys already present the counters are merged entry by entry: values of shared
     * entries are summed, entries that are new are added with their incoming value, so
     * the outcome does not depend on which of the two maps already knew the key.
     *
     * @param sm the map to merge from; it is not modified
     * @return this map, to allow chaining
     */
    public SubjectsMap mergeFrom(SubjectsMap sm) {
        for (final Entry<String, Subjects> e : sm.entrySet()) {
            final Subjects incoming = e.getValue();
            final Subjects existing = this.get(e.getKey());

            if (existing == null) {
                // Copy rather than share, so that later merges do not touch sm's counters.
                final Subjects copy = new Subjects();
                if (incoming != null) {
                    copy.putAll(incoming);
                }
                this.put(e.getKey(), copy);
            } else if (incoming != null) {
                for (final Entry<String, Integer> es : incoming.entrySet()) {
                    final Integer current = existing.get(es.getKey());
                    if (current != null) {
                        existing.put(es.getKey(), es.getValue() + current);
                    } else {
                        existing.put(es.getKey(), es.getValue());
                    }
                }
            }
        }
        return this;
    }
}

View File

@ -5,7 +5,7 @@ import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass; import eu.dnetlib.pace.tree.support.ComparatorClass;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import org.apache.commons.collections.CollectionUtils;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;

View File

@ -1,14 +1,14 @@
package eu.dnetlib.pace.tree; package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.AbstractStringDistance; import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Type; import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.AbstractComparator; import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass; import eu.dnetlib.pace.tree.support.ComparatorClass;
import org.apache.commons.lang.StringUtils;
import java.util.Map; import java.util.Map;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.ComparatorClass; import eu.dnetlib.pace.tree.support.ComparatorClass;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;

View File

@ -4,7 +4,7 @@ import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.tree.support.AbstractComparator; import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass; import eu.dnetlib.pace.tree.support.ComparatorClass;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.util.Map; import java.util.Map;

View File

@ -1,14 +1,11 @@
package eu.dnetlib.pace.tree.support; package eu.dnetlib.pace.tree.support;
import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.*; import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.gt.Match;
import eu.dnetlib.pace.util.PaceException; import eu.dnetlib.pace.util.PaceException;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import java.io.Serializable;
import java.util.Map;
/** /**
* The compare between two documents is given by the weighted mean of the field distances * The compare between two documents is given by the weighted mean of the field distances

View File

@ -8,7 +8,7 @@ import eu.dnetlib.pace.tree.support.TreeProcessor;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator; import eu.dnetlib.pace.model.MapDocumentComparator;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;

View File

@ -1,15 +1,15 @@
package eu.dnetlib.pace.util; package eu.dnetlib.pace.util;
import org.apache.commons.lang.WordUtils;
import com.google.common.base.Function; import com.google.common.base.Function;
import org.apache.commons.lang3.text.WordUtils;
public class Capitalise implements Function<String, String> { public class Capitalise implements Function<String, String> {
private final char[] DELIM = { ' ', '-' }; private final char[] DELIM = {' ', '-'};
@Override @Override
public String apply(final String s) { public String apply(final String s) {
return WordUtils.capitalize(s.toLowerCase(), DELIM); return WordUtils.capitalize(s.toLowerCase(), DELIM);
} }
}; };