forked from D-Net/dnet-hadoop
the param map has been updated: now it accepts string parameters
This commit is contained in:
parent
ddd40540aa
commit
8c0d346005
|
@ -11,7 +11,7 @@ import java.util.Map;
|
|||
@ComparatorClass("alwaysMatch")
|
||||
public class AlwaysMatch extends AbstractComparator {
|
||||
|
||||
public AlwaysMatch(final Map<String, Number> params){
|
||||
public AlwaysMatch(final Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,9 +10,9 @@ import java.util.Set;
|
|||
@ComparatorClass("cityMatch")
|
||||
public class CityMatch extends AbstractComparator {
|
||||
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
|
||||
public CityMatch(Map<String, Number> params) {
|
||||
public CityMatch(Map<String, String> params) {
|
||||
super(params);
|
||||
this.params = params;
|
||||
}
|
||||
|
@ -29,8 +29,8 @@ public class CityMatch extends AbstractComparator {
|
|||
ca = filterAllStopWords(ca);
|
||||
cb = filterAllStopWords(cb);
|
||||
|
||||
Set<String> cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
|
||||
Set<String> codes1 = citiesToCodes(cities1);
|
||||
Set<String> codes2 = citiesToCodes(cities2);
|
||||
|
@ -41,7 +41,7 @@ public class CityMatch extends AbstractComparator {
|
|||
else {
|
||||
if (codes1.isEmpty() ^ codes2.isEmpty())
|
||||
return -1; //undefined if one of the two has no cities
|
||||
return commonElementsPercentage(codes1, codes2) > params.getOrDefault("threshold", 0).intValue() ? 1.0 : 0.0;
|
||||
return commonElementsPercentage(codes1, codes2) > Integer.parseInt(params.getOrDefault("threshold", "0")) ? 1.0 : 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The Class Contains match
|
||||
*
|
||||
* @author miconis
|
||||
* */
|
||||
@ComparatorClass("containsMatch")
|
||||
public class ContainsMatch extends AbstractComparator {
|
||||
|
||||
private Map<String, String> params;
|
||||
|
||||
public ContainsMatch(Map<String, String> params) {
|
||||
super(params);
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
|
||||
//read parameters
|
||||
boolean caseSensitive = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
|
||||
String string = params.get("string");
|
||||
String agg = params.get("bool");
|
||||
|
||||
String ca = a;
|
||||
String cb = b;
|
||||
if (!caseSensitive) {
|
||||
ca = a.toLowerCase();
|
||||
cb = b.toLowerCase();
|
||||
}
|
||||
|
||||
switch(agg) {
|
||||
case "AND":
|
||||
if(ca.contains(string) && cb.contains(string))
|
||||
return 1.0;
|
||||
break;
|
||||
case "OR":
|
||||
if(ca.contains(string) || cb.contains(string))
|
||||
return 1.0;
|
||||
break;
|
||||
case "XOR":
|
||||
if(ca.contains(string) ^ cb.contains(string))
|
||||
return 1.0;
|
||||
break;
|
||||
default:
|
||||
return 0.0;
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
}
|
|
@ -1,6 +1,5 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
@ -16,7 +15,7 @@ public class DoiExactMatch extends ExactMatchIgnoreCase {
|
|||
|
||||
public final String PREFIX = "(http:\\/\\/dx\\.doi\\.org\\/)|(doi:)";
|
||||
|
||||
public DoiExactMatch(final Map<String, Number> params) {
|
||||
public DoiExactMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
@ComparatorClass("domainExactMatch")
|
||||
public class DomainExactMatch extends ExactMatchIgnoreCase {
|
||||
|
||||
public DomainExactMatch(final Map<String, Number> params) {
|
||||
public DomainExactMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
@ComparatorClass("exactMatch")
|
||||
public class ExactMatch extends AbstractComparator {
|
||||
|
||||
public ExactMatch(Map<String, Number> params){
|
||||
public ExactMatch(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
@ComparatorClass("exactMatchIgnoreCase")
|
||||
public class ExactMatchIgnoreCase extends AbstractComparator {
|
||||
|
||||
public ExactMatchIgnoreCase(Map<String, Number> params) {
|
||||
public ExactMatchIgnoreCase(Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ import java.util.Map;
|
|||
@ComparatorClass("jaroWinkler")
|
||||
public class JaroWinkler extends AbstractComparator {
|
||||
|
||||
public JaroWinkler(Map<String, Number> params){
|
||||
public JaroWinkler(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ import java.util.Set;
|
|||
@ComparatorClass("jaroWinklerNormalizedName")
|
||||
public class JaroWinklerNormalizedName extends AbstractComparator {
|
||||
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
|
||||
public JaroWinklerNormalizedName(Map<String, Number> params){
|
||||
public JaroWinklerNormalizedName(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
this.params = params;
|
||||
}
|
||||
|
@ -39,11 +39,11 @@ public class JaroWinklerNormalizedName extends AbstractComparator {
|
|||
ca = filterAllStopWords(ca);
|
||||
cb = filterAllStopWords(cb);
|
||||
|
||||
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
|
||||
Set<String> cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
|
||||
ca = removeKeywords(ca, keywords1);
|
||||
ca = removeKeywords(ca, cities1);
|
||||
|
|
|
@ -12,7 +12,7 @@ import java.util.Map;
|
|||
@ComparatorClass("jaroWinklerTitle")
|
||||
public class JaroWinklerTitle extends AbstractComparator {
|
||||
|
||||
public JaroWinklerTitle(Map<String, Number> params){
|
||||
public JaroWinklerTitle(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,9 +10,9 @@ import java.util.Set;
|
|||
@ComparatorClass("keywordMatch")
|
||||
public class KeywordMatch extends AbstractComparator {
|
||||
|
||||
Map<String, Number> params;
|
||||
Map<String, String> params;
|
||||
|
||||
public KeywordMatch(Map<String, Number> params) {
|
||||
public KeywordMatch(Map<String, String> params) {
|
||||
super(params);
|
||||
this.params = params;
|
||||
}
|
||||
|
@ -29,8 +29,8 @@ public class KeywordMatch extends AbstractComparator {
|
|||
ca = filterAllStopWords(ca);
|
||||
cb = filterAllStopWords(cb);
|
||||
|
||||
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue());
|
||||
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
|
||||
|
||||
Set<String> codes1 = toCodes(keywords1, conf.translationMap());
|
||||
Set<String> codes2 = toCodes(keywords2, conf.translationMap());
|
||||
|
@ -41,7 +41,7 @@ public class KeywordMatch extends AbstractComparator {
|
|||
else {
|
||||
if (codes1.isEmpty() ^ codes2.isEmpty())
|
||||
return -1; //undefined if one of the two has no keywords
|
||||
return commonElementsPercentage(codes1, codes2) > params.getOrDefault("threshold", 0).intValue() ? 1.0 : 0.0;
|
||||
return commonElementsPercentage(codes1, codes2) > Double.parseDouble(params.getOrDefault("threshold", "0.0")) ? 1.0 : 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ import java.util.Map;
|
|||
@ComparatorClass("level2JaroWinkler")
|
||||
public class Level2JaroWinkler extends AbstractComparator {
|
||||
|
||||
public Level2JaroWinkler(Map<String, Number> params){
|
||||
public Level2JaroWinkler(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.Level2JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
@ComparatorClass("level2JaroWinklerTitle")
|
||||
public class Level2JaroWinklerTitle extends AbstractComparator {
|
||||
|
||||
public Level2JaroWinklerTitle(Map<String,Number> params){
|
||||
public Level2JaroWinklerTitle(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Level2JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ import java.util.Map;
|
|||
@ComparatorClass("level2Levenstein")
|
||||
public class Level2Levenstein extends AbstractComparator {
|
||||
|
||||
public Level2Levenstein(Map<String,Number> params){
|
||||
public Level2Levenstein(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Level2Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ import java.util.Map;
|
|||
@ComparatorClass("levenstein")
|
||||
public class Levenstein extends AbstractComparator {
|
||||
|
||||
public Levenstein(Map<String,Number> params){
|
||||
public Levenstein(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ public class LevensteinTitle extends AbstractComparator {
|
|||
|
||||
private static final Log log = LogFactory.getLog(LevensteinTitle.class);
|
||||
|
||||
public LevensteinTitle(Map<String,Number> params){
|
||||
public LevensteinTitle(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ import java.util.Map;
|
|||
@ComparatorClass("levensteinTitleIgnoreVersion")
|
||||
public class LevensteinTitleIgnoreVersion extends AbstractComparator {
|
||||
|
||||
public LevensteinTitleIgnoreVersion(Map<String,Number> params){
|
||||
public LevensteinTitleIgnoreVersion(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import java.util.Map;
|
|||
@ComparatorClass("mustBeDifferent")
|
||||
public class MustBeDifferent extends AbstractComparator {
|
||||
|
||||
public MustBeDifferent(Map<String,Number> params){
|
||||
public MustBeDifferent(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ import java.util.Map;
|
|||
@ComparatorClass("null")
|
||||
public class NullDistanceAlgo implements Comparator {
|
||||
|
||||
public NullDistanceAlgo(Map<String, Number> params){
|
||||
public NullDistanceAlgo(Map<String, String> params){
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,9 +20,9 @@ import java.util.stream.Collectors;
|
|||
public class PidMatch extends AbstractComparator {
|
||||
|
||||
private static final Log log = LogFactory.getLog(PidMatch.class);
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
|
||||
public PidMatch(final Map<String, Number> params) {
|
||||
public PidMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
this.params = params;
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ public class PidMatch extends AbstractComparator {
|
|||
return 0.0;
|
||||
}
|
||||
|
||||
return (double)incommon / (incommon + simDiff) > params.getOrDefault("threshold", 0.5).doubleValue() ? 1 : 0;
|
||||
return (double)incommon / (incommon + simDiff) > Double.parseDouble(params.getOrDefault("threshold", "0.5")) ? 1 : 0;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ public class SizeMatch extends AbstractComparator {
|
|||
* @param params
|
||||
* the parameters
|
||||
*/
|
||||
public SizeMatch(final Map<String, Number> params) {
|
||||
public SizeMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ import java.util.Map;
|
|||
@ComparatorClass("sortedJaroWinkler")
|
||||
public class SortedJaroWinkler extends AbstractSortedComparator {
|
||||
|
||||
public SortedJaroWinkler(Map<String,Number> params){
|
||||
public SortedJaroWinkler(Map<String,String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ public class SortedLevel2JaroWinkler extends AbstractSortedComparator {
|
|||
super(weight, new com.wcohen.ss.Level2JaroWinkler());
|
||||
}
|
||||
|
||||
public SortedLevel2JaroWinkler(final Map<String, Number> params){
|
||||
public SortedLevel2JaroWinkler(final Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.Level2JaroWinkler());
|
||||
}
|
||||
|
||||
|
|
|
@ -31,9 +31,9 @@ public class SubStringLevenstein extends AbstractComparator {
|
|||
super(w, new com.wcohen.ss.Levenstein());
|
||||
}
|
||||
|
||||
public SubStringLevenstein(Map<String, Number> params){
|
||||
public SubStringLevenstein(Map<String, String> params){
|
||||
super(params, new com.wcohen.ss.Levenstein());
|
||||
this.limit = params.get("limit").intValue();
|
||||
this.limit = Integer.parseInt(params.getOrDefault("limit", "1"));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,7 +17,7 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
|
|||
@ComparatorClass("titleVersionMatch")
|
||||
public class TitleVersionMatch extends AbstractComparator {
|
||||
|
||||
public TitleVersionMatch(final Map<String, Number> params) {
|
||||
public TitleVersionMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -12,19 +12,19 @@ import java.util.Map;
|
|||
@ComparatorClass("urlMatcher")
|
||||
public class UrlMatcher extends Levenstein {
|
||||
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
|
||||
public UrlMatcher(Map<String, Number> params){
|
||||
public UrlMatcher(Map<String, String> params){
|
||||
super(params);
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
public UrlMatcher(double weight, Map<String, Number> params) {
|
||||
public UrlMatcher(double weight, Map<String, String> params) {
|
||||
super(weight);
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
public void setParams(Map<String, Number> params) {
|
||||
public void setParams(Map<String, String> params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
|
@ -37,8 +37,8 @@ public class UrlMatcher extends Levenstein {
|
|||
return 0.0;
|
||||
}
|
||||
|
||||
Double hostW = params.get("host").doubleValue();
|
||||
Double pathW = params.get("path").doubleValue();
|
||||
Double hostW = Double.parseDouble(params.getOrDefault("host", "0.5"));
|
||||
Double pathW = Double.parseDouble(params.getOrDefault("path", "0.5"));
|
||||
|
||||
if (StringUtils.isBlank(urlA.getPath()) || StringUtils.isBlank(urlB.getPath())) {
|
||||
return hostW * 0.5;
|
||||
|
|
|
@ -18,7 +18,7 @@ public class YearMatch extends AbstractComparator {
|
|||
|
||||
private int limit = 4;
|
||||
|
||||
public YearMatch(final Map<String, Number> params) {
|
||||
public YearMatch(final Map<String, String> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,13 +18,13 @@ public abstract class AbstractComparator extends AbstractPaceFunctions implement
|
|||
/** The weight. */
|
||||
protected double weight = 0.0;
|
||||
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
|
||||
protected AbstractComparator(Map<String, Number> params) {
|
||||
protected AbstractComparator(Map<String, String> params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
protected AbstractComparator(Map<String, Number> params, final AbstractStringDistance ssalgo){
|
||||
protected AbstractComparator(Map<String, String> params, final AbstractStringDistance ssalgo){
|
||||
this.params = params;
|
||||
this.weight = 1.0;
|
||||
this.ssalgo = ssalgo;
|
||||
|
|
|
@ -23,8 +23,8 @@ public abstract class AbstractSortedComparator extends AbstractComparator {
|
|||
super(weight, ssalgo);
|
||||
}
|
||||
|
||||
protected AbstractSortedComparator(final Map<String, Number> params, final AbstractStringDistance ssalgo){
|
||||
super(params.get("weight").doubleValue(), ssalgo);
|
||||
/**
 * Builds a sorted comparator from the configuration parameters.
 *
 * @param params comparator parameters; the optional {@code weight} entry is parsed as a
 *               double and defaults to 1.0 when it is not configured
 * @param ssalgo the string distance algorithm, passed on to the superclass
 */
protected AbstractSortedComparator(final Map<String, String> params, final AbstractStringDistance ssalgo){
    // getOrDefault avoids the NullPointerException that Double.parseDouble(null) raises
    // when "weight" is missing from the parameter map.
    super(Double.parseDouble(params.getOrDefault("weight", "1.0")), ssalgo);
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -12,7 +12,7 @@ public class FieldConf implements Serializable {
|
|||
private String field; //name of the field on which apply the comparator
|
||||
private String comparator; //comparator name
|
||||
private double weight = 1.0; //weight for the field (to be used in the aggregation)
|
||||
private Map<String,Number> params; //parameters
|
||||
private Map<String,String> params; //parameters
|
||||
|
||||
private boolean countIfUndefined;
|
||||
|
||||
|
@ -27,7 +27,7 @@ public class FieldConf implements Serializable {
|
|||
public FieldConf() {
|
||||
}
|
||||
|
||||
public FieldConf(String field, String comparator, double weight, Map<String, Number> params, boolean countIfUndefined) {
|
||||
public FieldConf(String field, String comparator, double weight, Map<String, String> params, boolean countIfUndefined) {
|
||||
this.field = field;
|
||||
this.comparator = comparator;
|
||||
this.weight = weight;
|
||||
|
@ -59,11 +59,11 @@ public class FieldConf implements Serializable {
|
|||
this.weight = weight;
|
||||
}
|
||||
|
||||
public Map<String, Number> getParams() {
|
||||
public Map<String, String> getParams() {
|
||||
return params;
|
||||
}
|
||||
|
||||
public void setParams(Map<String, Number> params) {
|
||||
public void setParams(Map<String, String> params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ public class TreeNodeDef implements Serializable {
|
|||
|
||||
double result = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()), conf);
|
||||
|
||||
stats.addFieldStats(fieldConf.getComparator() + " on " + fieldConf.getField(), new FieldStats(weight, result, fieldConf.isCountIfUndefined()));
|
||||
stats.addFieldStats(fieldConf.getComparator() + " on " + fieldConf.getField() + " " + fields.indexOf(fieldConf), new FieldStats(weight, result, fieldConf.isCountIfUndefined()));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ public class TreeProcessor{
|
|||
}
|
||||
|
||||
public boolean compare(final MapDocument a, final MapDocument b) {
|
||||
|
||||
//evaluate the decision tree
|
||||
return evaluateTree(a, b).getResult() == MatchType.MATCH;
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ public class PaceResolver implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
public Comparator getComparator(String name, Map<String, Number> params) throws PaceException {
|
||||
public Comparator getComparator(String name, Map<String, String> params) throws PaceException {
|
||||
try {
|
||||
return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) {
|
||||
|
|
|
@ -2,9 +2,12 @@ package eu.dnetlib.pace.comparators;
|
|||
|
||||
import eu.dnetlib.pace.clustering.NGramUtils;
|
||||
import eu.dnetlib.pace.tree.CityMatch;
|
||||
import eu.dnetlib.pace.tree.ContainsMatch;
|
||||
import eu.dnetlib.pace.tree.JaroWinklerNormalizedName;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.tree.KeywordMatch;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
|
@ -17,13 +20,13 @@ import static junit.framework.Assert.assertTrue;
|
|||
|
||||
public class ComparatorTest extends AbstractPaceFunctions {
|
||||
|
||||
private Map<String, Number> params;
|
||||
private Map<String, String> params;
|
||||
private DedupConfig conf;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
params = new HashMap<>();
|
||||
params.put("weight", 1.0);
|
||||
params.put("weight", "1.0");
|
||||
conf = DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf", ComparatorTest.class));
|
||||
|
||||
}
|
||||
|
@ -34,103 +37,6 @@ public class ComparatorTest extends AbstractPaceFunctions {
|
|||
System.out.println("utils = " + utils.cleanupForOrdering("University of Pisa"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName() {
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(0.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName2() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("University of New York", "Università di New York", conf);
|
||||
|
||||
assertEquals(1.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName3() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Biblioteca dell'Universita di Bologna", "Università di Bologna", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(0.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName4() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Universita degli studi di Pisa", "Universita di Pisa", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(1.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName5() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("RESEARCH PROMOTION FOUNDATION", "IDRYMA PROOTHISIS EREVNAS", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(1.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName6() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund)", "Fonds zur Förderung der wissenschaftlichen Forschung", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertTrue(result > 0.9);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName7() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
assertTrue(result > 0.9);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName8() {
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
|
||||
double result = jaroWinklerNormalizedName.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName9() {
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
|
||||
double result = jaroWinklerNormalizedName.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaroWinklerNormalizedName10(){
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
|
||||
double result = jaroWinklerNormalizedName.distance("Firenze University Press", "University of Florence", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cityMatchTest() {
|
||||
final CityMatch cityMatch = new CityMatch(params);
|
||||
|
@ -147,6 +53,53 @@ public class ComparatorTest extends AbstractPaceFunctions {
|
|||
//both names with cities (different)
|
||||
assertEquals(0.0, cityMatch.distance("Universita di Bologna", "Universita di Torino", conf));
|
||||
|
||||
//particular cases
|
||||
assertEquals(1.0, cityMatch.distance("Free University of Bozen-Bolzano", "Università di Bolzano", conf));
|
||||
assertEquals(1.0, cityMatch.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", conf));
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testJaroWinklerNormalizedName6() {
|
||||
//
|
||||
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
// double result = jaroWinklerNormalizedName.distance("Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund)", "Fonds zur Förderung der wissenschaftlichen Forschung", conf);
|
||||
//
|
||||
// System.out.println("result = " + result);
|
||||
// assertTrue(result > 0.9);
|
||||
//
|
||||
// }
|
||||
// @Test
|
||||
// public void testJaroWinklerNormalizedName10(){
|
||||
//
|
||||
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
//
|
||||
// double result = jaroWinklerNormalizedName.distance("Firenze University Press", "University of Florence", conf);
|
||||
//
|
||||
// System.out.println("result = " + result);
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void keywordMatchTest(){
|
||||
params.put("threshold", "0.4");
|
||||
|
||||
final KeywordMatch keywordMatch = new KeywordMatch(params);
|
||||
|
||||
assertEquals(1.0, keywordMatch.distance("Biblioteca dell'Universita di Bologna", "Università di Bologna", conf));
|
||||
assertEquals(1.0, keywordMatch.distance("Universita degli studi di Pisa", "Universita di Pisa", conf));
|
||||
assertEquals(1.0, keywordMatch.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf));
|
||||
assertEquals(1.0, keywordMatch.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void containsMatchTest(){
|
||||
|
||||
params.put("string", "openorgs");
|
||||
params.put("bool", "XOR");
|
||||
params.put("caseSensitive", "false");
|
||||
|
||||
final ContainsMatch containsMatch = new ContainsMatch(params);
|
||||
|
||||
assertEquals(0.0, containsMatch.distance("openorgs", "openorgs", conf));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,6 @@ import java.util.Map;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class ConfigTest extends AbstractPaceTest {
|
||||
|
||||
|
|
|
@ -8,11 +8,11 @@ import java.util.Map;
|
|||
|
||||
public class UtilTest {
|
||||
|
||||
Map<String, Number> params;
|
||||
Map<String, String> params;
|
||||
|
||||
@Before
|
||||
public void setUp(){
|
||||
params = new HashMap<String, Number>();
|
||||
params = new HashMap<String, String>();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue