the param map has been updated: now it accepts string parameters

This commit is contained in:
miconis 2019-11-21 09:37:56 +01:00
parent ddd40540aa
commit 8c0d346005
36 changed files with 169 additions and 162 deletions

View File

@ -11,7 +11,7 @@ import java.util.Map;
@ComparatorClass("alwaysMatch") @ComparatorClass("alwaysMatch")
public class AlwaysMatch extends AbstractComparator { public class AlwaysMatch extends AbstractComparator {
public AlwaysMatch(final Map<String, Number> params){ public AlwaysMatch(final Map<String, String> params){
super(params, new com.wcohen.ss.JaroWinkler()); super(params, new com.wcohen.ss.JaroWinkler());
} }

View File

@ -10,9 +10,9 @@ import java.util.Set;
@ComparatorClass("cityMatch") @ComparatorClass("cityMatch")
public class CityMatch extends AbstractComparator { public class CityMatch extends AbstractComparator {
private Map<String, Number> params; private Map<String, String> params;
public CityMatch(Map<String, Number> params) { public CityMatch(Map<String, String> params) {
super(params); super(params);
this.params = params; this.params = params;
} }
@ -29,8 +29,8 @@ public class CityMatch extends AbstractComparator {
ca = filterAllStopWords(ca); ca = filterAllStopWords(ca);
cb = filterAllStopWords(cb); cb = filterAllStopWords(cb);
Set<String> cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue()); Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue()); Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> codes1 = citiesToCodes(cities1); Set<String> codes1 = citiesToCodes(cities1);
Set<String> codes2 = citiesToCodes(cities2); Set<String> codes2 = citiesToCodes(cities2);
@ -41,7 +41,7 @@ public class CityMatch extends AbstractComparator {
else { else {
if (codes1.isEmpty() ^ codes2.isEmpty()) if (codes1.isEmpty() ^ codes2.isEmpty())
return -1; //undefined if one of the two has no cities return -1; //undefined if one of the two has no cities
return commonElementsPercentage(codes1, codes2) > params.getOrDefault("threshold", 0).intValue() ? 1.0 : 0.0; return commonElementsPercentage(codes1, codes2) > Integer.parseInt(params.getOrDefault("threshold", "0")) ? 1.0 : 0.0;
} }
} }
} }

View File

@ -0,0 +1,57 @@
package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import java.util.Locale;
import java.util.Map;
/**
 * Comparator that checks whether the two compared values contain a configured substring.
 *
 * <p>Parameters, read from the params map on every call to {@link #distance}:
 * <ul>
 *   <li>{@code string}: the substring to look for;</li>
 *   <li>{@code bool}: how the two containment checks are combined, one of
 *       {@code AND}, {@code OR}, {@code XOR};</li>
 *   <li>{@code caseSensitive}: optional, parsed with {@link Boolean#parseBoolean},
 *       defaults to {@code false}.</li>
 * </ul>
 *
 * @author miconis
 * */
@ComparatorClass("containsMatch")
public class ContainsMatch extends AbstractComparator {

    private final Map<String, String> params;

    /**
     * @param params the comparator parameters (see the class description for the keys read)
     */
    public ContainsMatch(Map<String, String> params) {
        super(params);
        this.params = params;
    }

    /**
     * Returns {@code 1.0} when the containment checks on {@code a} and {@code b}, combined
     * with the configured boolean operator, evaluate to true; {@code 0.0} otherwise.
     *
     * <p>An unrecognised operator yields {@code 0.0}. A missing {@code string} or
     * {@code bool} parameter is treated the same way instead of failing with a
     * {@link NullPointerException}.
     *
     * @param a    first value to inspect
     * @param b    second value to inspect
     * @param conf the configuration (not used by this comparator)
     * @return {@code 1.0} on match, {@code 0.0} otherwise
     */
    @Override
    public double distance(final String a, final String b, final Config conf) {

        //read parameters
        final boolean caseSensitive = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
        String needle = params.get("string");
        final String agg = params.get("bool");

        // Without a substring or an operator there is nothing to evaluate; behave like the
        // "unknown operator" branch rather than throwing from contains(null) / switch(null).
        if (needle == null || agg == null) {
            return 0.0;
        }

        // NOTE(review): a and b are assumed to be non-null here - confirm against the caller.
        String ca = a;
        String cb = b;
        if (!caseSensitive) {
            // The needle must be folded too, otherwise a configured value containing
            // upper-case characters could never be found in the lower-cased inputs.
            // Locale.ROOT keeps the folding independent of the JVM default locale.
            ca = a.toLowerCase(Locale.ROOT);
            cb = b.toLowerCase(Locale.ROOT);
            needle = needle.toLowerCase(Locale.ROOT);
        }

        final boolean inA = ca.contains(needle);
        final boolean inB = cb.contains(needle);

        final boolean matched;
        switch (agg) {
            case "AND":
                matched = inA && inB;
                break;
            case "OR":
                matched = inA || inB;
                break;
            case "XOR":
                matched = inA ^ inB;
                break;
            default:
                matched = false;
                break;
        }

        return matched ? 1.0 : 0.0;
    }
}

View File

@ -1,6 +1,5 @@
package eu.dnetlib.pace.tree; package eu.dnetlib.pace.tree;
import java.util.List;
import java.util.Map; import java.util.Map;
import eu.dnetlib.pace.model.Field; import eu.dnetlib.pace.model.Field;
@ -16,7 +15,7 @@ public class DoiExactMatch extends ExactMatchIgnoreCase {
public final String PREFIX = "(http:\\/\\/dx\\.doi\\.org\\/)|(doi:)"; public final String PREFIX = "(http:\\/\\/dx\\.doi\\.org\\/)|(doi:)";
public DoiExactMatch(final Map<String, Number> params) { public DoiExactMatch(final Map<String, String> params) {
super(params); super(params);
} }

View File

@ -10,7 +10,7 @@ import java.util.Map;
@ComparatorClass("domainExactMatch") @ComparatorClass("domainExactMatch")
public class DomainExactMatch extends ExactMatchIgnoreCase { public class DomainExactMatch extends ExactMatchIgnoreCase {
public DomainExactMatch(final Map<String, Number> params) { public DomainExactMatch(final Map<String, String> params) {
super(params); super(params);
} }

View File

@ -10,7 +10,7 @@ import java.util.Map;
@ComparatorClass("exactMatch") @ComparatorClass("exactMatch")
public class ExactMatch extends AbstractComparator { public class ExactMatch extends AbstractComparator {
public ExactMatch(Map<String, Number> params){ public ExactMatch(Map<String, String> params){
super(params, new com.wcohen.ss.JaroWinkler()); super(params, new com.wcohen.ss.JaroWinkler());
} }

View File

@ -10,7 +10,7 @@ import java.util.Map;
@ComparatorClass("exactMatchIgnoreCase") @ComparatorClass("exactMatchIgnoreCase")
public class ExactMatchIgnoreCase extends AbstractComparator { public class ExactMatchIgnoreCase extends AbstractComparator {
public ExactMatchIgnoreCase(Map<String, Number> params) { public ExactMatchIgnoreCase(Map<String, String> params) {
super(params); super(params);
} }

View File

@ -11,7 +11,7 @@ import java.util.Map;
@ComparatorClass("jaroWinkler") @ComparatorClass("jaroWinkler")
public class JaroWinkler extends AbstractComparator { public class JaroWinkler extends AbstractComparator {
public JaroWinkler(Map<String, Number> params){ public JaroWinkler(Map<String, String> params){
super(params, new com.wcohen.ss.JaroWinkler()); super(params, new com.wcohen.ss.JaroWinkler());
} }

View File

@ -13,9 +13,9 @@ import java.util.Set;
@ComparatorClass("jaroWinklerNormalizedName") @ComparatorClass("jaroWinklerNormalizedName")
public class JaroWinklerNormalizedName extends AbstractComparator { public class JaroWinklerNormalizedName extends AbstractComparator {
private Map<String, Number> params; private Map<String, String> params;
public JaroWinklerNormalizedName(Map<String, Number> params){ public JaroWinklerNormalizedName(Map<String, String> params){
super(params, new com.wcohen.ss.JaroWinkler()); super(params, new com.wcohen.ss.JaroWinkler());
this.params = params; this.params = params;
} }
@ -39,11 +39,11 @@ public class JaroWinklerNormalizedName extends AbstractComparator {
ca = filterAllStopWords(ca); ca = filterAllStopWords(ca);
cb = filterAllStopWords(cb); cb = filterAllStopWords(cb);
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue()); Set<String> keywords1 = getKeywords(ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue()); Set<String> keywords2 = getKeywords(cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> cities1 = getCities(ca, params.getOrDefault("windowSize", 4).intValue()); Set<String> cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> cities2 = getCities(cb, params.getOrDefault("windowSize", 4).intValue()); Set<String> cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4")));
ca = removeKeywords(ca, keywords1); ca = removeKeywords(ca, keywords1);
ca = removeKeywords(ca, cities1); ca = removeKeywords(ca, cities1);

View File

@ -12,7 +12,7 @@ import java.util.Map;
@ComparatorClass("jaroWinklerTitle") @ComparatorClass("jaroWinklerTitle")
public class JaroWinklerTitle extends AbstractComparator { public class JaroWinklerTitle extends AbstractComparator {
public JaroWinklerTitle(Map<String, Number> params){ public JaroWinklerTitle(Map<String, String> params){
super(params, new com.wcohen.ss.JaroWinkler()); super(params, new com.wcohen.ss.JaroWinkler());
} }

View File

@ -10,9 +10,9 @@ import java.util.Set;
@ComparatorClass("keywordMatch") @ComparatorClass("keywordMatch")
public class KeywordMatch extends AbstractComparator { public class KeywordMatch extends AbstractComparator {
Map<String, Number> params; Map<String, String> params;
public KeywordMatch(Map<String, Number> params) { public KeywordMatch(Map<String, String> params) {
super(params); super(params);
this.params = params; this.params = params;
} }
@ -29,8 +29,8 @@ public class KeywordMatch extends AbstractComparator {
ca = filterAllStopWords(ca); ca = filterAllStopWords(ca);
cb = filterAllStopWords(cb); cb = filterAllStopWords(cb);
Set<String> keywords1 = getKeywords(ca, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue()); Set<String> keywords1 = getKeywords(ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> keywords2 = getKeywords(cb, conf.translationMap(), params.getOrDefault("windowSize", 4).intValue()); Set<String> keywords2 = getKeywords(cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4")));
Set<String> codes1 = toCodes(keywords1, conf.translationMap()); Set<String> codes1 = toCodes(keywords1, conf.translationMap());
Set<String> codes2 = toCodes(keywords2, conf.translationMap()); Set<String> codes2 = toCodes(keywords2, conf.translationMap());
@ -41,7 +41,7 @@ public class KeywordMatch extends AbstractComparator {
else { else {
if (codes1.isEmpty() ^ codes2.isEmpty()) if (codes1.isEmpty() ^ codes2.isEmpty())
return -1; //undefined if one of the two has no keywords return -1; //undefined if one of the two has no keywords
return commonElementsPercentage(codes1, codes2) > params.getOrDefault("threshold", 0).intValue() ? 1.0 : 0.0; return commonElementsPercentage(codes1, codes2) > Double.parseDouble(params.getOrDefault("threshold", "0.0")) ? 1.0 : 0.0;
} }
} }
} }

View File

@ -9,7 +9,7 @@ import java.util.Map;
@ComparatorClass("level2JaroWinkler") @ComparatorClass("level2JaroWinkler")
public class Level2JaroWinkler extends AbstractComparator { public class Level2JaroWinkler extends AbstractComparator {
public Level2JaroWinkler(Map<String, Number> params){ public Level2JaroWinkler(Map<String, String> params){
super(params, new com.wcohen.ss.Level2JaroWinkler()); super(params, new com.wcohen.ss.Level2JaroWinkler());
} }

View File

@ -10,7 +10,7 @@ import java.util.Map;
@ComparatorClass("level2JaroWinklerTitle") @ComparatorClass("level2JaroWinklerTitle")
public class Level2JaroWinklerTitle extends AbstractComparator { public class Level2JaroWinklerTitle extends AbstractComparator {
public Level2JaroWinklerTitle(Map<String,Number> params){ public Level2JaroWinklerTitle(Map<String,String> params){
super(params, new com.wcohen.ss.Level2JaroWinkler()); super(params, new com.wcohen.ss.Level2JaroWinkler());
} }

View File

@ -9,7 +9,7 @@ import java.util.Map;
@ComparatorClass("level2Levenstein") @ComparatorClass("level2Levenstein")
public class Level2Levenstein extends AbstractComparator { public class Level2Levenstein extends AbstractComparator {
public Level2Levenstein(Map<String,Number> params){ public Level2Levenstein(Map<String,String> params){
super(params, new com.wcohen.ss.Level2Levenstein()); super(params, new com.wcohen.ss.Level2Levenstein());
} }

View File

@ -9,7 +9,7 @@ import java.util.Map;
@ComparatorClass("levenstein") @ComparatorClass("levenstein")
public class Levenstein extends AbstractComparator { public class Levenstein extends AbstractComparator {
public Levenstein(Map<String,Number> params){ public Levenstein(Map<String,String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }

View File

@ -15,7 +15,7 @@ public class LevensteinTitle extends AbstractComparator {
private static final Log log = LogFactory.getLog(LevensteinTitle.class); private static final Log log = LogFactory.getLog(LevensteinTitle.class);
public LevensteinTitle(Map<String,Number> params){ public LevensteinTitle(Map<String,String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }

View File

@ -14,7 +14,7 @@ import java.util.Map;
@ComparatorClass("levensteinTitleIgnoreVersion") @ComparatorClass("levensteinTitleIgnoreVersion")
public class LevensteinTitleIgnoreVersion extends AbstractComparator { public class LevensteinTitleIgnoreVersion extends AbstractComparator {
public LevensteinTitleIgnoreVersion(Map<String,Number> params){ public LevensteinTitleIgnoreVersion(Map<String,String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }

View File

@ -10,7 +10,7 @@ import java.util.Map;
@ComparatorClass("mustBeDifferent") @ComparatorClass("mustBeDifferent")
public class MustBeDifferent extends AbstractComparator { public class MustBeDifferent extends AbstractComparator {
public MustBeDifferent(Map<String,Number> params){ public MustBeDifferent(Map<String,String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }

View File

@ -14,7 +14,7 @@ import java.util.Map;
@ComparatorClass("null") @ComparatorClass("null")
public class NullDistanceAlgo implements Comparator { public class NullDistanceAlgo implements Comparator {
public NullDistanceAlgo(Map<String, Number> params){ public NullDistanceAlgo(Map<String, String> params){
} }
@Override @Override

View File

@ -20,9 +20,9 @@ import java.util.stream.Collectors;
public class PidMatch extends AbstractComparator { public class PidMatch extends AbstractComparator {
private static final Log log = LogFactory.getLog(PidMatch.class); private static final Log log = LogFactory.getLog(PidMatch.class);
private Map<String, Number> params; private Map<String, String> params;
public PidMatch(final Map<String, Number> params) { public PidMatch(final Map<String, String> params) {
super(params); super(params);
this.params = params; this.params = params;
} }
@ -50,7 +50,7 @@ public class PidMatch extends AbstractComparator {
return 0.0; return 0.0;
} }
return (double)incommon / (incommon + simDiff) > params.getOrDefault("threshold", 0.5).doubleValue() ? 1 : 0; return (double)incommon / (incommon + simDiff) > Double.parseDouble(params.getOrDefault("threshold", "0.5")) ? 1 : 0;
} }

View File

@ -24,7 +24,7 @@ public class SizeMatch extends AbstractComparator {
* @param params * @param params
* the parameters * the parameters
*/ */
public SizeMatch(final Map<String, Number> params) { public SizeMatch(final Map<String, String> params) {
super(params); super(params);
} }

View File

@ -12,7 +12,7 @@ import java.util.Map;
@ComparatorClass("sortedJaroWinkler") @ComparatorClass("sortedJaroWinkler")
public class SortedJaroWinkler extends AbstractSortedComparator { public class SortedJaroWinkler extends AbstractSortedComparator {
public SortedJaroWinkler(Map<String,Number> params){ public SortedJaroWinkler(Map<String,String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
} }

View File

@ -22,7 +22,7 @@ public class SortedLevel2JaroWinkler extends AbstractSortedComparator {
super(weight, new com.wcohen.ss.Level2JaroWinkler()); super(weight, new com.wcohen.ss.Level2JaroWinkler());
} }
public SortedLevel2JaroWinkler(final Map<String, Number> params){ public SortedLevel2JaroWinkler(final Map<String, String> params){
super(params, new com.wcohen.ss.Level2JaroWinkler()); super(params, new com.wcohen.ss.Level2JaroWinkler());
} }

View File

@ -31,9 +31,9 @@ public class SubStringLevenstein extends AbstractComparator {
super(w, new com.wcohen.ss.Levenstein()); super(w, new com.wcohen.ss.Levenstein());
} }
public SubStringLevenstein(Map<String, Number> params){ public SubStringLevenstein(Map<String, String> params){
super(params, new com.wcohen.ss.Levenstein()); super(params, new com.wcohen.ss.Levenstein());
this.limit = params.get("limit").intValue(); this.limit = Integer.parseInt(params.getOrDefault("limit", "1"));
} }
/** /**

View File

@ -17,7 +17,7 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
@ComparatorClass("titleVersionMatch") @ComparatorClass("titleVersionMatch")
public class TitleVersionMatch extends AbstractComparator { public class TitleVersionMatch extends AbstractComparator {
public TitleVersionMatch(final Map<String, Number> params) { public TitleVersionMatch(final Map<String, String> params) {
super(params); super(params);
} }

View File

@ -12,19 +12,19 @@ import java.util.Map;
@ComparatorClass("urlMatcher") @ComparatorClass("urlMatcher")
public class UrlMatcher extends Levenstein { public class UrlMatcher extends Levenstein {
private Map<String, Number> params; private Map<String, String> params;
public UrlMatcher(Map<String, Number> params){ public UrlMatcher(Map<String, String> params){
super(params); super(params);
this.params = params; this.params = params;
} }
public UrlMatcher(double weight, Map<String, Number> params) { public UrlMatcher(double weight, Map<String, String> params) {
super(weight); super(weight);
this.params = params; this.params = params;
} }
public void setParams(Map<String, Number> params) { public void setParams(Map<String, String> params) {
this.params = params; this.params = params;
} }
@ -37,8 +37,8 @@ public class UrlMatcher extends Levenstein {
return 0.0; return 0.0;
} }
Double hostW = params.get("host").doubleValue(); Double hostW = Double.parseDouble(params.getOrDefault("host", "0.5"));
Double pathW = params.get("path").doubleValue(); Double pathW = Double.parseDouble(params.getOrDefault("path", "0.5"));
if (StringUtils.isBlank(urlA.getPath()) || StringUtils.isBlank(urlB.getPath())) { if (StringUtils.isBlank(urlA.getPath()) || StringUtils.isBlank(urlB.getPath())) {
return hostW * 0.5; return hostW * 0.5;

View File

@ -18,7 +18,7 @@ public class YearMatch extends AbstractComparator {
private int limit = 4; private int limit = 4;
public YearMatch(final Map<String, Number> params) { public YearMatch(final Map<String, String> params) {
super(params); super(params);
} }

View File

@ -18,13 +18,13 @@ public abstract class AbstractComparator extends AbstractPaceFunctions implement
/** The weight. */ /** The weight. */
protected double weight = 0.0; protected double weight = 0.0;
private Map<String, Number> params; private Map<String, String> params;
protected AbstractComparator(Map<String, Number> params) { protected AbstractComparator(Map<String, String> params) {
this.params = params; this.params = params;
} }
protected AbstractComparator(Map<String, Number> params, final AbstractStringDistance ssalgo){ protected AbstractComparator(Map<String, String> params, final AbstractStringDistance ssalgo){
this.params = params; this.params = params;
this.weight = 1.0; this.weight = 1.0;
this.ssalgo = ssalgo; this.ssalgo = ssalgo;

View File

@ -23,8 +23,8 @@ public abstract class AbstractSortedComparator extends AbstractComparator {
super(weight, ssalgo); super(weight, ssalgo);
} }
protected AbstractSortedComparator(final Map<String, Number> params, final AbstractStringDistance ssalgo){ protected AbstractSortedComparator(final Map<String, String> params, final AbstractStringDistance ssalgo){
super(params.get("weight").doubleValue(), ssalgo); super(Double.parseDouble(params.get("weight")), ssalgo);
} }
@Override @Override

View File

@ -12,7 +12,7 @@ public class FieldConf implements Serializable {
private String field; //name of the field on which apply the comparator private String field; //name of the field on which apply the comparator
private String comparator; //comparator name private String comparator; //comparator name
private double weight = 1.0; //weight for the field (to be used in the aggregation) private double weight = 1.0; //weight for the field (to be used in the aggregation)
private Map<String,Number> params; //parameters private Map<String,String> params; //parameters
private boolean countIfUndefined; private boolean countIfUndefined;
@ -27,7 +27,7 @@ public class FieldConf implements Serializable {
public FieldConf() { public FieldConf() {
} }
public FieldConf(String field, String comparator, double weight, Map<String, Number> params, boolean countIfUndefined) { public FieldConf(String field, String comparator, double weight, Map<String, String> params, boolean countIfUndefined) {
this.field = field; this.field = field;
this.comparator = comparator; this.comparator = comparator;
this.weight = weight; this.weight = weight;
@ -59,11 +59,11 @@ public class FieldConf implements Serializable {
this.weight = weight; this.weight = weight;
} }
public Map<String, Number> getParams() { public Map<String, String> getParams() {
return params; return params;
} }
public void setParams(Map<String, Number> params) { public void setParams(Map<String, String> params) {
this.params = params; this.params = params;
} }

View File

@ -46,7 +46,7 @@ public class TreeNodeDef implements Serializable {
double result = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()), conf); double result = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()), conf);
stats.addFieldStats(fieldConf.getComparator() + " on " + fieldConf.getField(), new FieldStats(weight, result, fieldConf.isCountIfUndefined())); stats.addFieldStats(fieldConf.getComparator() + " on " + fieldConf.getField() + " " + fields.indexOf(fieldConf), new FieldStats(weight, result, fieldConf.isCountIfUndefined()));
} }

View File

@ -23,7 +23,6 @@ public class TreeProcessor{
} }
public boolean compare(final MapDocument a, final MapDocument b) { public boolean compare(final MapDocument a, final MapDocument b) {
//evaluate the decision tree //evaluate the decision tree
return evaluateTree(a, b).getResult() == MatchType.MATCH; return evaluateTree(a, b).getResult() == MatchType.MATCH;
} }

View File

@ -38,7 +38,7 @@ public class PaceResolver implements Serializable {
} }
} }
public Comparator getComparator(String name, Map<String, Number> params) throws PaceException { public Comparator getComparator(String name, Map<String, String> params) throws PaceException {
try { try {
return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params); return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params);
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) { } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) {

View File

@ -2,9 +2,12 @@ package eu.dnetlib.pace.comparators;
import eu.dnetlib.pace.clustering.NGramUtils; import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.tree.CityMatch; import eu.dnetlib.pace.tree.CityMatch;
import eu.dnetlib.pace.tree.ContainsMatch;
import eu.dnetlib.pace.tree.JaroWinklerNormalizedName; import eu.dnetlib.pace.tree.JaroWinklerNormalizedName;
import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.tree.KeywordMatch;
import org.junit.Before; import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.common.AbstractPaceFunctions;
@ -17,13 +20,13 @@ import static junit.framework.Assert.assertTrue;
public class ComparatorTest extends AbstractPaceFunctions { public class ComparatorTest extends AbstractPaceFunctions {
private Map<String, Number> params; private Map<String, String> params;
private DedupConfig conf; private DedupConfig conf;
@Before @Before
public void setup() { public void setup() {
params = new HashMap<>(); params = new HashMap<>();
params.put("weight", 1.0); params.put("weight", "1.0");
conf = DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf", ComparatorTest.class)); conf = DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf", ComparatorTest.class));
} }
@ -34,103 +37,6 @@ public class ComparatorTest extends AbstractPaceFunctions {
System.out.println("utils = " + utils.cleanupForOrdering("University of Pisa")); System.out.println("utils = " + utils.cleanupForOrdering("University of Pisa"));
} }
@Test
public void testJaroWinklerNormalizedName() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State", conf);
System.out.println("result = " + result);
assertEquals(0.0, result);
}
@Test
public void testJaroWinklerNormalizedName2() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("University of New York", "Università di New York", conf);
assertEquals(1.0, result);
}
@Test
public void testJaroWinklerNormalizedName3() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Biblioteca dell'Universita di Bologna", "Università di Bologna", conf);
System.out.println("result = " + result);
assertEquals(0.0, result);
}
@Test
public void testJaroWinklerNormalizedName4() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Universita degli studi di Pisa", "Universita di Pisa", conf);
System.out.println("result = " + result);
assertEquals(1.0, result);
}
@Test
public void testJaroWinklerNormalizedName5() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("RESEARCH PROMOTION FOUNDATION", "IDRYMA PROOTHISIS EREVNAS", conf);
System.out.println("result = " + result);
assertEquals(1.0, result);
}
@Test
public void testJaroWinklerNormalizedName6() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund)", "Fonds zur Förderung der wissenschaftlichen Forschung", conf);
System.out.println("result = " + result);
assertTrue(result > 0.9);
}
@Test
public void testJaroWinklerNormalizedName7() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf);
System.out.println("result = " + result);
assertTrue(result > 0.9);
}
@Test
public void testJaroWinklerNormalizedName8() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", conf);
System.out.println("result = " + result);
}
@Test
public void testJaroWinklerNormalizedName9() {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf);
System.out.println("result = " + result);
}
@Test
public void testJaroWinklerNormalizedName10(){
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
double result = jaroWinklerNormalizedName.distance("Firenze University Press", "University of Florence", conf);
System.out.println("result = " + result);
}
@Test @Test
public void cityMatchTest() { public void cityMatchTest() {
final CityMatch cityMatch = new CityMatch(params); final CityMatch cityMatch = new CityMatch(params);
@ -147,6 +53,53 @@ public class ComparatorTest extends AbstractPaceFunctions {
//both names with cities (different) //both names with cities (different)
assertEquals(0.0, cityMatch.distance("Universita di Bologna", "Universita di Torino", conf)); assertEquals(0.0, cityMatch.distance("Universita di Bologna", "Universita di Torino", conf));
//particular cases
assertEquals(1.0, cityMatch.distance("Free University of Bozen-Bolzano", "Università di Bolzano", conf));
assertEquals(1.0, cityMatch.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", conf));
}
// @Test
// public void testJaroWinklerNormalizedName6() {
//
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
// double result = jaroWinklerNormalizedName.distance("Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund)", "Fonds zur Förderung der wissenschaftlichen Forschung", conf);
//
// System.out.println("result = " + result);
// assertTrue(result > 0.9);
//
// }
// @Test
// public void testJaroWinklerNormalizedName10(){
//
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
//
// double result = jaroWinklerNormalizedName.distance("Firenze University Press", "University of Florence", conf);
//
// System.out.println("result = " + result);
// }
/**
 * With the threshold parameter set to "0.4", KeywordMatch is expected to return 1.0
 * for each of the organisation name pairs below.
 */
@Test
public void keywordMatchTest(){
    params.put("threshold", "0.4");
    final KeywordMatch matcher = new KeywordMatch(params);

    // each row holds the two names handed to the comparator
    final String[][] namePairs = {
        {"Biblioteca dell'Universita di Bologna", "Università di Bologna"},
        {"Universita degli studi di Pisa", "Universita di Pisa"},
        {"Polytechnic University of Turin", "POLITECNICO DI TORINO"},
        {"Istanbul Commerce University", "İstanbul Ticarət Universiteti"}
    };

    for (final String[] pair : namePairs) {
        assertEquals(1.0, matcher.distance(pair[0], pair[1], conf));
    }
}
/**
 * Configures ContainsMatch to look for "openorgs" with the XOR operator,
 * case-insensitively. Both inputs contain the string, so the exclusive-or of the
 * two containment checks is false and the expected distance is 0.0.
 */
@Test
public void containsMatchTest(){
// parameters read by ContainsMatch: substring, boolean operator, case handling
params.put("string", "openorgs");
params.put("bool", "XOR");
params.put("caseSensitive", "false");
final ContainsMatch containsMatch = new ContainsMatch(params);
// both sides contain "openorgs" -> XOR is false -> 0.0
assertEquals(0.0, containsMatch.distance("openorgs", "openorgs", conf));
} }

View File

@ -7,7 +7,6 @@ import java.util.Map;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
public class ConfigTest extends AbstractPaceTest { public class ConfigTest extends AbstractPaceTest {

View File

@ -8,11 +8,11 @@ import java.util.Map;
public class UtilTest { public class UtilTest {
Map<String, Number> params; Map<String, String> params;
@Before @Before
public void setUp(){ public void setUp(){
params = new HashMap<String, Number>(); params = new HashMap<String, String>();
} }
@Test @Test