package eu.dnetlib.pace.tree;

import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.distance.DistanceClass;
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Comparator registered as {@code "jaroWinklerNormalizedName"} that scores two names with
 * {@link com.wcohen.ss.JaroWinkler} after stripping the keywords and cities found in them.
 *
 * <p>Recognised configuration parameters (all optional):
 * <ul>
 *   <li>{@code windowSize} - passed to the inherited {@code getKeywords} / {@code getCities}
 *       helpers; defaults to {@code 4}.</li>
 *   <li>{@code threshold} - value the keyword comparison must strictly exceed before the string
 *       score is computed; defaults to {@code 0.5}.</li>
 * </ul>
 *
 * <p>The text helpers used here ({@code cleanup}, {@code normalize(String)},
 * {@code filterAllStopWords}, {@code getKeywords}, {@code getCities}, {@code sameCity},
 * {@code keywordsCompare}, {@code removeKeywords}) are inherited; their exact semantics are
 * defined outside this file.
 */
@ComparatorClass("jaroWinklerNormalizedName")
public class JaroWinklerNormalizedName extends AbstractComparator {

    private static final String WINDOW_SIZE_KEY = "windowSize";
    private static final String THRESHOLD_KEY = "threshold";
    private static final int DEFAULT_WINDOW_SIZE = 4;
    private static final double DEFAULT_THRESHOLD = 0.5;

    /** Configuration parameters; never {@code null} so that {@link #distance} can always read defaults. */
    private final Map<String, Number> params;

    /**
     * Creates a comparator configured through a parameter map.
     *
     * @param params configuration parameters (see class documentation); when {@code null},
     *               this class falls back to its default values
     */
    public JaroWinklerNormalizedName(Map<String, Number> params) {
        super(params, new com.wcohen.ss.JaroWinkler());
        this.params = (params != null) ? params : new HashMap<>();
    }

    /**
     * Creates a comparator with the given weight and default parameters.
     *
     * @param weight weight handed to the superclass
     */
    public JaroWinklerNormalizedName(double weight) {
        super(weight, new com.wcohen.ss.JaroWinkler());
        // No configuration supplied: an empty map makes distance() use the defaults
        // instead of failing on a null reference.
        this.params = new HashMap<>();
    }

    /**
     * Creates a comparator with the given weight and a custom string distance algorithm.
     *
     * @param weight weight handed to the superclass
     * @param ssalgo string distance algorithm handed to the superclass
     */
    protected JaroWinklerNormalizedName(double weight, AbstractStringDistance ssalgo) {
        super(weight, ssalgo);
        this.params = new HashMap<>();
    }

    /**
     * Compares two names.
     *
     * <p>Both inputs are cleaned, normalized and stripped of stop words. The result is
     * {@code 0.0} unless {@code sameCity} accepts the detected cities and the keyword
     * comparison strictly exceeds the configured threshold. Otherwise keywords and cities are
     * removed from both strings and the remainder is scored: {@code 1.0} when both remainders
     * are empty, else the (normalized) score of the underlying string distance algorithm.
     *
     * @param a first name
     * @param b second name
     * @return the comparison result as described above
     */
    @Override
    public double distance(String a, String b) {
        String ca = filterAllStopWords(normalize(cleanup(a)));
        String cb = filterAllStopWords(normalize(cleanup(b)));

        // Read once; the same window is used for keyword and city detection on both sides.
        final int windowSize = params.getOrDefault(WINDOW_SIZE_KEY, DEFAULT_WINDOW_SIZE).intValue();

        Set<String> keywords1 = getKeywords(ca, windowSize);
        Set<String> keywords2 = getKeywords(cb, windowSize);
        Set<String> cities1 = getCities(ca, windowSize);
        Set<String> cities2 = getCities(cb, windowSize);

        if (!sameCity(cities1, cities2)) {
            return 0.0;
        }

        final double threshold = params.getOrDefault(THRESHOLD_KEY, DEFAULT_THRESHOLD).doubleValue();
        if (!(keywordsCompare(keywords1, keywords2) > threshold)) {
            return 0.0;
        }

        ca = removeKeywords(ca, keywords1);
        ca = removeKeywords(ca, cities1);
        cb = removeKeywords(cb, keywords2);
        cb = removeKeywords(cb, cities2);

        // Nothing left besides matching keywords/cities: treat as a full match.
        if (ca.isEmpty() && cb.isEmpty()) {
            return 1.0;
        }
        return normalize(ssalgo.score(ca, cb));
    }

    /**
     * @return the weight stored in the superclass
     */
    @Override
    public double getWeight() {
        return super.weight;
    }

    /**
     * Identity normalization: the raw score is returned unchanged.
     *
     * @param d raw score
     * @return {@code d}
     */
    @Override
    protected double normalize(double d) {
        return d;
    }
}