package eu.dnetlib.pace.tree; import java.util.Map; import java.util.Set; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.tree.support.AbstractStringComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; @ComparatorClass("keywordMatch") public class KeywordMatch extends AbstractStringComparator { Map params; public KeywordMatch(Map params) { super(params); this.params = params; } @Override public double distance(final String a, final String b, final Config conf) { String ca = cleanup(a); String cb = cleanup(b); ca = normalize(ca); cb = normalize(cb); ca = filterAllStopWords(ca); cb = filterAllStopWords(cb); Set keywords1 = getKeywords( ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); Set keywords2 = getKeywords( cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); Set codes1 = toCodes(keywords1, conf.translationMap()); Set codes2 = toCodes(keywords2, conf.translationMap()); // if no cities are detected, the comparator gives 1.0 if (codes1.isEmpty() && codes2.isEmpty()) return 1.0; else { if (codes1.isEmpty() ^ codes2.isEmpty()) return -1.0; // undefined if one of the two has no keywords return commonElementsPercentage(codes1, codes2); } } }