package eu.dnetlib.pace.tree; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.Sets; import eu.dnetlib.pace.config.Config; import eu.dnetlib.pace.tree.support.AbstractListComparator; import eu.dnetlib.pace.tree.support.ComparatorClass; @ComparatorClass("instanceTypeMatch") public class InstanceTypeMatch extends AbstractListComparator { final Map translationMap = new HashMap<>(); public InstanceTypeMatch(Map params) { super(params); // jolly types translationMap.put("Conference object", "*"); translationMap.put("Research", "*"); translationMap.put("Other literature type", "*"); translationMap.put("Unknown", "*"); translationMap.put("UNKNOWN", "*"); // article types translationMap.put("Article", "Article"); translationMap.put("Journal", "Article"); translationMap.put("Data Paper", "Article"); translationMap.put("Software Paper", "Article"); translationMap.put("Preprint", "Article"); translationMap.put("Part of book or chapter of book", "Article"); // thesis types translationMap.put("Thesis", "Thesis"); translationMap.put("Master thesis", "Thesis"); translationMap.put("Bachelor thesis", "Thesis"); translationMap.put("Doctoral thesis", "Thesis"); } @Override public double compare(final List a, final List b, final Config conf) { if (a == null || b == null) { return -1; } if (a.isEmpty() || b.isEmpty()) { return -1; } final Set ca = a.stream().map(this::translate).collect(Collectors.toSet()); final Set cb = b.stream().map(this::translate).collect(Collectors.toSet()); // if at least one is a jolly type, it must produce a match if (ca.contains("*") || cb.contains("*")) return 1.0; int incommon = Sets.intersection(ca, cb).size(); // if at least one is in common, it must produce a match return incommon >= 1 ? 1 : 0; } public String translate(String term) { return translationMap.getOrDefault(term, term); } @Override public double getWeight() { return super.weight; } @Override protected double normalize(final double d) { return d; } }