package eu.dnetlib.pace.tree;

import com.google.common.collect.Sets;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Comparator registered as {@code "stringListMatch"} that measures the overlap between the
 * string lists of two fields.
 *
 * <p>Both lists are first reduced to sets, so duplicated values inside a list count once.
 * The result depends on the {@code "type"} parameter:
 * <ul>
 *   <li>{@code "percentage"} (default): size of the intersection divided by the size of the
 *       union of the two sets;</li>
 *   <li>any other value (documented as {@code "count"}): the number of common values.</li>
 * </ul>
 */
@ComparatorClass("stringListMatch")
public class StringListMatch extends AbstractComparator {

    private static final Log log = LogFactory.getLog(StringListMatch.class);

    /** Parameter value selecting the ratio-based result; it is also the default. */
    private static final String PERCENTAGE = "percentage";

    /** Configuration map received at construction time; not read again within this class. */
    private final Map<String, String> params;

    /** Result mode: "percentage" or "count". */
    private final String type;

    /**
     * Creates the comparator from its configuration.
     *
     * @param params comparator parameters; the optional {@code "type"} entry selects the
     *               result mode and defaults to {@code "percentage"} when absent
     */
    public StringListMatch(final Map<String, String> params) {
        super(params);
        this.params = params;
        this.type = params.getOrDefault("type", PERCENTAGE);
    }

    /**
     * Compares the string lists carried by the two fields.
     *
     * @param a    first field; cast to {@link FieldList}
     * @param b    second field; cast to {@link FieldList}
     * @param conf configuration, not used by this comparator
     * @return {@code -1} (undefined) when either list is empty; otherwise the ratio
     *         intersection/union in percentage mode, or the number of common values
     */
    @Override
    public double compare(final Field a, final Field b, final Config conf) {

        final Set<String> pa = new HashSet<>(((FieldList) a).stringList());
        final Set<String> pb = new HashSet<>(((FieldList) b).stringList());

        if (pa.isEmpty() || pb.isEmpty()) {
            return -1; // return undefined if one of the two lists is empty
        }

        final int incommon = Sets.intersection(pa, pb).size();
        final int simDiff = Sets.symmetricDifference(pa, pb).size();

        // Constant-first equals: a null "type" value falls through to the count branch
        // instead of throwing a NullPointerException.
        if (PERCENTAGE.equals(type)) {
            // intersection + symmetric difference == union, which is never empty here
            // because both sets hold at least one element, so the division is safe.
            return (double) incommon / (incommon + simDiff);
        }
        return incommon;
    }
}