54 lines
1.6 KiB
Java
54 lines
1.6 KiB
Java
package eu.dnetlib.pace.tree;
|
|
|
|
import com.google.common.collect.Sets;
|
|
import eu.dnetlib.pace.config.Config;
|
|
import eu.dnetlib.pace.model.Field;
|
|
import eu.dnetlib.pace.model.FieldList;
|
|
import eu.dnetlib.pace.tree.support.AbstractComparator;
|
|
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
|
import java.util.HashSet;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
|
|
@ComparatorClass("stringListMatch")
|
|
public class StringListMatch extends AbstractComparator {
|
|
|
|
private static final Log log = LogFactory.getLog(StringListMatch.class);
|
|
private Map<String, String> params;
|
|
|
|
final private String TYPE; //percentage or count
|
|
|
|
public StringListMatch(final Map<String, String> params) {
|
|
super(params);
|
|
this.params = params;
|
|
|
|
TYPE = params.getOrDefault("type", "percentage");
|
|
}
|
|
|
|
@Override
|
|
public double compare(final Field a, final Field b, final Config conf) {
|
|
|
|
final Set<String> pa = new HashSet<>(((FieldList) a).stringList());
|
|
final Set<String> pb = new HashSet<>(((FieldList) b).stringList());
|
|
|
|
if (pa.isEmpty() || pb.isEmpty()) {
|
|
return -1; //return undefined if one of the two lists is empty
|
|
}
|
|
|
|
int incommon = Sets.intersection(pa, pb).size();
|
|
int simDiff = Sets.symmetricDifference(pa, pb).size();
|
|
|
|
if (incommon + simDiff == 0) {
|
|
return 0.0;
|
|
}
|
|
|
|
if(TYPE.equals("percentage"))
|
|
return (double)incommon / (incommon + simDiff);
|
|
else
|
|
return incommon;
|
|
|
|
}
|
|
} |