forked from D-Net/dnet-hadoop
53 lines
1.5 KiB
Java
53 lines
1.5 KiB
Java
package eu.dnetlib.pace.tree;
|
|
|
|
import com.google.common.collect.Sets;
|
|
import eu.dnetlib.pace.config.Config;
|
|
import eu.dnetlib.pace.tree.support.AbstractListComparator;
|
|
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
|
import java.util.HashSet;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
|
|
@ComparatorClass("stringListMatch")
|
|
public class StringListMatch extends AbstractListComparator {
|
|
|
|
private static final Log log = LogFactory.getLog(StringListMatch.class);
|
|
private Map<String, String> params;
|
|
|
|
final private String TYPE; //percentage or count
|
|
|
|
public StringListMatch(final Map<String, String> params) {
|
|
super(params);
|
|
this.params = params;
|
|
|
|
TYPE = params.getOrDefault("type", "percentage");
|
|
}
|
|
|
|
@Override
|
|
public double compare(final List<String> a, final List<String> b, final Config conf) {
|
|
|
|
final Set<String> pa = new HashSet<>(a);
|
|
final Set<String> pb = new HashSet<>(b);
|
|
|
|
if (pa.isEmpty() || pb.isEmpty()) {
|
|
return -1; //return undefined if one of the two lists is empty
|
|
}
|
|
|
|
int incommon = Sets.intersection(pa, pb).size();
|
|
int simDiff = Sets.symmetricDifference(pa, pb).size();
|
|
|
|
if (incommon + simDiff == 0) {
|
|
return 0.0;
|
|
}
|
|
|
|
if(TYPE.equals("percentage"))
|
|
return (double)incommon / (incommon + simDiff);
|
|
else
|
|
return incommon;
|
|
|
|
}
|
|
} |