forked from antonis.lempesis/dnet-hadoop
Implementation of the decision tree for the deduplication of authors, implementation of multiple comparators that can be used in a tree node, and definition of the proto for the person entity.
This commit is contained in:
parent
39613dbbd6
commit
7a8d28991f
|
@ -193,11 +193,38 @@ public abstract class AbstractPaceFunctions {
|
|||
}
|
||||
|
||||
public String normalizeCities(String s1, Map<String,String> cityMap){
|
||||
//TODO change normalization mode
|
||||
|
||||
for (String city : cityMap.keySet())
|
||||
s1 = s1.replaceAll(" " + city + " ", " " + cityMap.get(city) + " ");
|
||||
return s1;
|
||||
}
|
||||
|
||||
public String normalizeCities2 (String s1, Map<String, String> cityMap, int windowSize){
|
||||
|
||||
List<String> tokens = Arrays.asList(s1.split(" "));
|
||||
|
||||
if (tokens.size()<windowSize)
|
||||
windowSize = tokens.size();
|
||||
|
||||
int length = windowSize;
|
||||
|
||||
while (length != 0) {
|
||||
|
||||
for (int i = 0; i<=tokens.size()-length; i++){
|
||||
String candidate = Joiner.on(" ").join(tokens.subList(i, i + length));
|
||||
if (cityMap.containsKey(candidate)) {
|
||||
s1 = (" " + s1 + " ").replaceAll(" " + candidate + " ", " " + cityMap.get(candidate) + " ");
|
||||
return s1;
|
||||
}
|
||||
}
|
||||
length-=1;
|
||||
}
|
||||
|
||||
return s1;
|
||||
}
|
||||
|
||||
|
||||
public String removeCodes(String s) {
|
||||
final String regexKey = "\\bkey::[0-9]*\\b";
|
||||
final String regexCity = "\\bcity::[0-9]*\\b";
|
||||
|
|
|
@ -47,9 +47,14 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
|
|||
ca = translate(ca, translationMap);
|
||||
cb = translate(cb, translationMap);
|
||||
|
||||
String norm = normalizeCities(" " + ca + " ||| " + cb + " ", cityMap);
|
||||
ca = norm.split("\\|\\|\\|")[0].trim();
|
||||
cb = norm.split("\\|\\|\\|")[1].trim();
|
||||
//replace cities with codes
|
||||
// String norm = normalizeCities(" " + ca + " ||| " + cb + " ", cityMap);
|
||||
// ca = norm.split("\\|\\|\\|")[0].trim();
|
||||
// cb = norm.split("\\|\\|\\|")[1].trim();
|
||||
|
||||
ca = normalizeCities2(ca, cityMap, 4);
|
||||
cb = normalizeCities2(cb, cityMap, 4);
|
||||
|
||||
|
||||
if (sameCity(ca,cb)){
|
||||
if (sameKeywords(ca,cb)){
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
public class FieldConf implements Serializable {
|
||||
|
||||
private String field; //name of the field on which apply the comparator
|
||||
private String comparator; //comparator name
|
||||
private double weight = 1.0; //weight for the field (to be used in the aggregation)
|
||||
private Map<String,Number> params; //parameters
|
||||
|
||||
public FieldConf() {
|
||||
}
|
||||
|
||||
public FieldConf(String field, String comparator, double weight, Map<String, Number> params) {
|
||||
this.field = field;
|
||||
this.comparator = comparator;
|
||||
this.weight = weight;
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
public void setField(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
public String getComparator() {
|
||||
return comparator;
|
||||
}
|
||||
|
||||
public void setComparator(String comparator) {
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
public double getWeight() {
|
||||
return weight;
|
||||
}
|
||||
|
||||
public void setWeight(double weight) {
|
||||
this.weight = weight;
|
||||
}
|
||||
|
||||
public Map<String, Number> getParams() {
|
||||
return params;
|
||||
}
|
||||
|
||||
public void setParams(Map<String, Number> params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(this);
|
||||
} catch (IOException e) {
|
||||
throw new PaceException("Impossible to convert to JSON: ", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,60 +1,113 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.tree.TreeNode;
|
||||
import eu.dnetlib.pace.tree.Comparator;
|
||||
import eu.dnetlib.pace.tree.support.AggType;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
||||
public class TreeNodeDef implements Serializable {
|
||||
|
||||
private String name;
|
||||
private String field;
|
||||
private List<FieldConf> fields; //list of fields involved in the tree node (contains comparators to be used and field on which apply the comparator)
|
||||
private AggType aggregation; //how to aggregate similarity measures for every field
|
||||
|
||||
private String positive;
|
||||
private String negative;
|
||||
private String undefined;
|
||||
private double threshold; //threshold on the similarity measure
|
||||
|
||||
private Map<String, Number> params;
|
||||
private String positive; //specifies the next node in case of positive result: similarity>=th
|
||||
private String negative; //specifies the next node in case of negative result: similarity<th
|
||||
private String undefined; //specifies the next node in case of undefined result: similarity=-1
|
||||
|
||||
boolean ignoreMissing = true; //specifies what to do in case of missing field
|
||||
|
||||
public TreeNodeDef() {
|
||||
}
|
||||
|
||||
public TreeNodeDef(String name, String field, String positive, String negative, String undefined, Map<String, Number> params) {
|
||||
this.name = name;
|
||||
this.field = field;
|
||||
//compute the similarity measure between two documents
|
||||
public double evaluate(MapDocument doc1, MapDocument doc2) {
|
||||
|
||||
DescriptiveStatistics stats = new DescriptiveStatistics();
|
||||
|
||||
for (FieldConf fieldConf : fields) {
|
||||
|
||||
double weight = fieldConf.getWeight();
|
||||
|
||||
double similarity = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()));
|
||||
|
||||
//if similarity is -1 means that a comparator gave undefined, do not add result to the stats
|
||||
if (similarity != -1) {
|
||||
stats.addValue(weight * similarity);
|
||||
}
|
||||
else {
|
||||
if (!ignoreMissing) //if the missing value has not to be ignored, return -1
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
switch (aggregation){
|
||||
|
||||
case AVG:
|
||||
return stats.getMean();
|
||||
case SUM:
|
||||
return stats.getSum();
|
||||
case MAX:
|
||||
return stats.getMax();
|
||||
case MIN:
|
||||
return stats.getMin();
|
||||
default:
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Comparator comparator(final FieldConf field){
|
||||
|
||||
return PaceConfig.paceResolver.getComparator(field.getComparator(), field.getParams());
|
||||
}
|
||||
|
||||
public TreeNodeDef(List<FieldConf> fields, double threshold, AggType aggregation, String positive, String negative, String undefined) {
|
||||
this.fields = fields;
|
||||
this.threshold = threshold;
|
||||
this.aggregation = aggregation;
|
||||
this.positive = positive;
|
||||
this.negative = negative;
|
||||
this.undefined = undefined;
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
public TreeNode treeNode() {
|
||||
try {
|
||||
return PaceConfig.paceResolver.getTreeNode(getName(), params);
|
||||
} catch (PaceException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
}
|
||||
public boolean isIgnoreMissing() {
|
||||
return ignoreMissing;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
public void setIgnoreMissing(boolean ignoreMissing) {
|
||||
this.ignoreMissing = ignoreMissing;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
public List<FieldConf> getFields() {
|
||||
return fields;
|
||||
}
|
||||
|
||||
public String getField() {
|
||||
return field;
|
||||
public void setFields(List<FieldConf> fields) {
|
||||
this.fields = fields;
|
||||
}
|
||||
|
||||
public void setField(String field) {
|
||||
this.field = field;
|
||||
public double getThreshold() {
|
||||
return threshold;
|
||||
}
|
||||
|
||||
public void setThreshold(double threshold) {
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
public AggType getAggregation() {
|
||||
return aggregation;
|
||||
}
|
||||
|
||||
public void setAggregation(AggType aggregation) {
|
||||
this.aggregation = aggregation;
|
||||
}
|
||||
|
||||
public String getPositive() {
|
||||
|
@ -81,20 +134,12 @@ public class TreeNodeDef implements Serializable {
|
|||
this.undefined = undefined;
|
||||
}
|
||||
|
||||
public Map<String, Number> getParams() {
|
||||
return params;
|
||||
}
|
||||
|
||||
public void setParams(Map<String, Number> params) {
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(this);
|
||||
} catch (IOException e) {
|
||||
return e.getStackTrace().toString();
|
||||
throw new PaceException("Impossible to convert to JSON: ", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -5,17 +5,17 @@ import org.apache.commons.lang.StringUtils;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
public class AbstractTreeNode implements TreeNode {
|
||||
abstract class AbstractComparator implements Comparator {
|
||||
|
||||
Map<String, Number> params;
|
||||
|
||||
public AbstractTreeNode(Map<String, Number> params){
|
||||
public AbstractComparator(Map<String, Number> params){
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
return 0;
|
||||
public double compare(Field a, Field b) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public static double stringSimilarity(String s1, String s2) {
|
|
@ -6,15 +6,15 @@ import eu.dnetlib.pace.model.FieldList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@TreeNodeClass("coauthorsMatch")
|
||||
public class CoauthorsMatch extends AbstractTreeNode {
|
||||
@ComparatorClass("coauthorsMatch")
|
||||
public class CoauthorsMatch extends AbstractComparator {
|
||||
|
||||
public CoauthorsMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
final List<String> c1 = ((FieldList) a).stringList();
|
||||
final List<String> c2 = ((FieldList) b).stringList();
|
||||
|
@ -24,7 +24,7 @@ public class CoauthorsMatch extends AbstractTreeNode {
|
|||
|
||||
//few coauthors or too many coauthors
|
||||
if (size1 < params.getOrDefault("minCoauthors", 5).intValue() || size2 < params.getOrDefault("minCoauthors", 5).intValue() || (size1+size2 > params.getOrDefault("maxCoauthors", 200).intValue()))
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
int coauthorship = 0;
|
||||
for (String ca1: c1){
|
||||
|
@ -36,11 +36,7 @@ public class CoauthorsMatch extends AbstractTreeNode {
|
|||
}
|
||||
}
|
||||
|
||||
if (coauthorship>=params.getOrDefault("th", 5).intValue())
|
||||
return 1;
|
||||
else if (coauthorship == 0)
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
return coauthorship;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
||||
// Contract for the field comparators that a decision-tree node applies to a pair of fields.
public interface Comparator {
|
||||
|
||||
// Compares two fields and returns the distance measure, or -1 if the result is undefined.
|
||||
public double compare(Field a, Field b);
|
||||
|
||||
}
|
|
@ -7,7 +7,7 @@ import java.lang.annotation.Target;
|
|||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface TreeNodeClass {
|
||||
public @interface ComparatorClass {
|
||||
|
||||
public String value();
|
||||
}
|
|
@ -4,22 +4,22 @@ import eu.dnetlib.pace.model.Field;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
@TreeNodeClass("exactMatch")
|
||||
public class ExactMatch extends AbstractTreeNode {
|
||||
@ComparatorClass("exactMatch")
|
||||
public class ExactMatch extends AbstractComparator {
|
||||
|
||||
public ExactMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
||||
return 0;
|
||||
return -1;
|
||||
else if (a.stringValue().equals(b.stringValue()))
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4,18 +4,18 @@ import eu.dnetlib.pace.model.Field;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
@TreeNodeClass("similar")
|
||||
public class SimilarMatch extends AbstractTreeNode {
|
||||
@ComparatorClass("similar")
|
||||
public class SimilarMatch extends AbstractComparator {
|
||||
|
||||
public SimilarMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
||||
return 0; //undefined if one name is missing
|
||||
return -1; //undefined if one name is missing
|
||||
|
||||
//take only the first name
|
||||
String firstname1 = a.stringValue().split(" ")[0];
|
||||
|
@ -24,12 +24,7 @@ public class SimilarMatch extends AbstractTreeNode {
|
|||
if (firstname1.toLowerCase().trim().replaceAll("\\.","").replaceAll("\\s","").length()<=2 || firstname2.toLowerCase().replaceAll("\\.", "").replaceAll("\\s","").length()<=2) //too short names (considered similar)
|
||||
return 1;
|
||||
|
||||
if (stringSimilarity(firstname1,firstname2)>params.getOrDefault("th", 0.7).doubleValue()){
|
||||
return 1; //similar names, go on with the analysis
|
||||
}
|
||||
else {
|
||||
return -1; //names too different, no need to compare
|
||||
}
|
||||
return stringSimilarity(firstname1,firstname2);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -5,21 +5,21 @@ import eu.dnetlib.pace.model.FieldListImpl;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
@TreeNodeClass("topicsMatch")
|
||||
public class TopicsMatch extends AbstractTreeNode {
|
||||
@ComparatorClass("topicsMatch")
|
||||
public class TopicsMatch extends AbstractComparator {
|
||||
|
||||
public TopicsMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
double[] t1 = ((FieldListImpl) a).doubleArray();
|
||||
double[] t2 = ((FieldListImpl) b).doubleArray();
|
||||
|
||||
if (t1 == null || t2 == null)
|
||||
return 0; //0 similarity if no topics in one of the authors or in both
|
||||
return -1; //0 similarity if no topics in one of the authors or in both
|
||||
|
||||
double area = 0.0;
|
||||
|
||||
|
@ -30,7 +30,7 @@ public class TopicsMatch extends AbstractTreeNode {
|
|||
area += min_value[i];
|
||||
}
|
||||
|
||||
return area>params.getOrDefault("th", 0.7).doubleValue()?+1:-1;
|
||||
return area;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
||||
public interface TreeNode {
|
||||
|
||||
//compare two fields and returns: +1 if match, 0 if undefined, -1 if do not match
|
||||
public int compare(Field a, Field b);
|
||||
|
||||
}
|
|
@ -6,13 +6,13 @@ import eu.dnetlib.pace.model.FieldList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@TreeNodeClass("undefined")
|
||||
public class UndefinedNode implements TreeNode {
|
||||
@ComparatorClass("undefined")
|
||||
public class UndefinedNode implements Comparator {
|
||||
|
||||
Map<String, Number> params;
|
||||
|
||||
@Override
|
||||
public int compare(Field a, Field b) {
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
final List<String> sa = ((FieldList) a).stringList();
|
||||
final List<String> sb = ((FieldList) b).stringList();
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
// How the weighted per-field similarity values of a tree node are combined into one score.
public enum AggType {
|
||||
|
||||
// arithmetic mean of the collected values
AVG,
|
||||
// sum of the collected values
SUM,
|
||||
// largest collected value
MAX,
|
||||
// smallest collected value
MIN
|
||||
}
|
|
@ -1,13 +1,12 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
|
||||
public enum MatchType {
|
||||
|
||||
ORCID_MATCH,
|
||||
COAUTHORS_MATCH,
|
||||
TOPICS_MATCH,
|
||||
NO_MATCH;
|
||||
NO_MATCH,
|
||||
UNDEFINED;
|
||||
|
||||
public static MatchType getEnum(String value) {
|
||||
|
||||
|
@ -15,7 +14,7 @@ public enum MatchType {
|
|||
return MatchType.valueOf(value);
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
throw new PaceException("The match type is not valid");
|
||||
return MatchType.UNDEFINED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,36 +70,40 @@ public class BlockProcessor {
|
|||
final String idCurr = curr.getIdentifier();
|
||||
|
||||
//check if pivot and current element are similar by processing the tree
|
||||
if (navigateTree(pivot, curr))
|
||||
if (navigateTree(pivot, curr)!=MatchType.NO_MATCH)
|
||||
writeSimilarity(context, idPivot, idCurr);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private boolean navigateTree(final MapDocument doc1, final MapDocument doc2){
|
||||
private MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){
|
||||
|
||||
final Map<String, TreeNodeDef> decisionTree = dedupConf.getPace().getDecisionTree();
|
||||
|
||||
String current = "start";
|
||||
|
||||
while (!current.equals(MatchType.NO_MATCH.toString()) && !current.equals(MatchType.ORCID_MATCH.toString()) && !current.equals(MatchType.TOPICS_MATCH.toString()) && !current.equals(MatchType.COAUTHORS_MATCH.toString())) {
|
||||
while (MatchType.getEnum(current)==MatchType.UNDEFINED) {
|
||||
|
||||
TreeNodeDef currentNode = decisionTree.get(current);
|
||||
//throw an exception if the node doesn't exist
|
||||
if (currentNode == null)
|
||||
throw new PaceException("The Tree Node doesn't exist: " + current);
|
||||
|
||||
int compare = currentNode.treeNode().compare(doc1.getFieldMap().get(currentNode.getField()), doc2.getFieldMap().get(currentNode.getField()));
|
||||
double similarity = currentNode.evaluate(doc1, doc2);
|
||||
|
||||
if (similarity == -1) {
|
||||
current = currentNode.getUndefined();
|
||||
}
|
||||
else if (similarity>=currentNode.getThreshold()){
|
||||
current = currentNode.getPositive();
|
||||
}
|
||||
else {
|
||||
current = currentNode.getNegative();
|
||||
}
|
||||
|
||||
current = (compare==0)?currentNode.getUndefined():(compare==-1)?currentNode.getNegative():currentNode.getPositive();
|
||||
}
|
||||
|
||||
if (!current.equals(MatchType.NO_MATCH.toString()))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
return MatchType.getEnum(current);
|
||||
}
|
||||
|
||||
private Queue<MapDocument> prepare(final Iterable<MapDocument> documents) {
|
||||
|
|
|
@ -7,8 +7,8 @@ import eu.dnetlib.pace.condition.ConditionClass;
|
|||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||
import eu.dnetlib.pace.distance.DistanceClass;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import eu.dnetlib.pace.tree.TreeNode;
|
||||
import eu.dnetlib.pace.tree.TreeNodeClass;
|
||||
import eu.dnetlib.pace.tree.Comparator;
|
||||
import eu.dnetlib.pace.tree.ComparatorClass;
|
||||
import org.reflections.Reflections;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -22,7 +22,7 @@ public class PaceResolver implements Serializable {
|
|||
private final Map<String, Class<ClusteringFunction>> clusteringFunctions;
|
||||
private final Map<String, Class<ConditionAlgo>> conditionAlgos;
|
||||
private final Map<String, Class<DistanceAlgo>> distanceAlgos;
|
||||
private final Map<String, Class<TreeNode>> treeNodes;
|
||||
private final Map<String, Class<Comparator>> comparators;
|
||||
|
||||
public PaceResolver() {
|
||||
|
||||
|
@ -38,9 +38,9 @@ public class PaceResolver implements Serializable {
|
|||
.filter(DistanceAlgo.class::isAssignableFrom)
|
||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
||||
|
||||
this.treeNodes = new Reflections("eu.dnetlib").getTypesAnnotatedWith(TreeNodeClass.class).stream()
|
||||
.filter(TreeNode.class::isAssignableFrom)
|
||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(TreeNodeClass.class).value(), cl -> (Class<TreeNode>) cl));
|
||||
this.comparators = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ComparatorClass.class).stream()
|
||||
.filter(Comparator.class::isAssignableFrom)
|
||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class<Comparator>) cl));
|
||||
}
|
||||
|
||||
public ClusteringFunction getClusteringFunction(String name, Map<String, Integer> params) throws PaceException {
|
||||
|
@ -67,9 +67,9 @@ public class PaceResolver implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
public TreeNode getTreeNode(String name, Map<String, Number> params) throws PaceException {
|
||||
public Comparator getComparator(String name, Map<String, Number> params) throws PaceException {
|
||||
try {
|
||||
return treeNodes.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||
return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) {
|
||||
throw new PaceException(name + " not found ", e);
|
||||
}
|
||||
|
|
|
@ -56,9 +56,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
|
|||
public void testJaroWinklerNormalizedName2() {
|
||||
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("University of Pisa", "Universita degli studi di Pisa");
|
||||
double result = jaroWinklerNormalizedName.distance("University of New York", "Università di New York");
|
||||
|
||||
assertEquals(result, 1.0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue