1
0
Fork 0

code cleaning and implementation of the TreeDedup + minor changes

This commit is contained in:
miconis 2019-11-14 10:01:21 +01:00
parent 0973899865
commit c687956371
3 changed files with 36 additions and 6 deletions

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-dedup</artifactId>
<version>3.0.15-SNAPSHOT</version>
<version>4.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -256,9 +256,8 @@ public class WfConfig implements Serializable {
return maxIterations;
}
public WfConfig setMaxIterations(int maxIterations) {
public void setMaxIterations(int maxIterations) {
this.maxIterations = maxIterations;
return this;
}
/*

View File

@ -24,16 +24,16 @@ public class TreeProcessor {
public boolean compare(final MapDocument a, final MapDocument b) {
//evaluate the decision tree
return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH;
return evaluateTree(a, b) == MatchType.MATCH;
}
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map<String, TreeNodeDef> decisionTree){
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2){
String current = "start";
while (MatchType.parse(current)==MatchType.UNDEFINED) {
TreeNodeDef currentNode = decisionTree.get(current);
TreeNodeDef currentNode = config.decisionTree().get(current);
//throw an exception if the node doesn't exist
if (currentNode == null)
throw new PaceException("The Tree Node doesn't exist: " + current);
@ -57,4 +57,35 @@ public class TreeProcessor {
return MatchType.parse(current);
}
/**
 * Walks the configured decision tree for a pair of documents and returns the
 * score produced by the last node that was evaluated.
 *
 * <p>The walk begins at the node labelled {@code "start"} and continues for as
 * long as the current label parses to {@link MatchType#UNDEFINED}. At every node
 * the next label is chosen as follows:
 * <ul>
 *   <li>if the node does not ignore undefined values and at least one undefined
 *       value was counted, the node's "undefined" branch is taken;</li>
 *   <li>otherwise, if the node score is greater than or equal to the node
 *       threshold, the "positive" branch is taken;</li>
 *   <li>otherwise the "negative" branch is taken.</li>
 * </ul>
 *
 * @param doc1 the first document of the pair
 * @param doc2 the second document of the pair
 * @return the final score of the last evaluated node, or {@code 0.0} if no node
 *         was evaluated
 * @throws PaceException if the current label does not correspond to any node of
 *         the decision tree
 */
public double computeScore(final MapDocument doc1, final MapDocument doc2) {
    String current = "start";
    double score = 0.0;

    // keep walking until the current label parses to a defined MatchType
    while (MatchType.parse(current) == MatchType.UNDEFINED) {
        final TreeNodeDef currentNode = config.decisionTree().get(current);

        // throw an exception if the node doesn't exist
        if (currentNode == null) {
            throw new PaceException("The Tree Node doesn't exist: " + current);
        }

        final TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);

        // computed once per node: the same value drives the branch decision
        // below and is the one eventually returned to the caller
        score = stats.getFinalScore(currentNode.getAggregation());

        if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount() > 0) {
            // ignoreUndefined=false: a miss is considered as undefined
            current = currentNode.getUndefined();
        } else if (score >= currentNode.getThreshold()) {
            // ignoreUndefined=true (or no miss): the score decides the branch
            current = currentNode.getPositive();
        } else {
            current = currentNode.getNegative();
        }
    }

    return score;
}
}