code cleaning and implementation of the TreeDedup + minor changes

This commit is contained in:
miconis 2019-11-14 10:01:21 +01:00
parent 0973899865
commit c687956371
3 changed files with 36 additions and 6 deletions

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-dedup</artifactId> <artifactId>dnet-dedup</artifactId>
<version>3.0.15-SNAPSHOT</version> <version>4.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@ -256,9 +256,8 @@ public class WfConfig implements Serializable {
return maxIterations; return maxIterations;
} }
public WfConfig setMaxIterations(int maxIterations) { public void setMaxIterations(int maxIterations) {
this.maxIterations = maxIterations; this.maxIterations = maxIterations;
return this;
} }
/* /*

View File

@ -24,16 +24,16 @@ public class TreeProcessor {
public boolean compare(final MapDocument a, final MapDocument b) { public boolean compare(final MapDocument a, final MapDocument b) {
//evaluate the decision tree //evaluate the decision tree
return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH; return evaluateTree(a, b) == MatchType.MATCH;
} }
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map<String, TreeNodeDef> decisionTree){ public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2){
String current = "start"; String current = "start";
while (MatchType.parse(current)==MatchType.UNDEFINED) { while (MatchType.parse(current)==MatchType.UNDEFINED) {
TreeNodeDef currentNode = decisionTree.get(current); TreeNodeDef currentNode = config.decisionTree().get(current);
//throw an exception if the node doesn't exist //throw an exception if the node doesn't exist
if (currentNode == null) if (currentNode == null)
throw new PaceException("The Tree Node doesn't exist: " + current); throw new PaceException("The Tree Node doesn't exist: " + current);
@ -57,4 +57,35 @@ public class TreeProcessor {
return MatchType.parse(current); return MatchType.parse(current);
} }
/**
 * Walks the configured decision tree for a pair of documents and returns the
 * score of the last node that was evaluated.
 *
 * <p>The walk begins at the node named {@code "start"} and continues for as long as
 * the current node name parses to {@link MatchType#UNDEFINED}. At each node the
 * next name is chosen among the node's undefined, positive and negative branches.
 *
 * @param doc1 the first document of the pair
 * @param doc2 the second document of the pair
 * @return the final score of the last evaluated node; {@code 0.0} only if no node
 *         is evaluated at all
 * @throws PaceException if the current node name has no entry in the decision tree
 */
public double computeScore(final MapDocument doc1, final MapDocument doc2) {
    String current = "start";
    double score = 0.0;

    while (MatchType.parse(current) == MatchType.UNDEFINED) {
        final TreeNodeDef currentNode = config.decisionTree().get(current);

        // fail fast when the tree references a node that is not defined
        if (currentNode == null) {
            throw new PaceException("The Tree Node doesn't exist: " + current);
        }

        final TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);

        // computed once per node: used both for branching and as the returned value
        score = stats.getFinalScore(currentNode.getAggregation());

        if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount() > 0) {
            // ignoreUndefined=false: any undefined comparison routes to the undefined branch
            current = currentNode.getUndefined();
        } else if (score >= currentNode.getThreshold()) {
            // undefined comparisons are ignored (or absent): branch on the threshold
            current = currentNode.getPositive();
        } else {
            current = currentNode.getNegative();
        }
    }

    return score;
}
} }