forked from antonis.lempesis/dnet-hadoop
Code cleanup and implementation of TreeDedup, plus minor changes
This commit is contained in:
parent
0973899865
commit
c687956371
|
@ -6,7 +6,7 @@
|
|||
<parent>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-dedup</artifactId>
|
||||
<version>3.0.15-SNAPSHOT</version>
|
||||
<version>4.0.0-SNAPSHOT</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -256,9 +256,8 @@ public class WfConfig implements Serializable {
|
|||
return maxIterations;
|
||||
}
|
||||
|
||||
public WfConfig setMaxIterations(int maxIterations) {
|
||||
public void setMaxIterations(int maxIterations) {
|
||||
this.maxIterations = maxIterations;
|
||||
return this;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -24,16 +24,16 @@ public class TreeProcessor {
|
|||
public boolean compare(final MapDocument a, final MapDocument b) {
|
||||
|
||||
//evaluate the decision tree
|
||||
return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH;
|
||||
return evaluateTree(a, b) == MatchType.MATCH;
|
||||
}
|
||||
|
||||
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map<String, TreeNodeDef> decisionTree){
|
||||
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2){
|
||||
|
||||
String current = "start";
|
||||
|
||||
while (MatchType.parse(current)==MatchType.UNDEFINED) {
|
||||
|
||||
TreeNodeDef currentNode = decisionTree.get(current);
|
||||
TreeNodeDef currentNode = config.decisionTree().get(current);
|
||||
//throw an exception if the node doesn't exist
|
||||
if (currentNode == null)
|
||||
throw new PaceException("The Tree Node doesn't exist: " + current);
|
||||
|
@ -57,4 +57,35 @@ public class TreeProcessor {
|
|||
return MatchType.parse(current);
|
||||
}
|
||||
|
||||
public double computeScore(final MapDocument doc1, final MapDocument doc2) {
|
||||
String current = "start";
|
||||
double score = 0.0;
|
||||
|
||||
while (MatchType.parse(current)==MatchType.UNDEFINED) {
|
||||
|
||||
TreeNodeDef currentNode = config.decisionTree().get(current);
|
||||
//throw an exception if the node doesn't exist
|
||||
if (currentNode == null)
|
||||
throw new PaceException("The Tree Node doesn't exist: " + current);
|
||||
|
||||
TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);
|
||||
|
||||
score = stats.getFinalScore(currentNode.getAggregation());
|
||||
//if ignoreUndefined=false the miss is considered as undefined
|
||||
if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount()>0) {
|
||||
current = currentNode.getUndefined();
|
||||
}
|
||||
//if ignoreUndefined=true the miss is ignored and the score computed anyway
|
||||
else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) {
|
||||
current = currentNode.getPositive();
|
||||
}
|
||||
else {
|
||||
current = currentNode.getNegative();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue