forked from D-Net/dnet-hadoop
code cleaning and implementation of the TreeDedup + minor changes
This commit is contained in:
parent
0973899865
commit
c687956371
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>dnet-dedup</artifactId>
|
<artifactId>dnet-dedup</artifactId>
|
||||||
<version>3.0.15-SNAPSHOT</version>
|
<version>4.0.0-SNAPSHOT</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
|
@ -256,9 +256,8 @@ public class WfConfig implements Serializable {
|
||||||
return maxIterations;
|
return maxIterations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public WfConfig setMaxIterations(int maxIterations) {
|
public void setMaxIterations(int maxIterations) {
|
||||||
this.maxIterations = maxIterations;
|
this.maxIterations = maxIterations;
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -24,16 +24,16 @@ public class TreeProcessor {
|
||||||
public boolean compare(final MapDocument a, final MapDocument b) {
|
public boolean compare(final MapDocument a, final MapDocument b) {
|
||||||
|
|
||||||
//evaluate the decision tree
|
//evaluate the decision tree
|
||||||
return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH;
|
return evaluateTree(a, b) == MatchType.MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map<String, TreeNodeDef> decisionTree){
|
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2){
|
||||||
|
|
||||||
String current = "start";
|
String current = "start";
|
||||||
|
|
||||||
while (MatchType.parse(current)==MatchType.UNDEFINED) {
|
while (MatchType.parse(current)==MatchType.UNDEFINED) {
|
||||||
|
|
||||||
TreeNodeDef currentNode = decisionTree.get(current);
|
TreeNodeDef currentNode = config.decisionTree().get(current);
|
||||||
//throw an exception if the node doesn't exist
|
//throw an exception if the node doesn't exist
|
||||||
if (currentNode == null)
|
if (currentNode == null)
|
||||||
throw new PaceException("The Tree Node doesn't exist: " + current);
|
throw new PaceException("The Tree Node doesn't exist: " + current);
|
||||||
|
@ -57,4 +57,35 @@ public class TreeProcessor {
|
||||||
return MatchType.parse(current);
|
return MatchType.parse(current);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public double computeScore(final MapDocument doc1, final MapDocument doc2) {
|
||||||
|
String current = "start";
|
||||||
|
double score = 0.0;
|
||||||
|
|
||||||
|
while (MatchType.parse(current)==MatchType.UNDEFINED) {
|
||||||
|
|
||||||
|
TreeNodeDef currentNode = config.decisionTree().get(current);
|
||||||
|
//throw an exception if the node doesn't exist
|
||||||
|
if (currentNode == null)
|
||||||
|
throw new PaceException("The Tree Node doesn't exist: " + current);
|
||||||
|
|
||||||
|
TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);
|
||||||
|
|
||||||
|
score = stats.getFinalScore(currentNode.getAggregation());
|
||||||
|
//if ignoreUndefined=false the miss is considered as undefined
|
||||||
|
if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount()>0) {
|
||||||
|
current = currentNode.getUndefined();
|
||||||
|
}
|
||||||
|
//if ignoreUndefined=true the miss is ignored and the score computed anyway
|
||||||
|
else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) {
|
||||||
|
current = currentNode.getPositive();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
current = currentNode.getNegative();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue