From c687956371cedd9cf0347ae781f1d4d51a2e7809 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 14 Nov 2019 10:01:21 +0100 Subject: [PATCH] code cleaning and implementation of the TreeDedup + minor changes --- dnet-pace-core/pom.xml | 2 +- .../java/eu/dnetlib/pace/config/WfConfig.java | 3 +- .../pace/tree/support/TreeProcessor.java | 37 +++++++++++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml index 34138cc85..a34ed9f63 100644 --- a/dnet-pace-core/pom.xml +++ b/dnet-pace-core/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib dnet-dedup - 3.0.15-SNAPSHOT + 4.0.0-SNAPSHOT ../pom.xml diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java index bd00f2fb9..d2722ac53 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/WfConfig.java @@ -256,9 +256,8 @@ public class WfConfig implements Serializable { return maxIterations; } - public WfConfig setMaxIterations(int maxIterations) { + public void setMaxIterations(int maxIterations) { this.maxIterations = maxIterations; - return this; } /* diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java index 50d3ec667..5663a79dd 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java @@ -24,16 +24,16 @@ public class TreeProcessor { public boolean compare(final MapDocument a, final MapDocument b) { //evaluate the decision tree - return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH; + return evaluateTree(a, b) == MatchType.MATCH; } - public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map decisionTree){ + public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2){ String current = "start"; while (MatchType.parse(current)==MatchType.UNDEFINED) { - TreeNodeDef currentNode = decisionTree.get(current); + TreeNodeDef currentNode = config.decisionTree().get(current); //throw an exception if the node doesn't exist if (currentNode == null) throw new PaceException("The Tree Node doesn't exist: " + current); @@ -57,4 +57,35 @@ public class TreeProcessor { return MatchType.parse(current); } + public double computeScore(final MapDocument doc1, final MapDocument doc2) { + String current = "start"; + double score = 0.0; + + while (MatchType.parse(current)==MatchType.UNDEFINED) { + + TreeNodeDef currentNode = config.decisionTree().get(current); + //throw an exception if the node doesn't exist + if (currentNode == null) + throw new PaceException("The Tree Node doesn't exist: " + current); + + TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config); + + score = stats.getFinalScore(currentNode.getAggregation()); + //if ignoreUndefined=false the miss is considered as undefined + if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount()>0) { + current = currentNode.getUndefined(); + } + //if ignoreUndefined=true the miss is ignored and the score computed anyway + else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) { + current = currentNode.getPositive(); + } + else { + current = currentNode.getNegative(); + } + + } + + return score; + } + }