From 56b05cde0b621d91e9b974453d6fe28f7061561b Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Wed, 9 Oct 2024 23:00:13 +0200 Subject: [PATCH] Revert the changes for IgnoreUndefined management in tree evaluation --- .../pace/tree/support/TreeNodeDef.java | 2 +- .../pace/tree/support/TreeNodeStats.java | 33 ++++--------------- .../pace/tree/support/TreeProcessor.java | 8 +++-- 3 files changed, 12 insertions(+), 31 deletions(-) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java index 0ff03f5e1..0973fdf1e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java @@ -48,7 +48,7 @@ public class TreeNodeDef implements Serializable { // function for the evaluation of the node public TreeNodeStats evaluate(Row doc1, Row doc2, Config conf) { - TreeNodeStats stats = new TreeNodeStats(ignoreUndefined); + TreeNodeStats stats = new TreeNodeStats(); // for each field in the node, it computes the for (FieldConf fieldConf : fields) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java index f6b210a8c..2b96048b4 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java @@ -9,11 +9,8 @@ public class TreeNodeStats implements Serializable { private Map results; // this is an accumulator for the results of the node - private final boolean ignoreUndefined; - - public TreeNodeStats(boolean ignoreUndefined) { + public TreeNodeStats() { this.results = new HashMap<>(); - this.ignoreUndefined = ignoreUndefined; } public Map getResults() { @@ -25,10 +22,7 @@ public class TreeNodeStats implements Serializable { } public int fieldsCount() { - if (ignoreUndefined) - return this.results.size(); - else - return this.results.size() - undefinedCount(); // do not count undefined + return this.results.size(); } public int undefinedCount() { @@ -84,22 +78,11 @@ public class TreeNodeStats implements Serializable { double min = 100.0; // random high value for (FieldStats fs : this.results.values()) { if (fs.getResult() < min) { - if (fs.getResult() == -1) { - if (fs.isCountIfUndefined()) { - min = 0.0; - } else { - min = -1; - } - } else { + if (fs.getResult() >= 0.0 || (fs.getResult() == -1 && fs.isCountIfUndefined())) min = fs.getResult(); - } } } - if (ignoreUndefined) { - return min == -1.0 ? 0.0 : min; - } else { - return min; - } + return min; } // if at least one is true, return 1.0 @@ -108,11 +91,7 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() >= fieldStats.getThreshold()) return 1.0; } - if (!ignoreUndefined && undefinedCount() > 0) { - return -1.0; - } else { - return 0.0; - } + return 0.0; } // if at least one is false, return 0.0 @@ -121,7 +100,7 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() == -1) { if (fieldStats.isCountIfUndefined()) - return ignoreUndefined ? 0.0 : -1.0; + return 0.0; } else { if (fieldStats.getResult() < fieldStats.getThreshold()) return 0.0; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java index 28b3a82af..263504dbb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java @@ -44,10 +44,12 @@ public class TreeProcessor { TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config); treeStats.addNodeStats(nextNodeName, stats); - double finalScore = stats.getFinalScore(currentNode.getAggregation()); - if (finalScore == -1.0) + // if ignoreUndefined=false the miss is considered as undefined + if (!currentNode.isIgnoreUndefined() && stats.undefinedCount() > 0) { nextNodeName = currentNode.getUndefined(); - else if (finalScore >= currentNode.getThreshold()) { + } + // if ignoreUndefined=true the miss is ignored and the score computed anyway + else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) { nextNodeName = currentNode.getPositive(); } else { nextNodeName = currentNode.getNegative();