dnet-dedup/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java

61 lines
1.7 KiB
Java

package eu.dnetlib.pace.tree.support;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.*;
import eu.dnetlib.pace.util.PaceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.Map;
/**
* The compare between two documents is given by the weighted mean of the field distances
*/
public class TreeProcessor {
private static final Log log = LogFactory.getLog(TreeProcessor.class);
private Config config;
public TreeProcessor(final Config config) {
this.config = config;
}
public boolean compare(final MapDocument a, final MapDocument b) {
//evaluate the decision tree
return evaluateTree(a, b, config.decisionTree()) == MatchType.MATCH;
}
public MatchType evaluateTree(final MapDocument doc1, final MapDocument doc2, final Map<String, TreeNodeDef> decisionTree){
String current = "start";
while (MatchType.parse(current)==MatchType.UNDEFINED) {
TreeNodeDef currentNode = decisionTree.get(current);
//throw an exception if the node doesn't exist
if (currentNode == null)
throw new PaceException("The Tree Node doesn't exist: " + current);
TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);
//if ignoreUndefined=false the miss is considered as undefined
if (!currentNode.isIgnoreUndefined() && stats.getUndefinedCount()>0) {
current = currentNode.getUndefined();
}
//if ignoreUndefined=true the miss is ignored and the score computed anyway
else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) {
current = currentNode.getPositive();
}
else {
current = currentNode.getNegative();
}
}
return MatchType.parse(current);
}
}