uoa-validator-engine2/src/main/java/eu/dnetlib/validator2/engine/builtins/XPathExpressionHelper.java

105 lines
3.8 KiB
Java

package eu.dnetlib.validator2.engine.builtins;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import eu.dnetlib.validator2.engine.RuleEvaluationException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.xpath.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
/**
 * Static helpers for compiling XPath expressions and evaluating them against DOM
 * documents and nodes.
 *
 * <p>Compiled expressions are cached by their source string. Node-list results are
 * additionally cached per {@link Document} and expression.
 *
 * <p>Thread-safety note: the {@code javax.xml.xpath} API documents both {@link XPath}
 * and {@link XPathExpression} as not thread-safe and not reentrant. Because this class
 * shares a single {@code XPath} instance and hands the same compiled expression to every
 * caller, each call into those objects made from here is serialized with a
 * {@code synchronized} block on the object being used.
 */
public class XPathExpressionHelper {

    private static final Logger logger = LogManager.getLogger();

    // XPath expression compilation

    /** Shared compiler; only ever used while holding its own monitor. */
    private static final XPath XPATH = XPathFactory.newInstance().newXPath();

    /** Compiled expressions, keyed by the XPath source string. */
    private static final ConcurrentHashMap<String, XPathExpression> COMPILED_EXPRESSIONS = new ConcurrentHashMap<>();

    // XPath expression evaluation

    /** Maximum number of documents whose evaluation results are kept cached. */
    private static final int MAX_CACHE_ENTRIES = 100; // TODO: Make this configurable

    /**
     * Per-document cache of node-list results. Looking up a document that is not yet
     * cached creates an empty per-expression map for it.
     */
    private static final LoadingCache<Document, ConcurrentMap<XPathExpression, NodeList>>
            EVALUATED_EXPRESSIONS =
            CacheBuilder.
                    newBuilder().
                    maximumSize(MAX_CACHE_ENTRIES).
                    build(new CacheLoader<Document, ConcurrentMap<XPathExpression, NodeList>>() {
                        @Override
                        public ConcurrentMap<XPathExpression, NodeList> load(Document key) {
                            return new ConcurrentHashMap<>();
                        }
                    });

    /**
     * Returns the compiled form of the given XPath string, compiling it on first use and
     * reusing the cached instance afterwards.
     *
     * @param xpath the XPath expression source
     * @return the compiled (and cached) expression
     * @throws RuntimeException wrapping the {@link XPathExpressionException} if the
     *                          expression cannot be compiled
     */
    public static XPathExpression compile(String xpath) {
        return COMPILED_EXPRESSIONS.computeIfAbsent(xpath, s -> {
            try {
                logger.debug("Compiling {}", s);
                XPathExpression expr;
                // computeIfAbsent only serializes callers per hash bin, so distinct keys
                // could otherwise reach the non-thread-safe XPath instance concurrently.
                synchronized (XPATH) {
                    expr = XPATH.compile(s);
                }
                logger.debug("Compiled {} = {}", s, expr);
                return expr;
            } catch (XPathExpressionException e) {
                logger.error("Compilation failure", e);
                throw new RuntimeException(e.getMessage(), e);
            }
        });
    }

    /**
     * Compiles (or fetches from cache) the given XPath string and evaluates it as a node
     * set against the document.
     *
     * @param xpath the XPath expression source
     * @param doc   the document to evaluate against
     * @return the matching nodes
     * @see #nodeList(XPathExpression, Document)
     */
    public static NodeList nodeList(String xpath, Document doc) {
        logger.debug("Evaluating nodeList {}", xpath);
        return nodeList(compile(xpath), doc);
    }

    /**
     * Evaluates the expression as a node set against the document, caching the result
     * per document and expression. A cached result is returned as-is on later calls, so
     * it reflects the document as it was when the expression was first evaluated.
     *
     * @param expr the compiled expression
     * @param doc  the document to evaluate against
     * @return the matching nodes
     * @throws RuleEvaluationException if evaluation fails or the cache lookup fails
     */
    public static NodeList nodeList(XPathExpression expr, Document doc) {
        try {
            return EVALUATED_EXPRESSIONS.get(doc).computeIfAbsent(expr, key -> {
                try {
                    // The same compiled expression may be in use for another document or
                    // by attr()/node() on another thread; XPathExpression is not thread-safe.
                    synchronized (key) {
                        return (NodeList) key.evaluate(doc, XPathConstants.NODESET);
                    }
                } catch (XPathExpressionException ex) {
                    throw new RuleEvaluationException(ex.getMessage(), ex);
                }
            });
        } catch (ExecutionException e) {
            throw new RuleEvaluationException(e.getMessage(), e);
        }
    }

    /**
     * Compiles (or fetches from cache) the given XPath string and evaluates it to a
     * string relative to the node.
     *
     * @param xpath the XPath expression source
     * @param node  the context node
     * @return the string value of the evaluation
     * @see #attr(XPathExpression, Node)
     */
    public static String attr(String xpath, Node node) {
        logger.debug("Evaluating attr {}", xpath);
        return attr(compile(xpath), node);
    }

    /**
     * Evaluates the expression to a string relative to the node. The result is not cached.
     *
     * @param expr the compiled expression
     * @param node the context node
     * @return the string value of the evaluation
     * @throws RuleEvaluationException if evaluation fails
     */
    public static String attr(XPathExpression expr, Node node) {
        try {
            // Serialize use of the shared, non-thread-safe compiled expression.
            synchronized (expr) {
                return expr.evaluate(node);
            }
        } catch (XPathExpressionException ex) {
            throw new RuleEvaluationException(ex.getMessage(), ex);
        }
    }

    /**
     * Compiles (or fetches from cache) the given XPath string and evaluates it to a
     * single node relative to the given node.
     *
     * @param xpath the XPath expression source
     * @param node  the context node
     * @return the node produced by the evaluation
     * @see #node(XPathExpression, Node)
     */
    public static Node node(String xpath, Node node) {
        logger.debug("Evaluating node {}", xpath);
        return node(compile(xpath), node);
    }

    /**
     * Evaluates the expression to a single node relative to the given node. The result
     * is not cached.
     *
     * @param expr the compiled expression
     * @param node the context node
     * @return the node produced by the evaluation
     * @throws RuleEvaluationException if evaluation fails
     */
    public static Node node(XPathExpression expr, Node node) {
        try {
            // Serialize use of the shared, non-thread-safe compiled expression.
            synchronized (expr) {
                return (Node) expr.evaluate(node, XPathConstants.NODE);
            }
        } catch (XPathExpressionException ex) {
            throw new RuleEvaluationException(ex.getMessage(), ex);
        }
    }
}