uoa-validator-engine2/src/main/java/eu/dnetlib/validator2/validation/guideline/ElementSpecCompiler.java

622 lines
26 KiB
Java

package eu.dnetlib.validator2.validation.guideline;
import eu.dnetlib.validator2.engine.Helper;
import eu.dnetlib.validator2.engine.Rule;
import eu.dnetlib.validator2.engine.RuleContext;
import eu.dnetlib.validator2.engine.RuleEvaluationException;
import eu.dnetlib.validator2.engine.builtins.SimpleContext;
import eu.dnetlib.validator2.engine.builtins.StandardXMLContext;
import eu.dnetlib.validator2.engine.builtins.XMLRule;
import eu.dnetlib.validator2.engine.builtins.XPathExpressionHelper;
import eu.dnetlib.validator2.engine.contexts.XMLContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.xpath.XPathExpression;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
class ElementSpecCompiler {
// Logger shared by the compiler and all of its nested rule classes.
private static final Logger logger = LogManager.getLogger();
// Empty parent-name path; used as the parent element names of a root element.
private static final String[] EMPTY = new String[]{};
// Every element spec visited by generateRulesForElement, in visiting order.
private final List<ElementSpec> specs = new ArrayList<>();
// Collects the generated rules; this very instance is what compile() returns.
private final CompilationResult compilationResult = new CompilationResult();
/**
 * Generates the rules for the given element spec and, recursively, for its
 * attributes and sub-elements.
 *
 * @param elementSpec the root element specification to compile
 * @param runtimeInfo supplier of the evaluation state the generated rules consult when they run
 * @return the compilation result held by this compiler instance (the same object on every call)
 */
CompilationResult compile(ElementSpec elementSpec, Supplier<GuidelineEvaluation> runtimeInfo) {
    generateRulesForElement(new ElementStruct(elementSpec, runtimeInfo));
    return compilationResult;
}
/**
 * Emits the rule for one element, then the rules for each of its attributes,
 * then recurses (depth-first) into each of its sub-elements.
 *
 * @param currentElement the element whose rules are generated
 */
private void generateRulesForElement(ElementStruct currentElement) {
    final ElementSpec currentSpec = currentElement.spec;
    specs.add(currentSpec);

    // The element's own rule must exist before its attribute/sub-element rules,
    // because those are created with a reference to it.
    final boolean hasParent = currentElement.parent != null;
    if (hasParent) {
        currentElement.createSubElemRule(compilationResult);
    } else {
        currentElement.createRootElemRule(compilationResult);
    }

    for (AttributeSpec attributeSpec : currentSpec.attributeSpecs()) {
        currentElement.createAttrRule(compilationResult, attributeSpec);
    }

    for (ElementSpec childSpec : currentSpec.subElementSpecs()) {
        ElementStruct child = new ElementStruct(currentElement.elementsArray, childSpec, currentElement);
        generateRulesForElement(child);
    }
}
/**
 * Returns a new array holding all of {@code elements} followed by {@code newElem}.
 * The input array is not modified.
 *
 * @param elements the source array (must not be null)
 * @param newElem  the value stored in the last slot of the result
 * @return a fresh array of length {@code elements.length + 1}
 */
private static String[] copyAndAppend(String[] elements, String newElem) {
    final int lastIndex = elements.length;
    final String[] extended = new String[lastIndex + 1];
    extended[lastIndex] = newElem;
    System.arraycopy(elements, 0, extended, 0, lastIndex);
    return extended;
}
/**
 * Renders {@code value} as an XPath 1.0 string literal.
 * <p>
 * XPath 1.0 has no escape sequence inside string literals, so the quoting
 * style is chosen from the content: single quotes when the value has no
 * apostrophe (the form this class has always produced), double quotes when it
 * has apostrophes but no double quotes, and a {@code concat(...)} call when it
 * has both. A {@code null} value is rendered as the text {@code null}, which
 * matches plain string concatenation.
 */
private static String xpathStringLiteral(String value) {
    String text = String.valueOf(value);
    if (text.indexOf('\'') < 0) {
        return "'" + text + "'";
    }
    if (text.indexOf('"') < 0) {
        return "\"" + text + "\"";
    }
    // Both quote kinds occur: single-quote every apostrophe-free piece and
    // re-insert the apostrophes as double-quoted literals.
    StringBuilder literal = new StringBuilder("concat(");
    String[] pieces = text.split("'", -1);
    for (int i = 0; i < pieces.length; i++) {
        if (i > 0) {
            literal.append(", \"'\", ");
        }
        literal.append('\'').append(pieces[i]).append('\'');
    }
    return literal.append(')').toString();
}

/**
 * Builds an XPath predicate body that matches a node by its local name.
 *
 * @param name the local name to match; quotes inside it are handled safely
 * @return e.g. {@code local-name()='title'}
 */
private static String xpathForNodeName(String name) {
    return "local-name()=" + xpathStringLiteral(name);
}

/**
 * Builds an XPath predicate body that matches a node by its local name and,
 * optionally, by the prefix of its text.
 *
 * @param name        the local name to match
 * @param valuePrefix required prefix of the node text, or {@code null} for no prefix constraint
 * @return the name test, followed by {@code and starts-with(...)} when a prefix is given
 */
private static String xpathForNodeName(String name, String valuePrefix) {
    String nameTest = xpathForNodeName(name);
    if (valuePrefix == null) {
        return nameTest;
    }
    // normalize-space is needed for cases where the text isn't on the same line as the starting xml tag
    return nameTest + " and starts-with(normalize-space(text()), " + xpathStringLiteral(valuePrefix) + ")";
}
/**
 * Builds the XPath step that selects the attribute with the given name.
 * TODO: Should we escape the arg?
 *
 * @param name the attribute name, used verbatim
 * @return {@code "@"} followed by {@code name}
 */
private static String xpathForAttributeName(String name) {
    final String attributeMarker = "@";
    return attributeMarker + name;
}
/**
 * Optionally extends an XPath so that it selects the text nodes of the matched nodes.
 *
 * @param xpath    the base expression
 * @param withText whether to append the {@code /text()} step
 * @return {@code xpath} unchanged, or {@code xpath + "/text()"}
 */
private static String xpathWithText(String xpath, boolean withText) {
    if (!withText) {
        return xpath;
    }
    return xpath + "/text()";
}
/**
 * Builds the human-readable id of an element or attribute rule.
 * <p>
 * Shape: {@code <LEVEL> rule: The <nodeName> <nodeType>[ at <position> position]
 * [ has values that start with <prefix>,][ contains allowed values only,]
 * has cardinality <cardinality>}.
 *
 * @param nodeType               label placed after the node name (callers in this file pass "Element" or "Attribute")
 * @param nodeName               name, or slash-separated path, of the node
 * @param requirementLevel       requirement level; its enum name starts the id
 * @param position               position constraint; omitted from the id when {@code ElementPosition.ALL}
 * @param valuePrefix            required value prefix, or {@code null} when there is none
 * @param allowedValuesPredicate value restriction; mentioned only when it is not the allow-all predicate
 * @param cardinality            expected cardinality, rendered through {@code asText()}
 * @return the rule id
 */
static String nodeRuleIdFor(String nodeType,
                            String nodeName,
                            RequirementLevel requirementLevel,
                            ElementPosition position,
                            String valuePrefix,
                            Predicate<String> allowedValuesPredicate,
                            Cardinality cardinality) {
    StringBuilder builder = new StringBuilder(requirementLevel.name())
            .append(" rule: The ").append(nodeName).append(" ").append(nodeType);
    if (position != ElementPosition.ALL) {
        // Locale.ROOT keeps the id identical on every JVM; the default-locale
        // variant would e.g. turn 'I' into a dotless 'ı' under a Turkish locale.
        builder.append(" at ").append(position.name().toLowerCase(Locale.ROOT)).append(" position");
    }
    if (valuePrefix != null) {
        builder.append(" has values that start with ").append(valuePrefix).append(",");
    }
    if (!allowsAllValues(allowedValuesPredicate)) {
        builder.append(" contains allowed values only,");
    }
    builder.append(" has cardinality ").append(cardinality.asText());
    return builder.toString();
}
/**
 * Builds the id of a cardinality rule.
 *
 * @param nodeName         name of the node the rule is about
 * @param requirementLevel requirement level; its enum name starts the id
 * @param cardinality      expected cardinality, rendered through {@code asText()}
 * @param valuePrefix      optional value prefix; when non-empty it is appended in single quotes
 * @return {@code <LEVEL> rule: <nodeName> has cardinality <cardinality>}, optionally followed by
 *         {@code  for values starting with '<prefix>'}
 */
static String cardinalityRuleIdFor(String nodeName,
                                   RequirementLevel requirementLevel,
                                   Cardinality cardinality,
                                   String valuePrefix) {
    StringBuilder id = new StringBuilder()
            .append(requirementLevel.name())
            .append(" rule: ")
            .append(nodeName)
            .append(" has cardinality ")
            .append(cardinality.asText());
    if (!Helper.isEmpty(valuePrefix)) {
        id.append(" for values starting with '").append(valuePrefix).append("'");
    }
    return id.toString();
}
/**
 * Joins node names into a slash-separated path, e.g. {@code elem1/sub1}.
 *
 * @param names the path components, in order
 * @return the components joined with {@code "/"}; the empty string when no names are given
 */
static final String mergeNodeNames(String... names) {
    return String.join("/", names);
}
/**
 * Heuristic that avoids generating value checks when every value is allowed.
 * <p>
 * The predicate is recognised by identity only. Probing it with
 * {@code test(null)} was tried and dropped, because it gave undesired results
 * against negated allowed-values predicates.
 *
 * @param allowedValuesPredicate the predicate to inspect
 * @return {@code true} when the predicate is {@code null} or is {@code Builders.ALLOW_ALL_VALUES}
 */
private static boolean allowsAllValues(Predicate<String> allowedValuesPredicate) {
    if (allowedValuesPredicate == null) {
        // A null value will never occur through our Builders class.
        return true;
    }
    return allowedValuesPredicate == Builders.ALLOW_ALL_VALUES;
}
/**
 * Creates the rule that checks a root element: the nodes selected by
 * {@code xpath} (restricted to those with an allowed value, when values are
 * restricted) must satisfy the cardinality bounds.
 *
 * @param runtimeInfo            supplier of the evaluation state used when the rule runs
 * @param id                     rule id, stored in the rule's context
 * @param xpath                  expression selecting the candidate elements
 * @param xpathForValue          expression, relative to each element, that reads its value
 * @param cardinality            supplies the lower and upper bound for the node count
 * @param allowedValuesPredicate value restriction, or the allow-all predicate
 * @param applicabilityRule      optional precondition for the rule; may be {@code null}
 * @return the root element rule
 */
private static SyntheticRule<Document> createActualRootElementRule(Supplier<GuidelineEvaluation> runtimeInfo,
                                                                   String id,
                                                                   String xpath,
                                                                   String xpathForValue,
                                                                   Cardinality cardinality,
                                                                   Predicate<String> allowedValuesPredicate,
                                                                   Rule<Document> applicabilityRule) {
    XMLContext context = createCustomXMLContext(id, xpath);
    Predicate<Integer> withinBounds =
            Helper.createCardinalityPredicate(cardinality.getLowerBound(), cardinality.getUpperBound(), true);
    Predicate<NodeList> countCheck = (NodeList candidates) -> withinBounds.test(candidates.getLength());

    Predicate<NodeList> effectiveCheck;
    if (allowsAllValues(allowedValuesPredicate)) {
        effectiveCheck = countCheck;
    } else {
        // Only the nodes carrying an allowed value are counted.
        effectiveCheck = (NodeList candidates) ->
                countCheck.test(filterNodes(candidates, xpathForValue, allowedValuesPredicate));
    }
    return new RootElemRule(context, effectiveCheck, applicabilityRule, runtimeInfo);
}
/**
 * Creates the rule that checks an attribute on every parent element.
 * <p>
 * The rule passes when each element in the node list carries the attribute
 * with an accepted value: any non-empty value when all values are allowed,
 * otherwise a value accepted by {@code allowedValuesPredicate}. A
 * {@code null} node list fails the rule.
 *
 * @param runtimeInfo            supplier of the evaluation state used when the rule runs
 * @param parentRule             rule whose nodes are the parent elements to inspect
 * @param id                     rule id, stored in the rule's context
 * @param attrName               attribute name, resolved in the context of the parent elements
 * @param allowedValuesPredicate value restriction, or the allow-all predicate
 * @param applicabilityRule      optional precondition for the rule; may be {@code null}
 * @return the attribute rule
 */
// The NodeList contains the parent elements
private static SyntheticRule<Document> createActualAttributeSpecRule(Supplier<GuidelineEvaluation> runtimeInfo,
                                                                     SyntheticRule<Document> parentRule,
                                                                     String id,
                                                                     String attrName,
                                                                     Predicate<String> allowedValuesPredicate,
                                                                     Rule<Document> applicabilityRule) {
    SimpleContext ctx = new SimpleContext(id);

    Predicate<String> acceptedValue;
    if (allowsAllValues(allowedValuesPredicate)) {
        acceptedValue = (String s) -> !Helper.isEmpty(s);
    } else {
        acceptedValue = allowedValuesPredicate;
    }

    Predicate<NodeList> everyElementHasAcceptedAttr = (NodeList elements) -> {
        // Guard before touching the list: the parent rule may not have stored any nodes.
        if (elements == null) {
            return false;
        }
        NodeList attrs = attributesOf(elements, attrName, acceptedValue);
        return attrs.getLength() == elements.getLength();
    };
    return new NodeSpecRule<>(ctx, parentRule, everyElementHasAcceptedAttr, applicabilityRule, runtimeInfo);
}
/**
 * Creates the rule that checks a sub-element. The expected node count depends
 * on how many parent nodes were found, so the length predicate is built each
 * time the rule runs rather than when the rule is created.
 *
 * @param runtimeInfo            supplier of the evaluation state used when the rule runs
 * @param parentRule             rule of the enclosing element
 * @param id                     rule id, stored in the rule's context
 * @param xpath                  expression selecting the candidate sub-elements
 * @param xpathForValue          expression, relative to each sub-element, that reads its value
 * @param cardinality            expected cardinality per parent
 * @param allowedValuesPredicate value restriction, or the allow-all predicate
 * @param applicabilityRule      optional precondition for the rule; may be {@code null}
 * @return the sub-element rule
 */
private static SyntheticRule<Document> createActualSubElementRule(Supplier<GuidelineEvaluation> runtimeInfo,
                                                                  SyntheticRule<Document> parentRule,
                                                                  String id,
                                                                  String xpath,
                                                                  String xpathForValue,
                                                                  Cardinality cardinality,
                                                                  Predicate<String> allowedValuesPredicate,
                                                                  Rule<Document> applicabilityRule) {
    XMLContext context = createCustomXMLContext(id, xpath);

    // Calculate the length predicate at rule runtime
    Predicate<NodeList> countCheck = (NodeList candidates) ->
            createLengthPredicateForSubElements(runtimeInfo, parentRule, cardinality)
                    .test(candidates.getLength());

    Predicate<NodeList> effectiveCheck;
    if (allowsAllValues(allowedValuesPredicate)) {
        effectiveCheck = countCheck;
    } else {
        // Only the nodes carrying an allowed value are counted.
        effectiveCheck = (NodeList candidates) ->
                countCheck.test(filterNodes(candidates, xpathForValue, allowedValuesPredicate));
    }
    return new SubElemRule(context, parentRule, effectiveCheck, applicabilityRule, runtimeInfo);
}
/**
 * Builds the predicate that validates the total number of sub-elements
 * against the number of parent nodes recorded for the parent rule.
 * <p>
 * Only totals are compared: the check does not verify how the sub-elements
 * are distributed among the individual parents.
 *
 * @param runtimeInfo supplier of the evaluation state holding the parent's nodes
 * @param parentRule  rule of the enclosing element
 * @param cardinality expected cardinality per parent
 * @return a predicate over the total sub-element count; always {@code false} when the parent has no recorded nodes
 * @throws RuleEvaluationException for any cardinality other than ONE, ONE_TO_N, TWO or FOUR_TO_N
 */
private static Predicate<Integer> createLengthPredicateForSubElements(Supplier<GuidelineEvaluation> runtimeInfo,
                                                                      SyntheticRule<Document> parentRule,
                                                                      Cardinality cardinality) {
    GuidelineEvaluation evaluation = runtimeInfo.get();
    String parentRuleId = parentRule.getContext().getIdProperty().getValue();
    NodeList parents = evaluation.getNodesOf(parentRuleId);
    if (parents == null) {
        // There are no parents, actually
        return (Integer count) -> false;
    }

    final int parentCount = parents.getLength();
    if (cardinality == Cardinality.ONE) {
        // One sub-element per parent
        return (Integer count) -> count == parentCount;
    }
    if (cardinality == Cardinality.ONE_TO_N) {
        // One or more sub-elements per parent
        return (Integer count) -> count >= parentCount;
    }
    if (cardinality == Cardinality.TWO) {
        // Two sub-elements per parent
        return (Integer count) -> count == parentCount * 2;
    }
    if (cardinality == Cardinality.FOUR_TO_N) {
        // Four or more sub-elements per parent
        return (Integer count) -> count >= parentCount * 4;
    }
    // not reachable
    throw new RuleEvaluationException(" Unsupported cardinality " + cardinality, null);
}
/**
 * Selects the nodes whose {@code attrName} attribute value satisfies {@code allowedValues}.
 *
 * @param elements      the nodes to inspect
 * @param attrName      name of the attribute read from each node
 * @param allowedValues predicate applied to each attribute value
 * @return the node list produced by {@code Helper.nodesThatMatchThePredicate}
 */
private static NodeList attributesOf(NodeList elements, String attrName, Predicate<String> allowedValues) {
    return Helper.nodesThatMatchThePredicate(
            elements,
            (Node element) -> {
                return Helper.getAttributeValue(element, attrName);
            },
            allowedValues);
}
/**
 * Creates an XML context carrying the given rule id and XPath expression,
 * with its node-list action set to {@code "custom"}.
 *
 * @param id    value for the context's id property
 * @param xpath value for the context's XPath expression property
 * @return the configured context
 */
private static XMLContext createCustomXMLContext(String id, String xpath) {
    final StandardXMLContext xmlContext = new StandardXMLContext();
    xmlContext.getIdProperty().setValue(id);
    xmlContext.getXPathExpressionProperty().setValue(xpath);
    xmlContext.getNodeListActionProperty().setValue("custom");
    return xmlContext;
}
/**
 * Keeps the nodes whose value, read through {@code xpathToReadValues},
 * satisfies {@code allowedValuesPredicate}.
 *
 * @param nodes                  the candidate nodes
 * @param xpathToReadValues      expression, relative to each node, that reads its value
 * @param allowedValuesPredicate predicate applied to each value that is read
 * @return the node list produced by {@code Helper.nodesThatMatchThePredicate}
 */
private static NodeList filterNodes(NodeList nodes, String xpathToReadValues, Predicate<String> allowedValuesPredicate) {
    logger.debug("Filtering nodes with {}", xpathToReadValues);
    NodeValueReader valueReader = new NodeValueReader(xpathToReadValues);
    return Helper.nodesThatMatchThePredicate(nodes, valueReader, allowedValuesPredicate);
}
/**
 * Reads a string value from a node by evaluating a fixed XPath expression
 * relative to that node.
 */
private static class NodeValueReader implements Function<Node, String> {

    /** Expression evaluated against each node handed to {@link #apply(Node)}. */
    private final String nodeExpr;

    NodeValueReader(String nodeExpr) {
        this.nodeExpr = nodeExpr;
    }

    /**
     * @param parent the node the expression is evaluated against
     * @return the node value of the selected node, or {@code null} when no node is selected
     */
    @Override
    public String apply(Node parent) {
        logger.debug("Reading node value {} from parent {}", nodeExpr, parent);
        Node selected = XPathExpressionHelper.node(nodeExpr, parent);
        if (selected == null) {
            return null;
        }
        return selected.getNodeValue();
    }
}
/**
 * Compile-time view of one element spec: its position in the element
 * hierarchy, the ids/xpaths derived from that position, and the rule
 * generated for it.
 */
private static class ElementStruct {

    final Supplier<GuidelineEvaluation> runtimeInfo;
    final ElementSpec spec;
    final String[] parentElementNames;
    /** Enclosing element, or {@code null} for the root element. */
    final ElementStruct parent;
    // e.g. [elem1,sub1]
    final String[] elementsArray;
    // e.g. elem1/sub1
    final String elementsId;
    /** Rule that attribute and sub-element rules of this element are attached to. */
    SyntheticRule<Document> rule;

    /**
     * Creates a sub-element struct; the runtime info is inherited from {@code parent}.
     *
     * @param parentElementNames names of the enclosing elements, outermost first
     * @param spec               spec of this element
     * @param parent             struct of the enclosing element (must not be null)
     */
    ElementStruct(String[] parentElementNames, ElementSpec spec, ElementStruct parent) {
        this(parent.runtimeInfo, parentElementNames, spec, parent);
    }

    /**
     * Creates the root element struct (no parent, empty parent-name path).
     *
     * @param spec        spec of the root element
     * @param runtimeInfo supplier of the evaluation state used by the generated rules
     */
    ElementStruct(ElementSpec spec, Supplier<GuidelineEvaluation> runtimeInfo) {
        this(runtimeInfo, EMPTY, spec, null);
    }

    // Single place where all fields are initialised, shared by both constructors above.
    private ElementStruct(Supplier<GuidelineEvaluation> runtimeInfo,
                          String[] parentElementNames,
                          ElementSpec spec,
                          ElementStruct parent) {
        this.runtimeInfo = runtimeInfo;
        this.parentElementNames = parentElementNames;
        this.spec = spec;
        this.parent = parent;
        this.elementsArray = copyAndAppend(parentElementNames, spec.nodeName());
        this.elementsId = mergeNodeNames(elementsArray);
    }

    /**
     * Builds the XPath selecting this element.
     * <p>
     * For the root element the path starts with {@code //} followed by the
     * parents declared in the spec. For a sub-element it extends the parent's
     * path. A position constraint, when present, is applied to the
     * parenthesised path.
     *
     * @param withText whether to append {@code /text()}
     */
    private String xpath(boolean withText) {
        String stepForThis = "*[" + xpathForNodeName(spec.nodeName(), spec.valuePrefix()) + "]";
        String xpath;
        if (parent == null) {
            // This is the top-level element rule
            List<String> pathComponents = spec.parents().stream().
                    map( s -> "*[" + xpathForNodeName(s) + "]").collect(Collectors.toList());
            pathComponents.add(stepForThis);
            xpath = "//" + String.join("/", pathComponents);
        }
        else {
            xpath = parent.xpath(false) + "/" + stepForThis;
        }
        if (spec.position().xpath != null) {
            xpath = "(" + xpath + ")[" + spec.position().xpath + "]";
        }
        return xpathWithText(xpath, withText);
    }

    // Id of this element's own rule; shared by the root and sub-element variants.
    private String elementRuleId() {
        return nodeRuleIdFor(
                "Element",
                elementsId,
                spec.requirementLevel(),
                spec.position(),
                spec.valuePrefix(),
                spec.allowedValuesPredicate(),
                spec.cardinality()
        );
    }

    /** Creates the rule for a root element and stores it as the result's root rule. */
    private void createRootElemRule(CompilationResult compilationResult) {
        String id = elementRuleId();
        SyntheticRule<Document> rootRule = createActualRootElementRule(
                runtimeInfo,
                id,
                xpath(false),
                "text()",
                spec.cardinality(),
                spec.allowedValuesPredicate(),
                spec.applicabilityRule()
        );
        compilationResult.ruleIdToRequirementLevel.put(id, spec.requirementLevel());
        compilationResult.rootNodeRule = rootRule;
        this.rule = rootRule;
    }

    /**
     * Creates the rule for one attribute of this element and adds it to the
     * result's node rules.
     */
    private void createAttrRule(CompilationResult compilationResult, AttributeSpec attrSpec) {
        String attrRuleIdComponent = mergeNodeNames(elementsId, attrSpec.nodeName());
        String attrRuleId = nodeRuleIdFor(
                "Attribute",
                attrRuleIdComponent,
                attrSpec.requirementLevel(),
                ElementPosition.ALL,
                null,
                // The id must describe the restriction this rule enforces, i.e. the
                // attribute's own allowed values, not those of the enclosing element.
                attrSpec.allowedValuesPredicate(),
                spec.cardinality()
        );
        SyntheticRule<Document> attrRule = createActualAttributeSpecRule(
                runtimeInfo,
                this.rule,
                attrRuleId,
                attrSpec.nodeName(), // this is resolved in the context of its parent elements
                attrSpec.allowedValuesPredicate(),
                attrSpec.applicabilityRule()
        );
        compilationResult.ruleIdToRequirementLevel.put(attrRuleId, attrSpec.requirementLevel());
        compilationResult.nodeRules.add(attrRule);
        // NOTE(review): this re-points the element's rule at the attribute rule, so the
        // next attribute rule and all sub-element rules get the last attribute rule as
        // their parent. Behaviour kept as-is; confirm that this chaining is intended.
        this.rule = attrRule;
    }

    /** Creates the rule for a sub-element and adds it to the result's node rules. */
    private void createSubElemRule(CompilationResult compilationResult) {
        String id = elementRuleId();
        SyntheticRule<Document> subRule = createActualSubElementRule(
                runtimeInfo,
                parent.rule,
                id,
                xpath(false),
                "text()",
                spec.cardinality(),
                spec.allowedValuesPredicate(),
                spec.applicabilityRule()
        );
        compilationResult.ruleIdToRequirementLevel.put(id, spec.requirementLevel());
        compilationResult.nodeRules.add(subRule);
        this.rule = subRule;
    }
}
/**
 * Rule for a root element: evaluates the context's XPath against the document
 * and tests the resulting nodes with the node-list predicate. It has no
 * parent rule.
 */
private static class RootElemRule extends ElemRule {

    RootElemRule(XMLContext context,
                 Predicate<NodeList> nodeListPredicate,
                 Rule<Document> applicabilityRule,
                 Supplier<GuidelineEvaluation> runtimeInfo) {
        super(context, null, nodeListPredicate, applicabilityRule, runtimeInfo);
    }

    /**
     * @param doc the document under validation
     * @return {@code true} when the rule is not applicable or the selected nodes satisfy the predicate
     * @throws RuleEvaluationException wrapping anything thrown while evaluating the XPath or the predicate
     */
    @Override
    public boolean test(Document doc) throws RuleEvaluationException {
        final GuidelineEvaluation evaluation = runtimeInfo.get();
        final String ruleId = getContext().getIdProperty().getValue();

        if (!isApplicable(this, evaluation).test(doc)) {
            // Not applicable: silently pass.
            return true;
        }

        try {
            logger.debug("Applying {}", ruleId);
            NodeList selected = getContext().getXPathExpressionProperty().evaluate(doc);
            boolean passed = predicate.test(selected);
            if (passed) {
                // Publish the nodes so that dependent rules can look them up.
                logger.debug("Setting node list of this rule {}", ruleId);
                evaluation.setNodesOf(ruleId, selected);
            }
            return passed;
        } catch (Throwable t) {
            throw new RuleEvaluationException(t.getMessage(), t);
        }
    }
}
/**
 * Rule for a sub-element: evaluates the context's XPath against the document
 * and tests the resulting nodes with the node-list predicate, unless the
 * parent rule was found to be not applicable.
 */
private static class SubElemRule extends ElemRule {

    SubElemRule(XMLContext context,
                SyntheticRule<Document> parentRule,
                Predicate<NodeList> nodeListPredicate,
                Rule<Document> applicabilityRule,
                Supplier<GuidelineEvaluation> runtimeInfo) {
        super(context, parentRule, nodeListPredicate, applicabilityRule, runtimeInfo);
    }

    /**
     * @param doc the document under validation
     * @return {@code true} when the parent or this rule is not applicable, or when the
     *         selected nodes satisfy the predicate
     * @throws RuleEvaluationException wrapping anything thrown while evaluating the XPath or the predicate
     */
    @Override
    public boolean test(Document doc) throws RuleEvaluationException {
        GuidelineEvaluation guidelineEvaluation = runtimeInfo.get();
        String thisId = getContext().getIdProperty().getValue();
        String parentRuleId = parentRule.getContext().getIdProperty().getValue();

        if (guidelineEvaluation.getRequirementLevelOf(parentRuleId) == RequirementLevel.NOT_APPLICABLE) {
            // Our parent is not applicable, set ourselves as not_applicable too and silently pass
            guidelineEvaluation.setRequirementLevelOf(thisId, RequirementLevel.NOT_APPLICABLE);
            return true;
        }

        if (!isApplicable(this, guidelineEvaluation).test(doc)) {
            return true;
        }

        try {
            logger.debug("Applying {}", thisId);
            NodeList nodes = getContext().getXPathExpressionProperty().evaluate(doc);
            boolean result = predicate.test(nodes);
            if (result) {
                // Publish the nodes so that dependent rules can look them up.
                logger.debug("Setting node list of this rule {}", thisId);
                guidelineEvaluation.setNodesOf(thisId, nodes);
            }
            return result;
        } catch (Throwable t) {
            throw new RuleEvaluationException(t.getMessage(), t);
        }
    }
}
/**
 * Common base of the element rules: an XML rule that additionally knows its
 * parent rule, its applicability rule, and where to obtain the runtime
 * evaluation state.
 */
private static class ElemRule extends XMLRule<XMLContext> implements SyntheticRule<Document> {

    /** Supplies the evaluation state at rule run time. */
    protected final Supplier<GuidelineEvaluation> runtimeInfo;
    /** Optional precondition of this rule; may be {@code null}. */
    protected final Rule<Document> applicabilityRule;
    /** Rule of the enclosing element; {@code null} for a root element rule. */
    protected final SyntheticRule<Document> parentRule;

    ElemRule(XMLContext context,
             SyntheticRule<Document> parentRule,
             Predicate<NodeList> nodeListPredicate,
             Rule<Document> applicabilityRule,
             Supplier<GuidelineEvaluation> runtimeInfo) {
        super(context, nodeListPredicate);
        this.runtimeInfo = runtimeInfo;
        this.applicabilityRule = applicabilityRule;
        this.parentRule = parentRule;
    }

    @Override
    public Rule<Document> applicabilityRule() {
        return this.applicabilityRule;
    }

    @Override
    public SyntheticRule<Document> parentRule() {
        return this.parentRule;
    }
}
/**
 * Rule that does not query the document itself: it tests the node list
 * already recorded for its parent rule.
 *
 * @param <C> type of the rule context
 */
private static class NodeSpecRule<C extends RuleContext> implements SyntheticRule<Document> {

    private final C context;
    private final SyntheticRule<Document> parentRule;
    private final Predicate<NodeList> nodeListPredicate;
    private final Rule<Document> applicabilityRule;
    private final Supplier<GuidelineEvaluation> runtimeInfo;

    NodeSpecRule(C context,
                 SyntheticRule<Document> parentRule,
                 Predicate<NodeList> nodeListPredicate,
                 Rule<Document> applicabilityRule,
                 Supplier<GuidelineEvaluation> runtimeInfo) {
        this.context = context;
        this.parentRule = parentRule;
        this.nodeListPredicate = nodeListPredicate;
        this.applicabilityRule = applicabilityRule;
        this.runtimeInfo = runtimeInfo;
    }

    @Override
    public C getContext() {
        return this.context;
    }

    @Override
    public SyntheticRule<Document> parentRule() {
        return this.parentRule;
    }

    @Override
    public Rule<Document> applicabilityRule() {
        return this.applicabilityRule;
    }

    /**
     * @param doc the document under validation; only handed to the applicability check
     * @return {@code true} when the parent or this rule is not applicable, or when the
     *         parent's nodes satisfy the predicate
     */
    @Override
    public boolean test(Document doc) throws RuleEvaluationException {
        final GuidelineEvaluation evaluation = runtimeInfo.get();
        final String ruleId = getContext().getIdProperty().getValue();
        final String parentId = parentRule.getContext().getIdProperty().getValue();

        if (evaluation.getRequirementLevelOf(parentId) == RequirementLevel.NOT_APPLICABLE) {
            // Our parent is not applicable, set ourselves as not_applicable too and silently pass
            evaluation.setRequirementLevelOf(ruleId, RequirementLevel.NOT_APPLICABLE);
            return true;
        }

        if (!isApplicable(this, evaluation).test(doc)) {
            return true;
        }

        // The nodes come from our parent's recorded result, not from the document.
        NodeList parentNodes = evaluation.getNodesOf(parentId);
        logger.debug("Acquired node list of parent rule {} = {}", parentId, (parentNodes == null ? "null" : parentNodes.getLength()));
        boolean passed = nodeListPredicate.test(parentNodes);
        if (passed) {
            // Re-publish the same nodes under this rule's id.
            logger.debug("Setting node list of this rule {}", ruleId);
            evaluation.setNodesOf(ruleId, parentNodes);
        }
        return passed;
    }

    @Override
    public String toString() {
        return getContext().getIdProperty().getValue();
    }
}
/**
 * Builds the predicate that decides whether {@code rule} applies to a document.
 * <p>
 * A rule without an applicability rule always applies, and its requirement
 * level is left untouched. Otherwise the applicability rule is run against
 * the document and the outcome is recorded in the evaluation: MANDATORY when
 * it passes, NOT_APPLICABLE when it fails.
 *
 * @param rule                the rule whose applicability is checked
 * @param guidelineEvaluation evaluation state receiving the resulting requirement level
 * @return a predicate yielding {@code true} when the rule applies to the given document
 */
private static Predicate<Document> isApplicable(SyntheticRule<Document> rule,
                                                GuidelineEvaluation guidelineEvaluation) {
    return (Document doc) -> {
        final Rule<Document> precondition = rule.applicabilityRule();
        if (precondition == null) {
            logger.debug("Null applicability rule of {}", rule);
            return true;
        }

        final String ruleId = rule.getContext().getIdProperty().getValue();
        final boolean applies = precondition.test(doc);
        if (applies) {
            logger.debug("Success of applicability rule of {}", rule);
            guidelineEvaluation.setRequirementLevelOf(ruleId, RequirementLevel.MANDATORY);
        } else {
            logger.debug("Failure of applicability rule of {}", rule);
            guidelineEvaluation.setRequirementLevelOf(ruleId, RequirementLevel.NOT_APPLICABLE);
        }
        return applies;
    };
}
}