package eu.dnetlib.validator2.validation.guideline;

import eu.dnetlib.validator2.engine.Helper;
import eu.dnetlib.validator2.engine.Rule;
import eu.dnetlib.validator2.engine.RuleContext;
import eu.dnetlib.validator2.engine.RuleEvaluationException;
import eu.dnetlib.validator2.engine.builtins.SimpleContext;
import eu.dnetlib.validator2.engine.builtins.StandardXMLContext;
import eu.dnetlib.validator2.engine.builtins.XMLRule;
import eu.dnetlib.validator2.engine.builtins.XPathExpressionHelper;
import eu.dnetlib.validator2.engine.contexts.XMLContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.xpath.XPathExpression;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Turns an {@link ElementSpec} (with its attribute specs and nested sub-element specs) into
 * executable rules, collected in a {@link CompilationResult}: one rule for the root element,
 * plus one rule per sub-element and per attribute.
 *
 * <p>Each generated rule gets a human-readable id (see {@link #nodeRuleIdFor}); the id is also
 * the key under which the rule's requirement level and matched nodes are stored.
 *
 * <p>NOTE(review): several declarations below use raw types ({@code Supplier}, {@code Predicate},
 * {@code List}, {@code SyntheticRule}) and {@code NodeSpecRule} refers to a type {@code C} that
 * has no visible declaration - confirm the intended generic parameters against the project.
 */
class ElementSpecCompiler {

    private static final Logger logger = LogManager.getLogger();

    private static final String[] EMPTY = new String[]{};

    // Every spec visited during compilation, in visiting order (only appended to in this class).
    private final List specs = new ArrayList<>();
    private final CompilationResult compilationResult = new CompilationResult();

    /**
     * Compiles the given spec tree and returns the (instance-wide) compilation result.
     *
     * @param elementSpec the spec of the root element
     * @param runtimeInfo supplies the evaluation state that the generated rules read and update
     *                    when they run
     * @return the compilation result holding the generated rules
     */
    CompilationResult compile(ElementSpec elementSpec, Supplier runtimeInfo) {
        ElementStruct rootElement = new ElementStruct(elementSpec, runtimeInfo);
        generateRulesForElement(rootElement);
        return compilationResult;
    }

    /**
     * Generates the rule of the given element, then the rules of its attributes, then recurses
     * into its sub-elements (depth-first).
     */
    private void generateRulesForElement(ElementStruct currentElement) {
        specs.add(currentElement.spec);
        if (currentElement.parent == null) {
            currentElement.createRootElemRule(compilationResult);
        } else {
            currentElement.createSubElemRule(compilationResult);
        }

        for (AttributeSpec attrSpec : currentElement.spec.attributeSpecs()) {
            currentElement.createAttrRule(compilationResult, attrSpec);
        }

        for (ElementSpec subElementSpec : currentElement.spec.subElementSpecs()) {
            generateRulesForElement(new ElementStruct(currentElement.elementsArray, subElementSpec, currentElement));
        }
    }

    /** Returns a new array holding all of {@code elements} followed by {@code newElem}. */
    private static String[] copyAndAppend(String[] elements, String newElem) {
        int len = elements.length;
        String[] newElements = new String[len + 1];
        System.arraycopy(elements, 0, newElements, 0, len);
        newElements[len] = newElem;
        return newElements;
    }

    // TODO: Should we escape the arg?
    /** XPath predicate body matching a node by its local name. */
    private static String xpathForNodeName(String name) {
        return "local-name()='" + name + "'";
    }

    /**
     * XPath predicate body matching a node by its local name and, when {@code valuePrefix} is
     * not null, by the prefix of its (whitespace-normalized) text.
     */
    private static String xpathForNodeName(String name, String valuePrefix) {
        String[] components = new String[] { xpathForNodeName(name) };
        if (valuePrefix != null) {
            // normalize-space is needed for cases where the text isn't on the same line as the starting xml tag
            components = copyAndAppend(components, "starts-with(normalize-space(text()), '" + valuePrefix + "')");
        }
        return String.join(" and ", components);
    }

    // TODO: Should we escape the arg?
    /** XPath step selecting the attribute with the given name. */
    private static String xpathForAttributeName(String name) {
        return "@" + name;
    }

    /** Appends the {@code /text()} step to {@code xpath} when {@code withText} is set. */
    private static String xpathWithText(String xpath, boolean withText) {
        return withText ? xpath + "/text()" : xpath;
    }

    /**
     * Builds the id of a node (element or attribute) rule from the spec properties that
     * distinguish it.
     *
     * @param nodeType               text describing the node kind (e.g. "Element", "Attribute")
     * @param nodeName               the (merged) node name
     * @param requirementLevel       its name is used as the id prefix
     * @param position               mentioned in the id unless it is {@link ElementPosition#ALL}
     * @param valuePrefix            mentioned in the id unless null
     * @param allowedValuesPredicate mentioned in the id unless it allows all values
     * @param cardinality            its text is always appended
     * @return the rule id
     */
    static String nodeRuleIdFor(String nodeType,
                                String nodeName,
                                RequirementLevel requirementLevel,
                                ElementPosition position,
                                String valuePrefix,
                                Predicate allowedValuesPredicate,
                                Cardinality cardinality) {
        StringBuilder builder = new StringBuilder(requirementLevel.name())
                .append(" rule: The ").append(nodeName).append(" ").append(nodeType);
        if (position != ElementPosition.ALL) {
            // Locale.ROOT keeps the id stable regardless of the JVM's default locale
            // (ids are used as lookup keys).
            builder.append(" at ").append(position.name().toLowerCase(java.util.Locale.ROOT)).append(" position");
        }
        if (valuePrefix != null) {
            builder.append(" has values that start with ").append(valuePrefix).append(",");
        }
        if (!allowsAllValues(allowedValuesPredicate)) {
            builder.append(" contains allowed values only,");
        }
        builder.append(" has cardinality ").append(cardinality.asText());
        return builder.toString();
    }

    /**
     * Builds the id of a cardinality rule; the value prefix is mentioned only when
     * {@code Helper.isEmpty} reports it as non-empty.
     */
    static String cardinalityRuleIdFor(String nodeName,
                                       RequirementLevel requirementLevel,
                                       Cardinality cardinality,
                                       String valuePrefix) {
        String id = requirementLevel.name() + " rule: " + nodeName + " has cardinality " + cardinality.asText();
        if (Helper.isEmpty(valuePrefix)) {
            return id;
        }
        return id + " for values starting with '" + valuePrefix + "'";
    }

    /** Joins the given names with {@code /}, e.g. {@code elem1/sub1}. */
    static final String mergeNodeNames(String... names) {
        return String.join("/", names);
    }

    // Heuristic for avoiding to create rules for checking the allowed values, when all values are allowed.
    private static boolean allowsAllValues(Predicate allowedValuesPredicate) {
        // A null value will never occur through our Builders class.
        // Testing the predicate against null was tried as an additional check, but it seemed to
        // give undesired results against negated allowedValues predicates.
        return allowedValuesPredicate == null || allowedValuesPredicate == Builders.ALLOW_ALL_VALUES;
    }

    /**
     * Creates the rule of the root element: the nodes selected by {@code xpath} (restricted to
     * those with an allowed value, when a restricting predicate is given) must satisfy the
     * cardinality bounds.
     */
    private static SyntheticRule createActualRootElementRule(Supplier runtimeInfo,
                                                             String id,
                                                             String xpath,
                                                             String xpathForValue,
                                                             Cardinality cardinality,
                                                             Predicate allowedValuesPredicate,
                                                             Rule applicabilityRule) {
        XMLContext ctx = createCustomXMLContext(id, xpath);
        Predicate lengthPredicate =
                Helper.createCardinalityPredicate(cardinality.getLowerBound(), cardinality.getUpperBound(), true);
        Predicate lengthChecker = (NodeList elements) -> lengthPredicate.test(elements.getLength());

        if (allowsAllValues(allowedValuesPredicate)) {
            return new RootElemRule(ctx, lengthChecker, applicabilityRule, runtimeInfo);
        } else {
            Predicate lengthCheckerOnAllowedElements = (NodeList elements) -> {
                NodeList filtered = filterNodes(elements, xpathForValue, allowedValuesPredicate);
                return lengthChecker.test(filtered);
            };
            return new RootElemRule(ctx, lengthCheckerOnAllowedElements, applicabilityRule, runtimeInfo);
        }
    }

    /**
     * Creates the rule of an attribute. The rule succeeds when every given element carries the
     * attribute with an acceptable value: a non-empty one (per {@code Helper.isEmpty}) when all
     * values are allowed, otherwise one accepted by {@code allowedValuesPredicate}.
     *
     * <p>The NodeList handed to the rule's predicate contains the parent elements and may be
     * null, in which case the rule fails.
     */
    private static SyntheticRule createActualAttributeSpecRule(Supplier runtimeInfo,
                                                               SyntheticRule parentRule,
                                                               String id,
                                                               String attrName,
                                                               Predicate allowedValuesPredicate,
                                                               Rule applicabilityRule) {
        SimpleContext ctx = new SimpleContext(id);
        if (allowsAllValues(allowedValuesPredicate)) {
            return new NodeSpecRule<>(ctx, parentRule, (NodeList elements) -> {
                if (elements == null) {
                    // Check before touching the list, so a missing parent node list is a plain failure.
                    return false;
                }
                NodeList attrs = attributesOf(elements, attrName, (String s) -> !Helper.isEmpty(s));
                return attrs.getLength() == elements.getLength();
            }, applicabilityRule, runtimeInfo);
        } else {
            Predicate allowedAttrsLengthEqualsElemLength = (NodeList elements) -> {
                if (elements == null) {
                    // Check before touching the list, so a missing parent node list is a plain failure.
                    return false;
                }
                NodeList attrs = attributesOf(elements, attrName, allowedValuesPredicate);
                return elements.getLength() == attrs.getLength();
            };
            return new NodeSpecRule<>(ctx, parentRule, allowedAttrsLengthEqualsElemLength, applicabilityRule, runtimeInfo);
        }
    }

    /**
     * Creates the rule of a sub-element: the number of nodes selected by {@code xpath}
     * (restricted to those with an allowed value, when a restricting predicate is given) is
     * checked against the number of nodes recorded for the parent rule.
     */
    private static SyntheticRule createActualSubElementRule(Supplier runtimeInfo,
                                                            SyntheticRule parentRule,
                                                            String id,
                                                            String xpath,
                                                            String xpathForValue,
                                                            Cardinality cardinality,
                                                            Predicate allowedValuesPredicate,
                                                            Rule applicabilityRule) {
        XMLContext ctx = createCustomXMLContext(id, xpath);
        Predicate lengthChecker = (NodeList elements) -> {
            // Calculate the length predicate at rule runtime (it depends on the parent's nodes)
            Predicate lengthPredicate = createLengthPredicateForSubElements(runtimeInfo, parentRule, cardinality);
            return lengthPredicate.test(elements.getLength());
        };

        if (allowsAllValues(allowedValuesPredicate)) {
            return new SubElemRule(ctx, parentRule, lengthChecker, applicabilityRule, runtimeInfo);
        } else {
            Predicate lengthCheckerOnAllowedElements = (NodeList elements) -> {
                NodeList filtered = filterNodes(elements, xpathForValue, allowedValuesPredicate);
                return lengthChecker.test(filtered);
            };
            return new SubElemRule(ctx, parentRule, lengthCheckerOnAllowedElements, applicabilityRule, runtimeInfo);
        }
    }

    /**
     * Builds the predicate applied to the total number of matched sub-elements, based on the
     * number of nodes currently recorded for the parent rule. Note that totals are compared;
     * the distribution of sub-elements among the parents is not inspected here.
     *
     * @throws RuleEvaluationException for a cardinality other than ONE, ONE_TO_N, TWO, FOUR_TO_N
     */
    private static Predicate createLengthPredicateForSubElements(Supplier runtimeInfo,
                                                                 SyntheticRule parentRule,
                                                                 Cardinality cardinality) {
        GuidelineEvaluation evaluation = runtimeInfo.get();
        NodeList parents = evaluation.getNodesOf(parentRule.getContext().getIdProperty().getValue());
        if (parents == null) {
            return (Integer count) -> false; // There are no parents, actually
        }

        int parentsLength = parents.getLength();
        if (cardinality == Cardinality.ONE) {
            return (Integer count) -> count == parentsLength; // A sub-element can be present in each parent
        } else if (cardinality == Cardinality.ONE_TO_N) {
            return (Integer count) -> count >= parentsLength; // One or more sub-elements can be present in each parent
        } else if (cardinality == Cardinality.TWO) {
            return (Integer count) -> count == parentsLength * 2; // Two sub-elements can be present in each parent
        } else if (cardinality == Cardinality.FOUR_TO_N) {
            return (Integer count) -> count >= parentsLength * 4; // Four or more sub-elements can be present in each parent
        } else {
            // not reachable
            throw new RuleEvaluationException(" Unsupported cardinality " + cardinality, null);
        }
    }

    /** Selects, via {@code Helper}, the nodes whose {@code attrName} attribute value passes {@code allowedValues}. */
    private static NodeList attributesOf(NodeList elements, String attrName, Predicate allowedValues) {
        return Helper.nodesThatMatchThePredicate(
                elements,
                (Node node) -> Helper.getAttributeValue(node, attrName),
                allowedValues
        );
    }

    /** Creates an XML context with the given id and xpath, and the "custom" node-list action. */
    private static XMLContext createCustomXMLContext(String id, String xpath) {
        StandardXMLContext context = new StandardXMLContext();
        context.getIdProperty().setValue(id);
        context.getXPathExpressionProperty().setValue(xpath);
        context.getNodeListActionProperty().setValue("custom");
        return context;
    }

    /**
     * Selects, via {@code Helper}, the nodes whose value (read with {@code xpathToReadValues}
     * relative to each node) passes {@code allowedValuesPredicate}.
     */
    private static NodeList filterNodes(NodeList nodes, String xpathToReadValues, Predicate allowedValuesPredicate) {
        logger.debug("Filtering nodes with {}", xpathToReadValues);
        return Helper.nodesThatMatchThePredicate(nodes, new NodeValueReader(xpathToReadValues), allowedValuesPredicate);
    }

    /** Reads the value of the node found by evaluating an xpath expression against a given node. */
    private static class NodeValueReader implements Function {

        private final String nodeExpr;

        NodeValueReader(String nodeExpr) {
            this.nodeExpr = nodeExpr;
        }

        /** @return the node value, or null when the expression selects no node */
        @Override
        public String apply(Node parent) {
            logger.debug("Reading node value {} from parent {}", nodeExpr, parent);
            Node node = XPathExpressionHelper.node(nodeExpr, parent);
            return node == null ? null : node.getNodeValue();
        }
    }

    /**
     * Compilation-time view of one element of the spec tree: its spec, its position in the
     * tree, and the most recent rule generated for it.
     */
    private static class ElementStruct {

        final Supplier runtimeInfo;
        final ElementSpec spec;
        final String[] parentElementNames;
        final ElementStruct parent;

        // Names from the root down to this element, e.g. [elem1,sub1]
        final String[] elementsArray;

        // The same names merged into one string, e.g. elem1/sub1
        final String elementsId;

        // Set by the create*Rule methods; createAttrRule re-points it to the attribute rule,
        // so later attribute and sub-element rules get that rule as their parent rule.
        SyntheticRule rule;

        /** Creates the struct of a sub-element; the runtime info is inherited from {@code parent}. */
        ElementStruct(String[] parentElementNames, ElementSpec spec, ElementStruct parent) {
            this.runtimeInfo = parent.runtimeInfo;
            this.parentElementNames = parentElementNames;
            this.spec = spec;
            this.parent = parent;
            elementsArray = copyAndAppend(parentElementNames, spec.nodeName());
            elementsId = mergeNodeNames(elementsArray);
        }

        /** Creates the struct of the root element (no parent, no parent names). */
        ElementStruct(ElementSpec spec, Supplier runtimeInfo) {
            this.runtimeInfo = runtimeInfo;
            this.parentElementNames = EMPTY;
            this.spec = spec;
            this.parent = null;
            elementsArray = copyAndAppend(parentElementNames, spec.nodeName());
            elementsId = mergeNodeNames(elementsArray);
        }

        /**
         * Builds the xpath selecting this element: anywhere in the document under the spec's
         * declared parents for the root element, or under the parent's xpath otherwise. The
         * spec's position expression, when present, is applied to the whole path.
         */
        private String xpath(boolean withText) {
            String xpathForThis = xpathForNodeName(spec.nodeName(), spec.valuePrefix());
            String xpath;
            if (parent == null) {
                // This is the top-level element rule
                List pathComponents = spec.parents().stream().
                        map( s -> "*[" + xpathForNodeName(s) + "]").collect(Collectors.toList());
                pathComponents.add("*[" + xpathForThis + "]");
                xpath = "//" + String.join("/", pathComponents);
            } else {
                xpath = parent.xpath(false) + "/*[" + xpathForThis + "]";
            }

            if (spec.position().xpath != null) {
                xpath = "(" + xpath + ")[" + spec.position().xpath + "]";
            }
            return xpathWithText(xpath, withText);
        }

        /** Creates the root element rule and registers it as the result's root node rule. */
        private void createRootElemRule(CompilationResult compilationResult) {
            String id = nodeRuleIdFor(
                    "Element",
                    elementsId,
                    spec.requirementLevel(),
                    spec.position(),
                    spec.valuePrefix(),
                    spec.allowedValuesPredicate(),
                    spec.cardinality()
            );

            SyntheticRule rule = createActualRootElementRule(
                    runtimeInfo,
                    id,
                    xpath(false),
                    "text()",
                    spec.cardinality(),
                    spec.allowedValuesPredicate(),
                    spec.applicabilityRule()
            );

            compilationResult.ruleIdToRequirementLevel.put(id, spec.requirementLevel());
            compilationResult.rootNodeRule = rule;
            this.rule = rule;
        }

        /**
         * Creates the rule of one attribute of this element and adds it to the result's node
         * rules. The element's current rule becomes the parent of the new rule, and the new
         * rule then replaces it as this element's current rule.
         */
        private void createAttrRule(CompilationResult compilationResult, AttributeSpec attrSpec) {
            String attrRuleIdComponent = mergeNodeNames(elementsId, attrSpec.nodeName());

            // NOTE(review): the id is built from the element's allowed-values predicate and
            // cardinality (spec), not the attribute's (attrSpec) - confirm this is intended.
            String attrRuleId = nodeRuleIdFor(
                    "Attribute",
                    attrRuleIdComponent,
                    attrSpec.requirementLevel(),
                    ElementPosition.ALL,
                    null,
                    spec.allowedValuesPredicate(),
                    spec.cardinality()
            );

            SyntheticRule rule = createActualAttributeSpecRule(
                    runtimeInfo,
                    this.rule,
                    attrRuleId,
                    attrSpec.nodeName(), // this is resolved in the context of its parent elements
                    attrSpec.allowedValuesPredicate(),
                    attrSpec.applicabilityRule()
            );

            compilationResult.ruleIdToRequirementLevel.put(attrRuleId, attrSpec.requirementLevel());
            compilationResult.nodeRules.add(rule);
            this.rule = rule;
        }

        /** Creates the rule of this sub-element (child of {@code parent.rule}) and adds it to the result's node rules. */
        private void createSubElemRule(CompilationResult compilationResult) {
            String id = nodeRuleIdFor(
                    "Element",
                    elementsId,
                    spec.requirementLevel(),
                    spec.position(),
                    spec.valuePrefix(),
                    spec.allowedValuesPredicate(),
                    spec.cardinality()
            );

            SyntheticRule rule = createActualSubElementRule(
                    runtimeInfo,
                    parent.rule,
                    id,
                    xpath(false),
                    "text()",
                    spec.cardinality(),
                    spec.allowedValuesPredicate(),
                    spec.applicabilityRule()
            );

            compilationResult.ruleIdToRequirementLevel.put(id, spec.requirementLevel());
            compilationResult.nodeRules.add(rule);
            this.rule = rule;
        }
    }

    /** Rule of the root element; it has no parent rule. */
    private static class RootElemRule extends ElemRule {

        RootElemRule(XMLContext context,
                     Predicate nodeListPredicate,
                     Rule applicabilityRule,
                     Supplier runtimeInfo) {
            super(context, null, nodeListPredicate, applicabilityRule, runtimeInfo);
        }

        @Override
        public boolean test(Document doc) throws RuleEvaluationException {
            GuidelineEvaluation guidelineEvaluation = runtimeInfo.get();
            String thisId = getContext().getIdProperty().getValue();
            return evaluateWhenApplicable(doc, guidelineEvaluation, thisId);
        }
    }

    /** Rule of a sub-element; it is skipped (and passes) when its parent rule is not applicable. */
    private static class SubElemRule extends ElemRule {

        SubElemRule(XMLContext context,
                    SyntheticRule parentRule,
                    Predicate nodeListPredicate,
                    Rule applicabilityRule,
                    Supplier runtimeInfo) {
            super(context, parentRule, nodeListPredicate, applicabilityRule, runtimeInfo);
        }

        @Override
        public boolean test(Document doc) throws RuleEvaluationException {
            GuidelineEvaluation guidelineEvaluation = runtimeInfo.get();
            String thisId = getContext().getIdProperty().getValue();
            String parentRuleId = parentRule.getContext().getIdProperty().getValue();
            if (guidelineEvaluation.getRequirementLevelOf(parentRuleId) == RequirementLevel.NOT_APPLICABLE) {
                // Our parent is not applicable, set ourselves as not_applicable too and silently pass
                guidelineEvaluation.setRequirementLevelOf(thisId, RequirementLevel.NOT_APPLICABLE);
                return true;
            }
            return evaluateWhenApplicable(doc, guidelineEvaluation, thisId);
        }
    }

    /** Common base of the element rules: holds the parent rule, the applicability rule and the runtime info. */
    private static class ElemRule extends XMLRule implements SyntheticRule {

        protected final SyntheticRule parentRule;
        protected final Supplier runtimeInfo;
        protected final Rule applicabilityRule;

        ElemRule(XMLContext context,
                 SyntheticRule parentRule,
                 Predicate nodeListPredicate,
                 Rule applicabilityRule,
                 Supplier runtimeInfo) {
            super(context, nodeListPredicate);
            this.parentRule = parentRule;
            this.applicabilityRule = applicabilityRule;
            this.runtimeInfo = runtimeInfo;
        }

        @Override
        public SyntheticRule parentRule() {
            return parentRule;
        }

        @Override
        public Rule applicabilityRule() {
            return applicabilityRule;
        }

        /**
         * Shared evaluation step of the element rules. When this rule is not applicable the
         * result is true (silent pass). Otherwise the rule's xpath is evaluated against the
         * document and the node-list predicate is applied; on success the matched nodes are
         * recorded under this rule's id, so that dependent rules can look them up.
         *
         * @throws RuleEvaluationException wrapping anything thrown during the evaluation
         */
        protected final boolean evaluateWhenApplicable(Document doc,
                                                       GuidelineEvaluation guidelineEvaluation,
                                                       String thisId) {
            if (!isApplicable(this, guidelineEvaluation).test(doc)) {
                return true;
            }
            try {
                logger.debug("Applying {}", thisId);
                NodeList nodes = getContext().getXPathExpressionProperty().evaluate(doc);
                boolean result = predicate.test(nodes);
                if (result) {
                    logger.debug("Setting node list of this rule {}", thisId);
                    guidelineEvaluation.setNodesOf(thisId, nodes);
                }
                return result;
            } catch (Throwable t) {
                throw new RuleEvaluationException(t.getMessage(), t);
            }
        }
    }

    /**
     * Rule that does not query the document itself: it applies its predicate on the nodes
     * recorded for its parent rule and, on success, records the same nodes under its own id.
     */
    private static class NodeSpecRule implements SyntheticRule {

        private final C context;
        private final Predicate nodeListPredicate;
        private final Rule applicabilityRule;
        private final SyntheticRule parentRule;
        private final Supplier runtimeInfo;

        NodeSpecRule(C context,
                     SyntheticRule parentRule,
                     Predicate nodeListPredicate,
                     Rule applicabilityRule,
                     Supplier runtimeInfo) {
            this.context = context;
            this.parentRule = parentRule;
            this.nodeListPredicate = nodeListPredicate;
            this.applicabilityRule = applicabilityRule;
            this.runtimeInfo = runtimeInfo;
        }

        @Override
        public C getContext() {
            return context;
        }

        @Override
        public SyntheticRule parentRule() {
            return parentRule;
        }

        @Override
        public Rule applicabilityRule() {
            return applicabilityRule;
        }

        @Override
        public boolean test(Document doc) throws RuleEvaluationException {
            GuidelineEvaluation guidelineEvaluation = runtimeInfo.get();
            String thisId = getContext().getIdProperty().getValue();
            String parentRuleId = parentRule.getContext().getIdProperty().getValue();
            if (guidelineEvaluation.getRequirementLevelOf(parentRuleId) == RequirementLevel.NOT_APPLICABLE) {
                // Our parent is not applicable, set ourselves as not_applicable too and silently pass
                guidelineEvaluation.setRequirementLevelOf(thisId, RequirementLevel.NOT_APPLICABLE);
                return true;
            }
            if (isApplicable(this, guidelineEvaluation).test(doc)) {
                // The doc is only used by the applicability check; the nodes come from our parent
                NodeList nodes = guidelineEvaluation.getNodesOf(parentRuleId);
                logger.debug("Acquired node list of parent rule {} = {}", parentRuleId, (nodes == null ? "null" : nodes.getLength()));
                boolean result = nodeListPredicate.test(nodes);
                if (result) {
                    logger.debug("Setting node list of this rule {}", thisId);
                    guidelineEvaluation.setNodesOf(thisId, nodes);
                }
                return result;
            } else {
                return true;
            }
        }

        @Override
        public String toString() {
            return getContext().getIdProperty().getValue();
        }
    }

    /**
     * Returns a predicate telling whether {@code rule} applies to a document. A rule without an
     * applicability rule always applies (and its requirement level is left untouched).
     * Otherwise the applicability rule is tested and the requirement level of {@code rule} is
     * set to MANDATORY on success, or to NOT_APPLICABLE on failure.
     */
    private static Predicate isApplicable(SyntheticRule rule, GuidelineEvaluation guidelineEvaluation) {
        return (Document doc) -> {
            Rule applicabilityRule = rule.applicabilityRule();
            if (applicabilityRule == null) {
                logger.debug("Null applicability rule of {}", rule);
                return true;
            }
            String thisId = rule.getContext().getIdProperty().getValue();
            if (applicabilityRule.test(doc)) {
                logger.debug("Success of applicability rule of {}", rule);
                guidelineEvaluation.setRequirementLevelOf(thisId, RequirementLevel.MANDATORY);
                return true;
            } else {
                logger.debug("Failure of applicability rule of {}", rule);
                guidelineEvaluation.setRequirementLevelOf(thisId, RequirementLevel.NOT_APPLICABLE);
                return false;
            }
        };
    }
}