2022-09-16 12:56:52 +02:00
package eu.dnetlib.validator2.validation.guideline ;
import eu.dnetlib.validator2.engine.Helper ;
import eu.dnetlib.validator2.engine.Rule ;
import eu.dnetlib.validator2.engine.RuleContext ;
import eu.dnetlib.validator2.engine.RuleEvaluationException ;
import eu.dnetlib.validator2.engine.builtins.SimpleContext ;
import eu.dnetlib.validator2.engine.builtins.StandardXMLContext ;
import eu.dnetlib.validator2.engine.builtins.XMLRule ;
import eu.dnetlib.validator2.engine.builtins.XPathExpressionHelper ;
import eu.dnetlib.validator2.engine.contexts.XMLContext ;
2023-07-10 13:38:06 +02:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2022-09-16 12:56:52 +02:00
import org.w3c.dom.Document ;
import org.w3c.dom.Node ;
import org.w3c.dom.NodeList ;
import java.util.ArrayList ;
import java.util.List ;
import java.util.function.Function ;
import java.util.function.Predicate ;
import java.util.function.Supplier ;
import java.util.stream.Collectors ;
import java.util.stream.Stream ;
class ElementSpecCompiler {
2023-07-10 13:38:06 +02:00
private static final org . slf4j . Logger logger = LoggerFactory . getLogger ( ElementSpecCompiler . class ) ;
2022-09-16 12:56:52 +02:00
private static final String [ ] EMPTY = new String [ ] { } ;
private final List < ElementSpec > specs = new ArrayList < > ( ) ;
private final CompilationResult compilationResult = new CompilationResult ( ) ;
/**
 * Generates the rules for the given element spec and, recursively, for its
 * attribute and sub-element specs.
 *
 * @param elementSpec the root element specification to compile
 * @param runtimeInfo supplier of the evaluation state the generated rules consult when they run
 * @return the compilation result held by this compiler instance (the same object on every call)
 */
CompilationResult compile(ElementSpec elementSpec, Supplier<GuidelineEvaluation> runtimeInfo) {
    generateRulesForElement(new ElementStruct(elementSpec, runtimeInfo));
    return compilationResult;
}
private void generateRulesForElement ( ElementStruct currentElement ) {
specs . add ( currentElement . spec ) ;
if ( currentElement . parent = = null ) {
currentElement . createRootElemRule ( compilationResult ) ;
}
else {
currentElement . createSubElemRule ( compilationResult ) ;
}
for ( AttributeSpec attrSpec : currentElement . spec . attributeSpecs ( ) ) {
currentElement . createAttrRule ( compilationResult , attrSpec ) ;
}
for ( ElementSpec subElementSpec : currentElement . spec . subElementSpecs ( ) ) {
generateRulesForElement ( new ElementStruct ( currentElement . elementsArray , subElementSpec , currentElement ) ) ;
}
}
/**
 * Returns a new array holding every entry of {@code elements} followed by {@code newElem}.
 * The input array is not modified.
 *
 * @param elements the existing entries (must not be null)
 * @param newElem  the entry to place in the last slot
 * @return a fresh array one slot longer than {@code elements}
 */
private static String[] copyAndAppend(String[] elements, String newElem) {
    int originalLength = elements.length;
    // copyOf pads the extra slot with null, which is overwritten right below.
    String[] extended = java.util.Arrays.copyOf(elements, originalLength + 1);
    extended[originalLength] = newElem;
    return extended;
}
// TODO: Should we escape the arg?
private static String xpathForNodeName ( String name ) {
return " local-name()=' " + name + " ' " ;
}
private static String xpathForNodeName ( String name , String valuePrefix ) {
String [ ] components = new String [ ] { xpathForNodeName ( name ) } ;
if ( valuePrefix ! = null ) {
components = copyAndAppend ( components , " starts-with(normalize-space(text()), ' " + valuePrefix + " ') " ) ; // normalize-space needed for cases text isn't in the same line with starting xml tag
}
return String . join ( " and " , components ) ;
}
/**
 * Builds the XPath step that selects the attribute with the given name.
 *
 * TODO: Should we escape the arg?
 *
 * @param name the attribute name
 * @return the attribute step for {@code name}
 */
private static String xpathForAttributeName(String name) {
    StringBuilder step = new StringBuilder(" @ ");
    step.append(name);
    return step.toString();
}
/**
 * Optionally extends an XPath so that it selects the text node of the matched element.
 *
 * @param xpath    the expression selecting the element
 * @param withText whether to append the text() step
 * @return {@code xpath} unchanged, or with the text() step appended
 */
private static String xpathWithText(String xpath, boolean withText) {
    if (!withText) {
        return xpath;
    }
    return xpath + " /text() ";
}
static String nodeRuleIdFor ( String nodeType ,
String nodeName ,
RequirementLevel requirementLevel ,
ElementPosition position ,
String valuePrefix ,
Predicate < String > allowedValuesPredicate ,
Cardinality cardinality ) {
StringBuilder builder =
new StringBuilder ( requirementLevel . name ( ) ) . append ( " rule: The " ) . append ( nodeName ) . append ( " " ) . append ( nodeType ) ;
if ( position ! = ElementPosition . ALL ) {
builder . append ( " at " ) . append ( position . name ( ) . toLowerCase ( ) ) . append ( " position " ) ;
}
if ( valuePrefix ! = null ) {
builder . append ( " has values that start with " ) . append ( valuePrefix ) . append ( " , " ) ;
}
if ( ! allowsAllValues ( allowedValuesPredicate ) ) {
builder . append ( " contains allowed values only, " ) ;
}
builder . append ( " has cardinality " ) . append ( cardinality . asText ( ) ) ;
return builder . toString ( ) ;
}
/**
 * Builds the identifier of a cardinality rule for a node.
 *
 * @param nodeName         the node name or merged node path
 * @param requirementLevel requirement level whose name prefixes the id
 * @param cardinality      cardinality whose text form is embedded in the id
 * @param valuePrefix      optional value prefix; when not empty it is appended as a suffix clause
 * @return the rule identifier
 */
static String cardinalityRuleIdFor(String nodeName,
                                   RequirementLevel requirementLevel,
                                   Cardinality cardinality,
                                   String valuePrefix) {
    String baseId = requirementLevel.name() + " rule: " + nodeName + " has cardinality " + cardinality.asText();
    return Helper.isEmpty(valuePrefix)
            ? baseId
            : baseId + " for values starting with ' " + valuePrefix + " ' ";
}
static final String mergeNodeNames ( String . . . names ) {
return Stream . of ( names ) . collect ( Collectors . joining ( " / " ) ) ;
}
// Heuristic for avoiding to create rules for checking the allowed values, when all values are allowed.
private static boolean allowsAllValues ( Predicate < String > allowedValuesPredicate ) {
return ( allowedValuesPredicate = = null | | // null value will never occur through our Builders class.
allowedValuesPredicate = = Builders . ALLOW_ALL_VALUES
// || allowedValuesPredicate.test(null) // this seems to give undesired results against allowedValues negated predicates
) ;
}
private static SyntheticRule < Document > createActualRootElementRule ( Supplier < GuidelineEvaluation > runtimeInfo ,
String id ,
String xpath ,
String xpathForValue ,
Cardinality cardinality ,
Predicate < String > allowedValuesPredicate ,
Rule < Document > applicabilityRule ) {
XMLContext ctx = createCustomXMLContext ( id , xpath ) ;
Predicate < Integer > lengthPredicate = Helper .
createCardinalityPredicate ( cardinality . getLowerBound ( ) , cardinality . getUpperBound ( ) , true ) ;
Predicate < NodeList > lengthChecker = ( NodeList elements ) - > lengthPredicate . test ( elements . getLength ( ) ) ;
if ( allowsAllValues ( allowedValuesPredicate ) ) {
return new RootElemRule ( ctx , lengthChecker , applicabilityRule , runtimeInfo ) ;
}
else {
Predicate < NodeList > lengthCheckerOnAllowedElements = ( NodeList elements ) - > {
NodeList filtered = filterNodes ( elements , xpathForValue , allowedValuesPredicate ) ;
return lengthChecker . test ( filtered ) ;
} ;
return new RootElemRule ( ctx , lengthCheckerOnAllowedElements , applicabilityRule , runtimeInfo ) ;
}
}
// The NodeList contains the parent elements
private static SyntheticRule < Document > createActualAttributeSpecRule ( Supplier < GuidelineEvaluation > runtimeInfo ,
SyntheticRule < Document > parentRule ,
String id ,
String attrName ,
Predicate < String > allowedValuesPredicate ,
Rule < Document > applicabilityRule ) {
SimpleContext ctx = new SimpleContext ( id ) ;
if ( allowsAllValues ( allowedValuesPredicate ) ) {
return new NodeSpecRule < > ( ctx , parentRule , ( NodeList elements ) - > {
NodeList attrs = attributesOf ( elements , attrName , ( String s ) - > ! Helper . isEmpty ( s ) ) ;
return ( elements ! = null & & attrs . getLength ( ) = = elements . getLength ( ) ) ;
} , applicabilityRule , runtimeInfo ) ;
}
else {
Predicate < NodeList > allowedAttrsLengthEqualsElemLength = ( NodeList elements ) - > {
NodeList attrs = attributesOf ( elements , attrName , allowedValuesPredicate ) ;
return ( elements ! = null & & elements . getLength ( ) = = attrs . getLength ( ) ) ;
} ;
return new NodeSpecRule < > ( ctx , parentRule , allowedAttrsLengthEqualsElemLength , applicabilityRule , runtimeInfo ) ;
}
}
private static SyntheticRule < Document > createActualSubElementRule ( Supplier < GuidelineEvaluation > runtimeInfo ,
SyntheticRule < Document > parentRule ,
String id ,
String xpath ,
String xpathForValue ,
Cardinality cardinality ,
Predicate < String > allowedValuesPredicate ,
Rule < Document > applicabilityRule ) {
XMLContext ctx = createCustomXMLContext ( id , xpath ) ;
Predicate < NodeList > lengthChecker = ( NodeList elements ) - > {
// Calculate the length predicate at rule runtime
Predicate < Integer > lengthPredicate = createLengthPredicateForSubElements ( runtimeInfo , parentRule , cardinality ) ;
return lengthPredicate . test ( elements . getLength ( ) ) ;
} ;
if ( allowsAllValues ( allowedValuesPredicate ) ) {
return new SubElemRule ( ctx , parentRule , lengthChecker , applicabilityRule , runtimeInfo ) ;
}
else {
Predicate < NodeList > lengthCheckerOnAllowedElements = ( NodeList elements ) - > {
NodeList filtered = filterNodes ( elements , xpathForValue , allowedValuesPredicate ) ;
return lengthChecker . test ( filtered ) ;
} ;
return new SubElemRule ( ctx , parentRule , lengthCheckerOnAllowedElements , applicabilityRule , runtimeInfo ) ;
}
}
private static Predicate < Integer > createLengthPredicateForSubElements ( Supplier < GuidelineEvaluation > runtimeInfo ,
SyntheticRule < Document > parentRule ,
Cardinality cardinality ) {
GuidelineEvaluation evaluation = runtimeInfo . get ( ) ;
NodeList parents = evaluation . getNodesOf ( parentRule . getContext ( ) . getIdProperty ( ) . getValue ( ) ) ;
if ( parents = = null ) {
return ( Integer count ) - > false ; // There are no parents, actually
}
int parentsLength = parents . getLength ( ) ;
if ( cardinality = = Cardinality . ONE ) {
return ( Integer count ) - > count = = parentsLength ; // A sub-element can be present in each parent
}
else if ( cardinality = = Cardinality . ONE_TO_N ) {
return ( Integer count ) - > count > = parentsLength ; // One or more sub-elements can be present in each parent
}
else if ( cardinality = = Cardinality . TWO ) {
return ( Integer count ) - > count = = parentsLength * 2 ; // Two sub-elements can be present in each parent
}
else if ( cardinality = = Cardinality . FOUR_TO_N ) {
return ( Integer count ) - > count > = parentsLength * 4 ; // Four or more sub-elements can be present in each parent
}
else { // not reachable
throw new RuleEvaluationException ( " Unsupported cardinality " + cardinality , null ) ;
}
}
private static NodeList attributesOf ( NodeList elements , String attrName , Predicate < String > allowedValues ) {
return Helper . nodesThatMatchThePredicate (
elements ,
( Node node ) - > Helper . getAttributeValue ( node , attrName ) ,
allowedValues
) ;
}
/**
 * Creates an XML context carrying the rule id and the XPath expression, with the
 * node-list action property set to the custom action value.
 *
 * @param id    the rule identifier
 * @param xpath the XPath expression the rule evaluates
 * @return the configured context
 */
private static XMLContext createCustomXMLContext(String id, String xpath) {
    StandardXMLContext xmlContext = new StandardXMLContext();
    xmlContext.getIdProperty().setValue(id);
    xmlContext.getXPathExpressionProperty().setValue(xpath);
    // The node list is checked by the rule's own predicate rather than a built-in action
    // (presumably what the custom action value selects; confirm against StandardXMLContext).
    xmlContext.getNodeListActionProperty().setValue(" custom ");
    return xmlContext;
}
/**
 * Keeps the nodes whose value, read with {@code xpathToReadValues} relative to each
 * node, satisfies {@code allowedValuesPredicate}.
 *
 * @param nodes                  the nodes to filter
 * @param xpathToReadValues      expression evaluated against each node to obtain its value
 * @param allowedValuesPredicate predicate applied to each value
 * @return the node list produced by Helper.nodesThatMatchThePredicate
 */
private static NodeList filterNodes(NodeList nodes, String xpathToReadValues, Predicate<String> allowedValuesPredicate) {
    logger.debug(" Filtering nodes with {} ", xpathToReadValues);
    NodeValueReader valueReader = new NodeValueReader(xpathToReadValues);
    return Helper.nodesThatMatchThePredicate(nodes, valueReader, allowedValuesPredicate);
}
private static class NodeValueReader implements Function < Node , String > {
private final String nodeExpr ;
NodeValueReader ( String nodeExpr ) {
this . nodeExpr = nodeExpr ;
}
@Override
public String apply ( Node parent ) {
logger . debug ( " Reading node value {} from parent {} " , nodeExpr , parent ) ;
Node node = XPathExpressionHelper . node ( nodeExpr , parent ) ;
return node = = null ? null : node . getNodeValue ( ) ;
}
}
private static class ElementStruct {
2023-07-10 13:38:06 +02:00
private static final Logger logger = LoggerFactory . getLogger ( ElementStruct . class ) ;
2022-09-16 12:56:52 +02:00
Supplier < GuidelineEvaluation > runtimeInfo ;
final ElementSpec spec ;
final String [ ] parentElementNames ;
final ElementStruct parent ;
//e.g. [elem1,sub1]
final String [ ] elementsArray ;
// e.g. elem1/sub1
final String elementsId ;
SyntheticRule < Document > rule ;
ElementStruct ( String [ ] parentElementNames , ElementSpec spec , ElementStruct parent ) {
this . runtimeInfo = parent . runtimeInfo ;
this . parentElementNames = parentElementNames ;
this . spec = spec ;
this . parent = parent ;
elementsArray = copyAndAppend ( parentElementNames , spec . nodeName ( ) ) ;
// e.g. elem1/sub1
elementsId = mergeNodeNames ( elementsArray ) ;
}
ElementStruct ( ElementSpec spec , Supplier < GuidelineEvaluation > runtimeInfo ) {
this . runtimeInfo = runtimeInfo ;
this . parentElementNames = EMPTY ;
this . spec = spec ;
this . parent = null ;
elementsArray = copyAndAppend ( parentElementNames , spec . nodeName ( ) ) ;
// e.g. elem1/sub1
elementsId = mergeNodeNames ( elementsArray ) ;
}
private String xpath ( boolean withText ) {
String xpathForThis = xpathForNodeName ( spec . nodeName ( ) , spec . valuePrefix ( ) ) ;
String xpath ;
2023-07-14 13:02:03 +02:00
if ( parent = = null ) {
2022-09-16 12:56:52 +02:00
// This is the top-level element rule
List < String > pathComponents = spec . parents ( ) . stream ( ) .
map ( s - > " *[ " + xpathForNodeName ( s ) + " ] " ) . collect ( Collectors . toList ( ) ) ;
pathComponents . add ( " *[ " + xpathForThis + " ] " ) ;
xpath = " // " + String . join ( " / " , pathComponents ) ;
2023-07-14 13:02:03 +02:00
} else {
2022-09-16 12:56:52 +02:00
xpath = parent . xpath ( false ) + " /*[ " + xpathForThis + " ] " ;
}
2023-07-14 13:02:03 +02:00
if ( spec . position ( ) . xpath ! = null ) {
2022-09-16 12:56:52 +02:00
xpath = " ( " + xpath + " )[ " + spec . position ( ) . xpath + " ] " ;
}
2023-07-14 13:02:03 +02:00
if ( logger . isTraceEnabled ( ) )
logger . trace ( xpath ) ;
2022-09-16 12:56:52 +02:00
return xpathWithText ( xpath , withText ) ;
}
private void createRootElemRule ( CompilationResult compilationResult ) {
String id = nodeRuleIdFor (
" Element " ,
elementsId ,
spec . requirementLevel ( ) ,
spec . position ( ) ,
spec . valuePrefix ( ) ,
spec . allowedValuesPredicate ( ) ,
spec . cardinality ( )
) ;
SyntheticRule < Document > rule = createActualRootElementRule (
runtimeInfo ,
id ,
xpath ( false ) ,
" text() " ,
spec . cardinality ( ) ,
spec . allowedValuesPredicate ( ) ,
spec . applicabilityRule ( )
) ;
compilationResult . ruleIdToRequirementLevel . put ( id , spec . requirementLevel ( ) ) ;
compilationResult . rootNodeRule = rule ;
this . rule = rule ;
}
private void createAttrRule ( CompilationResult compilationResult , AttributeSpec attrSpec ) {
String attrRuleIdComponent = mergeNodeNames ( elementsId , attrSpec . nodeName ( ) ) ;
String attrRuleId = nodeRuleIdFor (
" Attribute " ,
attrRuleIdComponent ,
attrSpec . requirementLevel ( ) ,
ElementPosition . ALL ,
null ,
spec . allowedValuesPredicate ( ) ,
spec . cardinality ( )
) ;
SyntheticRule < Document > rule = createActualAttributeSpecRule (
runtimeInfo ,
this . rule ,
attrRuleId ,
attrSpec . nodeName ( ) , // this is resolved in the context of its parent elements
attrSpec . allowedValuesPredicate ( ) ,
attrSpec . applicabilityRule ( )
) ;
compilationResult . ruleIdToRequirementLevel . put ( attrRuleId , attrSpec . requirementLevel ( ) ) ;
compilationResult . nodeRules . add ( rule ) ;
this . rule = rule ;
}
private void createSubElemRule ( CompilationResult compilationResult ) {
String id = nodeRuleIdFor (
" Element " ,
elementsId ,
spec . requirementLevel ( ) ,
spec . position ( ) ,
spec . valuePrefix ( ) ,
spec . allowedValuesPredicate ( ) ,
spec . cardinality ( )
) ;
SyntheticRule < Document > rule = createActualSubElementRule (
runtimeInfo ,
parent . rule ,
id ,
xpath ( false ) ,
" text() " ,
spec . cardinality ( ) ,
spec . allowedValuesPredicate ( ) ,
spec . applicabilityRule ( )
) ;
compilationResult . ruleIdToRequirementLevel . put ( id , spec . requirementLevel ( ) ) ;
compilationResult . nodeRules . add ( rule ) ;
this . rule = rule ;
}
}
/**
 * Rule for the root element of a spec: evaluates the context's XPath against the
 * document and checks the resulting node list with the configured predicate.
 */
private static class RootElemRule extends ElemRule {

    RootElemRule(XMLContext context,
                 Predicate<NodeList> nodeListPredicate,
                 Rule<Document> applicabilityRule,
                 Supplier<GuidelineEvaluation> runtimeInfo) {
        // A root rule has no parent rule.
        super(context, null, nodeListPredicate, applicabilityRule, runtimeInfo);
    }

    /**
     * @param doc the document under validation
     * @return true when the rule is not applicable or the node list satisfies the predicate
     * @throws RuleEvaluationException wrapping anything thrown while evaluating the XPath or predicate
     */
    @Override
    public boolean test(Document doc) throws RuleEvaluationException {
        GuidelineEvaluation evaluation = runtimeInfo.get();
        String ruleId = getContext().getIdProperty().getValue();

        if (!isApplicable(this, evaluation).test(doc)) {
            // Not applicable: pass silently.
            return true;
        }

        try {
            logger.debug(" Applying {} ", ruleId);
            NodeList matched = getContext().getXPathExpressionProperty().evaluate(doc);
            boolean passed = predicate.test(matched);
            if (passed) {
                // Record the nodes so that dependent rules can look them up by this rule's id.
                logger.debug(" Setting node list of this rule {} ", ruleId);
                evaluation.setNodesOf(ruleId, matched);
            }
            return passed;
        } catch (Throwable failure) {
            throw new RuleEvaluationException(failure.getMessage(), failure);
        }
    }
}
private static class SubElemRule extends ElemRule {
SubElemRule ( XMLContext context ,
SyntheticRule < Document > parentRule ,
Predicate < NodeList > nodeListPredicate ,
Rule < Document > applicabilityRule ,
Supplier < GuidelineEvaluation > runtimeInfo ) {
super ( context , parentRule , nodeListPredicate , applicabilityRule , runtimeInfo ) ;
}
@Override
public boolean test ( Document doc ) throws RuleEvaluationException {
GuidelineEvaluation guidelineEvaluation = runtimeInfo . get ( ) ;
String thisId = getContext ( ) . getIdProperty ( ) . getValue ( ) ;
String parentRuleId = parentRule . getContext ( ) . getIdProperty ( ) . getValue ( ) ;
if ( guidelineEvaluation . getRequirementLevelOf ( parentRuleId ) = = RequirementLevel . NOT_APPLICABLE ) {
// Our parent is not applicable, set ourselves as not_applicable too and silently pass
guidelineEvaluation . setRequirementLevelOf ( thisId , RequirementLevel . NOT_APPLICABLE ) ;
return true ;
}
if ( isApplicable ( this , guidelineEvaluation ) . test ( doc ) ) {
try {
logger . debug ( " Appying {} " , thisId ) ; ;
NodeList nodes = getContext ( ) . getXPathExpressionProperty ( ) . evaluate ( doc ) ;
boolean result = predicate . test ( nodes ) ;
if ( result ) {
logger . debug ( " Setting node list of this rule {} " , thisId ) ;
guidelineEvaluation . setNodesOf ( thisId , nodes ) ;
}
return result ;
} catch ( Throwable t ) {
throw new RuleEvaluationException ( t . getMessage ( ) , t ) ;
}
}
else {
return true ;
}
}
}
/**
 * Common base of the element rules: an XML rule that additionally knows its parent
 * rule, its applicability rule and where to obtain the evaluation state.
 */
private static class ElemRule extends XMLRule<XMLContext> implements SyntheticRule<Document> {

    /** Supplier of the evaluation state consulted when the rule runs. */
    protected final Supplier<GuidelineEvaluation> runtimeInfo;
    /** Rule this one depends on; null for a root rule. */
    protected final SyntheticRule<Document> parentRule;
    /** Optional rule deciding whether this rule applies; may be null. */
    protected final Rule<Document> applicabilityRule;

    ElemRule(XMLContext context,
             SyntheticRule<Document> parentRule,
             Predicate<NodeList> nodeListPredicate,
             Rule<Document> applicabilityRule,
             Supplier<GuidelineEvaluation> runtimeInfo) {
        super(context, nodeListPredicate);
        this.runtimeInfo = runtimeInfo;
        this.parentRule = parentRule;
        this.applicabilityRule = applicabilityRule;
    }

    /** @return the parent rule given at construction */
    @Override
    public SyntheticRule<Document> parentRule() {
        return this.parentRule;
    }

    /** @return the applicability rule given at construction */
    @Override
    public Rule<Document> applicabilityRule() {
        return this.applicabilityRule;
    }
}
private static class NodeSpecRule < C extends RuleContext > implements SyntheticRule < Document > {
private final C context ;
private final Predicate < NodeList > nodeListPredicate ;
private final Rule < Document > applicabilityRule ;
private final SyntheticRule < Document > parentRule ;
private final Supplier < GuidelineEvaluation > runtimeInfo ;
NodeSpecRule ( C context ,
SyntheticRule < Document > parentRule ,
Predicate < NodeList > nodeListPredicate ,
Rule < Document > applicabilityRule ,
Supplier < GuidelineEvaluation > runtimeInfo ) {
this . context = context ;
this . parentRule = parentRule ;
this . nodeListPredicate = nodeListPredicate ;
this . applicabilityRule = applicabilityRule ;
this . runtimeInfo = runtimeInfo ;
}
@Override
public C getContext ( ) {
return context ;
}
@Override
public SyntheticRule < Document > parentRule ( ) {
return parentRule ;
}
@Override
public Rule < Document > applicabilityRule ( ) {
return applicabilityRule ;
}
@Override
public boolean test ( Document doc ) throws RuleEvaluationException {
GuidelineEvaluation guidelineEvaluation = runtimeInfo . get ( ) ;
String thisId = getContext ( ) . getIdProperty ( ) . getValue ( ) ;
String parentRuleId = parentRule . getContext ( ) . getIdProperty ( ) . getValue ( ) ;
if ( guidelineEvaluation . getRequirementLevelOf ( parentRuleId ) = = RequirementLevel . NOT_APPLICABLE ) {
// Our parent is not applicable, set ourselves as not_applicable too and silently pass
guidelineEvaluation . setRequirementLevelOf ( thisId , RequirementLevel . NOT_APPLICABLE ) ;
return true ;
}
if ( isApplicable ( this , guidelineEvaluation ) . test ( doc ) ) {
// We just use the doc to lookup the nodes of our parent
NodeList nodes = guidelineEvaluation . getNodesOf ( parentRuleId ) ;
logger . debug ( " Acquired node list of parent rule {} = {} " , parentRuleId , ( nodes = = null ? " null " : nodes . getLength ( ) ) ) ;
boolean result = nodeListPredicate . test ( nodes ) ;
if ( result ) {
logger . debug ( " Setting node list of this rule {} " , thisId ) ;
guidelineEvaluation . setNodesOf ( thisId , nodes ) ;
}
return result ;
}
else {
return true ;
}
}
@Override
public String toString ( ) {
return getContext ( ) . getIdProperty ( ) . getValue ( ) ;
}
}
private static Predicate < Document > isApplicable ( SyntheticRule < Document > rule ,
GuidelineEvaluation guidelineEvaluation ) {
return ( Document doc ) - > {
Rule < Document > applicabilityRule = rule . applicabilityRule ( ) ;
if ( applicabilityRule = = null ) {
logger . debug ( " Null applicability rule of {} " , rule ) ;
return true ;
}
String thisId = rule . getContext ( ) . getIdProperty ( ) . getValue ( ) ;
if ( applicabilityRule . test ( doc ) ) {
logger . debug ( " Success of applicability rule of {} " , rule ) ;
guidelineEvaluation . setRequirementLevelOf ( thisId , RequirementLevel . MANDATORY ) ;
return true ;
} else {
logger . debug ( " Failure of applicability rule of {} " , rule ) ;
guidelineEvaluation . setRequirementLevelOf ( thisId , RequirementLevel . NOT_APPLICABLE ) ;
return false ;
}
} ;
}
}