    DataFrame = (RecordId, TS, XMLRec)

    LiteratureGuidelinesV3Profile profile = new LiteratureGuidelinesV3Profile();

    spark.sql("Select * from DataFrame").map(
        String id = RecordId;
        Document doc = parse(XMLRec);
        profile.validate(id, doc);
        profile.guidelines().forEach() { }
    )

    private static final ElementSpec TITLE_SPEC = Builders.
            forElement("datacite:title", RequirementLevel.MANDATORY, Cardinality.ONE_TO_N).
            //TODO: Add allowed values (IETF BCP 47, the IANA Language Subtag Registry)
            withAttribute("xml:lang", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_N).
            withAttribute("titleType", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_ONE, "AlternativeTitle", "Subtitle", "TranslatedTitle", "Other").
            build();

    Builders.forElement("foo").optional(upperBound).recommended(upperBound).mandatory(lower, upper).
            mandatoryIfApplicable(lower, upper, XMLRule)

    // Context is always full path to element
    Builders.forContext("record", "metadata", "oai_dc:dc") // check for : to determine xpath "syntax"

    Builders.forElement("foo").valueMustStartWith("eu:info:///asdadf").allowedValues("one", "two" | Predicate);
    Builders.forElement("foo").valueMustMatchPredicate(Predicate)...
    Builders.forElement("foo").valueMustNotMatchPredicate(Predicate)...
    Builders.forElement("dc:type").firstOccurrence("...").other("...");
    Builders.forElement("foo").withCustomXpathValuePredicate("");
    Builders.forElement("foo").withFilter();

    Builders.forElement("foo", Req.Level.MANDATORY|REC|OPT|MandatoryIfApplicable(Predicate | Rule))
    Builders.forElement("foo", Req.LevelMandatoryIfApplicable).applicableWhen("...")
    (TITLE_SPEC).

# CRIS

## Spec builders

### The general approach

    TYPE_SPEC = Builders.forElement("Type").mplah().mplah();
    FOO_SPEC = Builders.forElement("Foo").mplah().mplah();

    Builders.forElement("Publication").inContext("record", "metadata").
            withSubElement(TYPE_SPEC).
            withSubElement(FOO_SPEC).
            build();

### An element may contain "self-references"

    Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
            forOptionalElement("PublishedIn").
            withSubElement(null); //TODO: Pass self

>> Introduce a "magic" self-reference method:

    Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
            forOptionalElement("PublishedIn").
            withSubElement(rootSpec()); // or selfSpec() or thisSpec()

### An element may contain "supported classes" of sub-elements

    Builders.
            forOptionalRepeatableElement("References"). //TODO: Optional 1 of 3 (Publication, Patent, Product)
            withSubElement(PUBLICATION_SPEC). //TODO: Pass proper spec
            withSubElement(null). //TODO: Pass self
            withSubElement(PRODUCT_SPEC); //TODO: Pass proper spec

>> Introduce a new withAllowedSubElements method that accepts a list of element specs:

    Builders.
            forOptionalRepeatableElement("References").
            withAllowedSubElements(PUBLICATION_SPEC, rootSpec(), PRODUCT_SPEC);

>> Note: We can unify the above with a "magic" ref method, e.g. ref(FOO_SPEC).

### Support namespaces

For example: http://purl.org/coar/resource_type/c_6501 ...

>> TBD

## CRIS Profile Set / Family

    class CRISProfileSet {
        boolean enableCrossChecking = false
        String baseURL = "oai:cris.example.org:"
        XMLApplicationProfile[]
    }

    class NewXMLApplicationProfile {
        String baseURL = "oai:cris.example.org:"
        Resolver resolver = new Resolver(baseURL)

        validate(id, doc) {
            Rule
        }
    }