uoa-validator-engine2/notes.txt

128 lines
3.7 KiB
Plaintext

DataFrame = (RecordId, TS, XMLRec)
LiteratureGuidelinesV3Profile profile = new LiteratureGuidelinesV3Profile();
spark.sql("Select * from DataFrame").map(
String id = RecordId;
Document doc = parse(XMLRec);
profile.validate(id, doc);
profile.guidelines().forEach() {
}
)
private static final ElementSpec TITLE_SPEC = Builders.
forElement("datacite:title", RequirementLevel.MANDATORY, Cardinality.ONE_TO_N).
//TODO: Add allowed values (IETF BCP 47, the IANA Language Subtag Registry)
withAttribute("xml:lang", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_N).
withAttribute("titleType", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_ONE,
"AlternativeTitle", "Subtitle", "TranslatedTitle", "Other").
build();
Builders.forElement("foo").optional(upperBound).recommended(upperBound).mandatory(lower, upper).
mandatoryIfApplicable(lower, upper, XMLRule)
// Context is always the full path to the element
Builders.forContext("record", "metadata", "oai_dc:dc") // check for : to determine xpath "syntax"
Builders.forElement("foo").valueMustStartWith("eu:info:///asdadf").allowedValues("one", "two" | Predicate);
Builders.forElement("foo").valueMustMatchPredicate(Predicate<String>)...
Builders.forElement("foo").valueMustNotMatchPredicate(Predicate<String>)...
Builders.forElement("dc:type").firstOccurrence("...").other("...");
Builders.forElement("foo").withCustomXpathValuePredicate("");
Builders.forElement("foo").withFilter();
Builders.forElement("foo", Req.Level.MANDATORY|REC|OPT|MandatoryIfApplicable(Predicate | Rule))
Builders.forElement("foo", Req.Level.MandatoryIfApplicable).applicableWhen("...") (TITLE_SPEC).
# CRIS
## Spec builders
### The general approach
TYPE_SPEC = Builders.forElement("Type").mplah().mplah();
FOO_SPEC = Builders.forElement("Foo").mplah().mplah();
Builders.forElement("Publication").inContext("record", "metadata").
withSubElement(TYPE_SPEC).
withSubElement(FOO_SPEC).
build();
### An element may contain "self-references"
Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
forOptionalElement("PublishedIn").
withSubElement(null); //TODO: Pass self
>> Introduce a "magic" self-reference method:
Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
forOptionalElement("PublishedIn").
withSubElement(rootSpec()); // or selfSpec() or thisSpec()
### An element may contain "supported classes" of sub-elements
Builders.
forOptionalRepeatableElement("References").
//TODO: Optional 1 of 3 (Publication, Patent, Product)
withSubElement(PUBLICATION_SPEC). //TODO: Pass proper spec
withSubElement(null). //TODO: Pass self
withSubElement(PRODUCT_SPEC); //TODO: Pass proper spec
>> Introduce a new withAllowedSubElements method that accepts a list of element specs:
Builders.
forOptionalRepeatableElement("References").
withAllowedSubElements(PUBLICATION_SPEC, rootSpec(), PRODUCT_SPEC);
>> Note: We can unify the above with a "magic" ref method, e.g. ref(FOO_SPEC).
### Support namespaces
For example:
<Publication xmlns="https://www.openaire.eu/cerif-profile/1.1/" id="812348"><!-- Linking Data and Publications: Towards a Cross-Disciplinary Approach -->
<Type xmlns="https://www.openaire.eu/cerif-profile/vocab/COAR_Publication_Types">http://purl.org/coar/resource_type/c_6501<!-- journal article --></Type>
...
</Publication>
>> TBD
## CRIS Profile Set / Family
class CRISProfileSet {
boolean enableCrossChecking = false
String baseURL = "oai:cris.example.org:"
XMLApplicationProfile[]
}
class NewXMLApplicationProfile {
String baseURL = "oai:cris.example.org:"
Resolver resolver = new Resolver(baseURL)
validate(id, doc) {
Rule
}
}