128 lines
3.7 KiB
Plaintext
128 lines
3.7 KiB
Plaintext
DataFrame = (RecordId, TS, XMLRec)
|
|
|
|
LiteratureGuidelinesV3Profile profile = new LiteratureGuidelinesV3Profile();
|
|
|
|
spark.sql("Select * from DataFrame").map(
|
|
String id = RecordId;
|
|
Document doc = parse(XMLRec);
|
|
profile.validate(id, doc);
|
|
profile.guidelines().forEach() {
|
|
|
|
}
|
|
)
|
|
|
|
|
|
private static final ElementSpec TITLE_SPEC = Builders.
|
|
forElement("datacite:title", RequirementLevel.MANDATORY, Cardinality.ONE_TO_N).
|
|
//TODO: Add allowed values (IETF BCP 47, the IANA Language Subtag Registry)
|
|
withAttribute("xml:lang", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_N).
|
|
withAttribute("titleType", RequirementLevel.OPTIONAL, Cardinality.ZERO_TO_ONE,
|
|
"AlternativeTitle", "Subtitle", "TranslatedTitle", "Other").
|
|
build();
|
|
|
|
|
|
|
|
Builders.forElement("foo").optional(upperBound).recommended(upperBound).mandatory(lower, upper).
|
|
mandatoryIfApplicable(lower, upper, XMLRule)
|
|
|
|
|
|
// Context is always the full path to the element
|
|
Builders.forContext("record", "metadata", "oai_dc:dc") // check for ':' to determine the XPath "syntax"
|
|
Builders.forElement("foo").valueMustStartWith("eu:info:///asdadf").allowedValues("one", "two" | Predicate);
|
|
Builders.forElement("foo").valueMustMatchPredicate(Predicate<String>)...
|
|
Builders.forElement("foo").valueMustNotMatchPredicate(Predicate<String>)...
|
|
Builders.forElement("dc:type").firstOccurrence("...").other("...");
|
|
Builders.forElement("foo").withCustomXpathValuePredicate("");
|
|
Builders.forElement("foo").withFilter();
|
|
Builders.forElement("foo", Req.Level.MANDATORY|REC|OPT|MandatoryIfApplicable(Predicate | Rule))
|
|
Builders.forElement("foo", Req.LevelMandatoryIfApplicable).applicableWhen("...") (TITLE_SPEC).
|
|
|
|
# CRIS
|
|
|
|
## Spec builders
|
|
|
|
### The general approach
|
|
|
|
TYPE_SPEC = Builders.forElement("Type").mplah().mplah();
|
|
FOO_SPEC = Builders.forElement("Foo").mplah().mplah();
|
|
Builders.forElement("Publication").inContext("record", "metadata").
|
|
withSubElement(TYPE_SPEC).
|
|
withSubElement(FOO_SPEC).
|
|
build();
|
|
|
|
### An element may contain "self-references"
|
|
|
|
Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
|
|
forOptionalElement("PublishedIn").
|
|
withSubElement(null); //TODO: Pass self
|
|
|
|
>> Introduce a "magic" self-reference method:
|
|
Builders.ElementSpecBuilder PUBLISHED_IN_SPEC = Builders.
|
|
forOptionalElement("PublishedIn").
|
|
withSubElement(rootSpec()); // or selfSpec() or thisSpec()
|
|
|
|
|
|
### An element may contain "supported classes" of sub-elements
|
|
Builders.
|
|
forOptionalRepeatableElement("References").
|
|
//TODO: Optional 1 of 3 (Publication, Patent, Product)
|
|
withSubElement(PUBLICATION_SPEC). //TODO: Pass proper spec
|
|
withSubElement(null). //TODO: Pass self
|
|
withSubElement(PRODUCT_SPEC); //TODO: Pass proper spec
|
|
|
|
>> Introduce a new withAllowedSubElements method that accepts a list of element specs:
|
|
Builders.
|
|
forOptionalRepeatableElement("References").
|
|
withAllowedSubElements(PUBLICATION_SPEC, rootSpec(), PRODUCT_SPEC);
|
|
|
|
|
|
>> Note: We can unify the above with a "magic" ref method, e.g. ref(FOO_SPEC).
|
|
|
|
### Support namespaces
|
|
For example:
|
|
<Publication xmlns="https://www.openaire.eu/cerif-profile/1.1/" id="812348"><!-- Linking Data and Publications: Towards a Cross-Disciplinary Approach -->
|
|
<Type xmlns="https://www.openaire.eu/cerif-profile/vocab/COAR_Publication_Types">http://purl.org/coar/resource_type/c_6501<!-- journal article --></Type>
|
|
...
|
|
</Publication>
|
|
|
|
>> TBD
|
|
|
|
## CRIS Profile Set / Family
|
|
|
|
class CRISProfileSet {
|
|
boolean enableCrossChecking = false
|
|
String baseURL = "oai:cris.example.org:"
|
|
XMLApplicationProfile[]
|
|
}
|
|
|
|
class NewXMLApplicationProfile {
|
|
String baseURL = "oai:cris.example.org:"
|
|
Resolver resolver = new Resolver(baseURL)
|
|
validate(id, doc) {
|
|
Rule
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|