2022-09-16 12:56:52 +02:00
package eu.dnetlib.validator2.validation.guideline.openaire ;
import eu.dnetlib.validator2.engine.Rule ;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule ;
import eu.dnetlib.validator2.engine.builtins.XMLVocabularyRule ;
2023-08-02 11:58:45 +02:00
import eu.dnetlib.validator2.validation.guideline.* ;
2022-09-16 12:56:52 +02:00
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate ;
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate ;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate ;
import org.w3c.dom.Document ;
import java.util.* ;
import java.util.stream.Collectors ;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.* ;
2023-03-15 16:02:14 +01:00
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.* ;
2022-09-16 12:56:52 +02:00
public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile {
/** Values handed to the optional titleType attribute of TITLE_SPEC. */
private static final String[] TITLE_TYPES = {
        " AlternativeTitle ",
        " Subtitle ",
        " TranslatedTitle ",
        " Other "
};

/** Values handed to the recommended nameType attribute of creatorName / contributorName. */
private static final String[] NAME_TYPES = {
        " Organizational ",
        " Personal "
};

/** Values handed to the mandatory contributorType attribute of CONTRIBUTOR_SPEC. */
private static final String[] CONTRIBUTOR_TYPES = {
        " ContactPerson ", " DataCollector ", " DataCurator ",
        " DataManager ", " Distributor ", " Editor ",
        " HostingInstitution ", " Producer ", " ProjectLeader ",
        " ProjectManager ", " ProjectMember ", " RegistrationAgency ",
        " RegistrationAuthority ", " RelatedPerson ", " Researcher ",
        " ResearchGroup ", " RightsHolder ", " Sponsor ",
        " Supervisor ", " WorkPackageLeader ", " Other "
};
/** Values handed to the recommended funderIdentifierType attribute in FUNDING_REFERENCE_SPEC. */
private static final String[] FUNDER_IDENTIFIER_TYPES = {
        " ISNI ",
        " GRID ",
        " Crossref Funder "
};

/** Identifier-type values shared by the alternateIdentifierType and relatedIdentifierType attributes. */
private static final String[] IDENTIFIER_TYPES = {
        " ARK ", " arXiv ", " bibcode ", " DOI ", " EAN13 ",
        " EISSN ", " Handle ", " IGSN ", " ISBN ", " ISSN ",
        " ISTC ", " LISSN ", " LSID ", " PISSN ", " PMID ",
        " PURL ", " UPC ", " URL ", " URN ", " WOS ",
};
/** Values handed to the mandatory relationType attribute of RELATED_IDENTIFIER_SPEC. */
private static final String[] RELATION_TYPES = {
        " IsCitedBy ", " Cites ",
        " IsSupplementTo ", " IsSupplementedBy ",
        " IsContinuedBy ", " Continues ",
        " IsDescribedBy ", " Describes ",
        " HasMetadata ", " IsMetadataFor ",
        " HasVersion ", " IsVersionOf ",
        " IsNewVersionOf ", " IsPreviousVersionOf ",
        " IsPartOf ", " HasPart ",
        " IsReferencedBy ", " References ",
        " IsDocumentedBy ", " Documents ",
        " IsCompiledBy ", " Compiles ",
        " IsVariantFormOf ", " IsOriginalFormOf ",
        " IsIdenticalTo ",
        " IsReviewedBy ", " Reviews ",
        " IsDerivedFrom ", " IsSourceOf ",
        " IsRequiredBy ", " Requires "
};

/** Values handed to the optional resourceTypeGeneral attribute of RELATED_IDENTIFIER_SPEC. */
private static final String[] RELATED_RESOURCE_GENERAL_TYPES = {
        " Audiovisual ", " Collection ", " DataPaper ", " Dataset ", " Event ",
        " Image ", " InteractiveResource ", " Model ", " PhysicalObject ", " Service ",
        " Software ", " Sound ", " Text ", " Workflow ", " Other "
};
/** Values handed to the mandatory dateType attribute of EMBARGO_PERIOD_DATE_SPEC. */
private static final String[] EMBARGO_DATE_TYPES = {
        " Accepted ",
        " Available "
};

/** Single value handed to the mandatory dateType attribute of PUBLICATION_DATE_SPEC. */
private static final String[] PUBLICATION_DATE_TYPE = {
        " Issued "
};

/** Values handed to the mandatory resourceTypeGeneral attribute of RESOURCE_TYPE_SPEC. */
private static final String[] RESOURCE_GENERAL_TYPES = {
        " literature ",
        " dataset ",
        " software ",
        " other research product "
};
/** COAR resource-type concept URIs handed to the mandatory uri attribute of RESOURCE_TYPE_SPEC. */
private static final String[] RESOURCE_CONCEPT_URIS = {
        " http://purl.org/coar/resource_type/c_1162 ",
        " http://purl.org/coar/resource_type/c_6501 ",
        " http://purl.org/coar/resource_type/c_545b ",
        " http://purl.org/coar/resource_type/c_b239 ",
        " http://purl.org/coar/resource_type/c_2df8fbb1 ",
        " http://purl.org/coar/resource_type/c_dcae04bc ",
        " http://purl.org/coar/resource_type/c_beb9 ",
        " http://purl.org/coar/resource_type/c_3e5a ",
        " http://purl.org/coar/resource_type/c_ba08 ",
        " http://purl.org/coar/resource_type/c_3248 ",
        " http://purl.org/coar/resource_type/c_2f33 ",
        " http://purl.org/coar/resource_type/c_86bc ",
        " http://purl.org/coar/resource_type/c_816b ",
        " http://purl.org/coar/resource_type/c_8042 ",
        " http://purl.org/coar/resource_type/c_71bd ",
        " http://purl.org/coar/resource_type/c_18gh ",
        " http://purl.org/coar/resource_type/c_18ws ",
        " http://purl.org/coar/resource_type/c_18hj ",
        " http://purl.org/coar/resource_type/c_18op ",
        " http://purl.org/coar/resource_type/c_186u ",
        " http://purl.org/coar/resource_type/c_18wq ",
        " http://purl.org/coar/resource_type/c_18wz ",
        " http://purl.org/coar/resource_type/c_18ww ",
        " http://purl.org/coar/resource_type/c_efa0 ",
        " http://purl.org/coar/resource_type/c_baaf ",
        " http://purl.org/coar/resource_type/c_ba1f ",
        " http://purl.org/coar/resource_type/c_93fc ",
        " http://purl.org/coar/resource_type/c_15cd ",
        " http://purl.org/coar/resource_type/c_18co ",
        " http://purl.org/coar/resource_type/c_18cp ",
        " http://purl.org/coar/resource_type/c_6670 ",
        " http://purl.org/coar/resource_type/c_5794 ",
        " http://purl.org/coar/resource_type/c_c94f ",
        " http://purl.org/coar/resource_type/c_f744 ",
        " http://purl.org/coar/resource_type/c_7a1f ",
        " http://purl.org/coar/resource_type/c_bdcc ",
        " http://purl.org/coar/resource_type/c_db06 ",
        " http://purl.org/coar/resource_type/c_46ec ",
        " http://purl.org/coar/resource_type/c_0857 ",
        " http://purl.org/coar/resource_type/c_8544 ",
        " http://purl.org/coar/resource_type/c_18cf ",
        " http://purl.org/coar/resource_type/c_18cw ",
        " http://purl.org/coar/resource_type/c_18cd ",
        " http://purl.org/coar/resource_type/c_18cc ",
        " http://purl.org/coar/resource_type/c_12ce ",
        " http://purl.org/coar/resource_type/c_8a7e ",
        " http://purl.org/coar/resource_type/c_ecc8 ",
        " http://purl.org/coar/resource_type/c_c513 ",
        " http://purl.org/coar/resource_type/c_12cd ",
        " http://purl.org/coar/resource_type/c_12cc ",
        " http://purl.org/coar/resource_type/c_5ce6 ",
        " http://purl.org/coar/resource_type/c_ddb1 ",
        " http://purl.org/coar/resource_type/c_e9a0 ",
        " http://purl.org/coar/resource_type/c_7ad9 ",
        " http://purl.org/coar/resource_type/c_393c ",
        " http://purl.org/coar/resource_type/c_1843 "
};
/** Values handed to the mandatory identifierType attribute of RESOURCE_IDENTIFIER_SPEC. */
private static final String[] RESOURCE_IDENTIFIER_TYPES = {
        " ARK ",
        " DOI ",
        " Handle ",
        " PURL ",
        " URL ",
        " URN "
};
2023-03-15 16:02:14 +01:00
/** Access-rights labels passed to allowedValues(...) in ACCESS_RIGHTS_SPEC. */
private static final String[] ACCESS_RIGHTS_TYPES = {
        " open access ",
        " embargoed access ",
        " restricted access ",
        " metadata only access "
};
2022-09-16 12:56:52 +02:00
/** COAR access-right URIs used by the rightsURI (ACCESS_RIGHTS_SPEC) and accessRightsURI (FILE_LOCATION_SPEC) attributes. */
private static final String[] ACCESS_RIGHTS_URIS = {
        " http://purl.org/coar/access_right/c_abf2 ",
        " http://purl.org/coar/access_right/c_f1cf ",
        " http://purl.org/coar/access_right/c_16ec ",
        " http://purl.org/coar/access_right/c_14cb "
};

/** COAR version URIs handed to the uri attribute of RESOURCE_VERSION_SPEC. */
private static final String[] RESOURCE_VERSION_URIS = {
        " http://purl.org/coar/version/c_b1a7d7d4d402bcce ",
        " http://purl.org/coar/version/c_71e4c1898caa6e32 ",
        " http://purl.org/coar/version/c_ab4af688f83e57aa ",
        " http://purl.org/coar/version/c_fa2ee174bc00049f ",
        " http://purl.org/coar/version/c_970fb48d4fbd8a85 ",
        " http://purl.org/coar/version/c_e19f295774971610 ",
        " http://purl.org/coar/version/c_dc82b40f9837b551 ",
        " http://purl.org/coar/version/c_be7fb7dd8ff6fe43 "
};
/** Version labels joined into the vocabulary of applicabilityRuleForURIAttributeOfResourceVersion(). */
private static final String[] RESOURCE_VERSION_LABELS = {
        " AO ", " SMUR ", " AM ", " P ",
        " VoR ", " CVoR ", " EVoR ", " NA "
};

/** Values handed to the recommended objectType attribute of FILE_LOCATION_SPEC. */
private static final String[] FILE_OBJECT_TYPES = {
        " fulltext ",
        " dataset ",
        " software ",
        " other "
};

/** Audience terms passed to allowedValues(...) in AUDIENCE_SPEC. */
private static final String[] AUDIENCE_VOCABULARY = {
        " Administrators ",
        " Community Groups ",
        " Counsellors ",
        " Federal Funds Recipients and Applicants ",
        " Librarians ",
        " News Media ",
        " Other ",
        " Parents and Families ",
        " Policymakers ",
        " Researchers ",
        " School Support Staff ",
        " Student Financial Aid Providers ",
        " Students ",
        " Teachers "
};
private static final ElementSpec TITLE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:title " , ONE_TO_N ) .
2022-09-16 12:56:52 +02:00
withOptionalAttribute ( " xml:lang " , new RegexValuePredicate ( COMPILED_BCP47_LANG_TAGS_REG_EX ) ) .
withOptionalAttribute ( " titleType " , TITLE_TYPES ) .
build ( ) ;
private static final ElementSpec CREATOR_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:creator " , ONE_TO_N ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:creatorName " , ONE ) .
2022-09-16 12:56:52 +02:00
withRecommendedAttribute ( " nameType " , NAME_TYPES ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " datacite:givenName " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " datacite:familyName " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:nameIdentifier " ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " nameIdentifierScheme " ) .
withRecommendedAttribute ( " schemeURI " ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:affiliation " ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CONTRIBUTOR_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " datacite:contributor " , ONE_TO_N , elementIsPresent ( " datacite:contributor " ) ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " contributorType " , CONTRIBUTOR_TYPES ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:contributorName " , ONE ) .
2022-09-16 12:56:52 +02:00
withRecommendedAttribute ( " nameType " , NAME_TYPES ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:familyName " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:givenName " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:nameIdentifier " ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " nameIdentifierScheme " ) .
withRecommendedAttribute ( " schemeURI " ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:affiliation " ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
//This property has some issues/annotations in documentation
private static final ElementSpec FUNDING_REFERENCE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " oaire:fundingReference " , ONE_TO_N , elementIsPresent ( " oaire:fundingReference " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " oaire:funderName " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:funderIdentifier " ) .
2022-09-16 12:56:52 +02:00
withRecommendedAttribute ( " funderIdentifierType " , FUNDER_IDENTIFIER_TYPES ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " oaire:fundingStream " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " oaire:awardNumber " , ONE , elementIsPresent ( " oaire:awardNumber " ) ) .
2022-09-16 12:56:52 +02:00
withRecommendedAttribute ( " awardURI " ) ) .
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:awardTitle " ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
//TODO: Allowed values are referred to as "suggested" in the documentation, but a controlled list is then given.
// Relevant issues:
// https://bitbucket.org/saikos/openaire-validator/issues/40
// https://bitbucket.org/saikos/openaire-validator/issues/32/
private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:alternateIdentifier " ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " alternateIdentifierType " , IDENTIFIER_TYPES ) .
build ( ) ;
private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " datacite:relatedIdentifier " ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " relatedIdentifierType " , IDENTIFIER_TYPES ) .
withMandatoryAttribute ( " relationType " , RELATION_TYPES ) .
//TODO: For following 3 attributes. Need a way to target relationType attribute of current element
// - Should be used only with relation type (HasMetadata/IsMetadataFor).
withOptionalAttribute ( " relatedMetadataScheme " ) .
withOptionalAttribute ( " schemeURI " ) .
withOptionalAttribute ( " schemeType " ) .
withOptionalAttribute ( " resourceTypeGeneral " , RELATED_RESOURCE_GENERAL_TYPES ) .
build ( ) ;
/*
 Applicable when Access Rights is set to:
 <datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">embargoed access</datacite:rights>
 Date encoding "YYYY-MM-DD" is referred to as best practice. Should it be introduced in the allowed values?
*/
//TODO: Implement proper applicability rule
2023-03-15 16:02:14 +01:00
//LEONIDAS: The withMandatoryAttribute fails when another date element, e.g. for Publication Date, exists
2022-09-16 12:56:52 +02:00
private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders .
2023-10-19 15:27:27 +02:00
forMandatoryIfApplicableElement ( " datacite:date " , TWO , applicabilityRuleForEmbargoPeriodDate ( ) ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " dateType " , EMBARGO_DATE_TYPES ) .
2023-03-15 16:02:14 +01:00
allowedValues ( new RegexValuePredicate ( COMPILED_YYYY_MM_DD_RANGE_REGEX ) . or ( new RegexValuePredicate ( COMPILED_YEAR_YYYY_REG_EX ) ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
2023-03-15 16:02:14 +01:00
// private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders.
2023-07-27 13:41:00 +02:00
// forMandatoryIfApplicableElement("datacite:date", TWO, applicabilityRuleForEmbargoPeriodDate()).
2023-03-15 16:02:14 +01:00
// withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES).
// build();
2022-09-16 12:56:52 +02:00
/*
 There are no "strict" allowed values. Recommendations are IETF BCP 47 and ISO 639-x.
*/
private static final ElementSpec LANGUAGE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " dc:language " , ONE_TO_N , elementIsPresent ( " dc:language " ) ) .
2022-09-16 12:56:52 +02:00
allowedValues ( new RegexValuePredicate ( COMPILED_BCP47_LANG_TAGS_REG_EX ) . or ( new ISO639ValuePredicate ( ) ) ) .
build ( ) ;
private static final ElementSpec PUBLISHER_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " dc:publisher " , ONE_TO_N , elementIsPresent ( " dc:publisher " ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
/*
 TODO: Same element name as EMBARGO_PERIOD_DATE_SPEC above, with a different allowed value for the attribute.
 Should probably revisit and take that into consideration when making the relevant rules.
 "Recommended" best practice for encoding the date value is ISO 8601 [W3CDTF] (YYYY-MM-DD) (YYYY mandatory).
*/
private static final ElementSpec PUBLICATION_DATE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:date " , ONE ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " dateType " , PUBLICATION_DATE_TYPE ) .
2023-03-15 16:02:14 +01:00
allowedValues ( new RegexValuePredicate ( COMPILED_PUBLICATION_DATE_REG_EX ) . or ( new RegexValuePredicate ( COMPILED_YYYY_MM_DD_RANGE_REGEX ) . or ( new RegexValuePredicate ( COMPILED_YEAR_YYYY_REG_EX ) ) ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec RESOURCE_TYPE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " oaire:resourceType " , ONE ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " resourceTypeGeneral " , RESOURCE_GENERAL_TYPES ) .
withMandatoryAttribute ( " uri " , RESOURCE_CONCEPT_URIS ) .
build ( ) ;
private static final ElementSpec DESCRIPTION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " dc:description " , ONE_TO_N , elementIsPresent ( " dc:description " ) ) .
2022-09-16 12:56:52 +02:00
withOptionalAttribute ( " xml:lang " , new RegexValuePredicate ( COMPILED_BCP47_LANG_TAGS_REG_EX ) ) .
build ( ) ;
private static final ElementSpec FORMAT_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " dc:format " ) .
2022-09-16 12:56:52 +02:00
allowedValues ( new MediaTypesValuePredicate ( ) ) .
build ( ) ;
private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:identifier " , ONE ) .
2022-09-16 12:56:52 +02:00
withMandatoryAttribute ( " identifierType " , RESOURCE_IDENTIFIER_TYPES ) .
build ( ) ;
private static final ElementSpec ACCESS_RIGHTS_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:rights " , ONE ) .
2023-10-19 09:53:06 +02:00
withMandatoryAttribute ( " rightsURI " , ACCESS_RIGHTS_URIS ) .
2023-03-15 16:02:14 +01:00
allowedValues ( ACCESS_RIGHTS_TYPES ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec SOURCE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " dc:source " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
//TODO: Should we check URI attribute values are valid?
private static final ElementSpec SUBJECT_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " datacite:subject " , ONE_TO_N , elementIsPresent ( " datacite:subject " ) ) .
2022-09-16 12:56:52 +02:00
withOptionalAttribute ( " subjectScheme " ) .
withOptionalAttribute ( " schemeURI " ) .
withOptionalAttribute ( " valueURI " ) .
build ( ) ;
private static final ElementSpec LICENSE_CONDITION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:licenseCondition " ) .
withMandatoryIfApplicableAttribute ( " uri " , elementIsPresent ( " oaire:licenseCondition " ) ) .
withMandatoryIfApplicableAttribute ( " startDate " , elementIsPresent ( " oaire:licenseCondition " ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec COVERAGE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedRepeatableElement ( " dc:coverage " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec SIZE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forOptionalRepeatableElement ( " datacite:size " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec GEO_LOCATION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forOptionalRepeatableElement ( " datacite:geoLocation " ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:geoLocationPoint " ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLongitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLatitude " , ONE ) ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:geoLocationBox " ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:westBoundLongitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:eastBoundLongitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:southBoundLatitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:northBoundLatitude " , ONE ) ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:geoLocationPlace " ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalRepeatableElement ( " datacite:geoLocationPolygon " ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:polygonPoint " , FOUR_TO_N ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLongitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLatitude " , ONE ) ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forOptionalElement ( " datacite:inPolygonPoint " ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLongitude " , ONE ) ) .
2022-09-16 12:56:52 +02:00
withSubElement ( Builders .
2023-07-27 13:41:00 +02:00
forMandatoryElement ( " datacite:pointLatitude " , ONE ) ) ) ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
/*
 TODO: RequirementLevel.RECOMMENDED, Cardinality.ONE?
 If the uri attribute is present, the element also has the relevant controlled allowed value mapped to its value.
 The uri attribute is applicable when the element value is one of the controlled values.
 Must be the relevant value of [AO, SMUR, AM, P, VoR, CVoR, EVoR, NA].
 Otherwise it can be a number.
 TODO: Should we cross-check that the attribute and element values correspond?
*/
private static final ElementSpec RESOURCE_VERSION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:version " ) .
2022-09-16 12:56:52 +02:00
withMandatoryIfApplicableAttribute ( " uri " , applicabilityRuleForURIAttributeOfResourceVersion ( ) , RESOURCE_VERSION_URIS ) .
build ( ) ;
//TODO: Has annotation/issue: accessRightsURI attribute values also appears on ACCESS_RIGHTS_SPEC. Should check it's the same?
private static final ElementSpec FILE_LOCATION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forMandatoryIfApplicableElement ( " oaire:file " , ONE_TO_N , elementIsPresent ( " oaire:file " ) ) .
2022-09-16 12:56:52 +02:00
withRecommendedAttribute ( " accessRightsURI " , ACCESS_RIGHTS_URIS ) .
withRecommendedAttribute ( " mimeType " , new MediaTypesValuePredicate ( ) ) .
withRecommendedAttribute ( " objectType " , FILE_OBJECT_TYPES ) .
build ( ) ;
private static final ElementSpec CITATION_TITLE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationTitle " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_VOLUME_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationVolume " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_ISSUE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationIssue " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_START_PAGE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationStartPage " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_END_PAGE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationEndPage " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_EDITION_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationEdition " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
private static final ElementSpec CITATION_CONFERENCE_PLACE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationConferencePlace " ) .
2022-09-16 12:56:52 +02:00
build ( ) ;
//TODO: Implement regex/allowedValuesPredicate
// Date has recommended best practice ISO 8601 [W3CDTF], and two [single date] [start date - end date] formats
private static final ElementSpec CITATION_CONFERENCE_DATE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forRecommendedElement ( " oaire:citationConferenceDate " ) .
2022-09-16 12:56:52 +02:00
allowedValues ( new RegexValuePredicate ( COMPILED_YYYY_MM_DD_REGEX ) . or ( new RegexValuePredicate ( COMPILED_YYYY_MM_DD_RANGE_REGEX ) ) ) .
build ( ) ;
//TODO: A non-exhaustive list is provided for values, derived from the Common Education Data Standards vocabulary
// Should we add it?
private static final ElementSpec AUDIENCE_SPEC = Builders .
2023-07-27 13:41:00 +02:00
forOptionalRepeatableElement ( " dcterms:audience " ) .
2022-09-16 12:56:52 +02:00
allowedValues ( AUDIENCE_VOCABULARY ) .
build ( ) ;
private static Rule < Document > applicabilityRuleForEmbargoPeriodDate ( ) {
return XMLCardinalityRule . builder ( ) .
setId ( ElementSpec . APPLICABILITY_RULE_ID ) .
// first predicate count(...) makes sure there is only one "Access Rights" element, and the second predicate verifies its value.
2023-03-15 16:02:14 +01:00
setXPathExpression ( " //*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access'] " ) .
2022-09-16 12:56:52 +02:00
setRange ( 1 , 1 ) .
setIsInclusive ( true ) .
build ( ) ;
}
/**
 * Builds the applicability rule for the uri attribute of RESOURCE_VERSION_SPEC.
 *
 * <p>The rule is an XML vocabulary rule over the text of oaire:version elements. Its vocabulary
 * is the RESOURCE_VERSION_LABELS array joined into one string, with terms type "whitelist".
 *
 * @return the rule, identified by ElementSpec.APPLICABILITY_RULE_ID
 */
private static Rule<Document> applicabilityRuleForURIAttributeOfResourceVersion() {
    final String controlledLabels = String.join(" , ", RESOURCE_VERSION_LABELS);
    return XMLVocabularyRule.builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression(" //*[name()='oaire:version']/text() ")
            .setNodeListAction(" 1 ")
            .setVocabularyTermsAndTermsType(controlledLabels, " whitelist ")
            .build();
}
//TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 1.
2023-08-02 14:15:42 +02:00
public static SyntheticGuideline TITLE = SyntheticGuideline . of ( " Title " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_title.html#dci-title " , " F " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , TITLE_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static SyntheticGuideline CREATOR = SyntheticGuideline . of ( " Creator " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_creator.html#dci-creator " , " F " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , CREATOR_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static SyntheticGuideline CONTRIBUTOR = SyntheticGuideline . of ( " Contributor " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_contributor.html#dci-contributor " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , CONTRIBUTOR_SPEC ) ;
public static SyntheticGuideline FUNDING_REFERENCE = SyntheticGuideline . of ( " Funding Reference " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_projectid.html#aire-fundingreference " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , FUNDING_REFERENCE_SPEC ) ;
public static SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline . of ( " Alternate Identifier " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_alternativeidentifier.html#dci-alternativeidentifier " , " F " ,
1 , RequirementLevel . RECOMMENDED , ALTERNATE_IDENTIFIER_SPEC ) ;
public static SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline . of ( " Related Identifier " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_relatedidentifier.html#dci-relatedidentifier " , " F " ,
1 , RequirementLevel . RECOMMENDED , RELATED_IDENTIFIER_SPEC ) ;
public static SyntheticGuideline EMBARGO_PERIOD_DATE = SyntheticGuideline . of ( " Embargo Period Date " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_embargoenddate.html#dci-dateembargo " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , EMBARGO_PERIOD_DATE_SPEC ) ;
public static SyntheticGuideline LANGUAGE = SyntheticGuideline . of ( " Language " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_language.html#dc-language " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , LANGUAGE_SPEC ) ;
public static SyntheticGuideline PUBLISHER = SyntheticGuideline . of ( " Publisher " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_publisher.html#dc-publisher " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , PUBLISHER_SPEC ) ;
public static SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline . of ( " Publication Date " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_publicationdate.html#dci-datepublication " , " F " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , PUBLICATION_DATE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline . of ( " Resource Type " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_publicationtype.html#aire-resourcetype " , " F, R " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , RESOURCE_TYPE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline DESCRIPTION = SyntheticGuideline . of ( " Description " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_description.html#dc-description " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , DESCRIPTION_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline FORMAT = SyntheticGuideline . of ( " Format " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_format.html#dc-format " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . RECOMMENDED , FORMAT_SPEC ) ;
public static SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline . of ( " Resource Identifier " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_resourceidentifier.html#dci-identifier " , " F " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , RESOURCE_IDENTIFIER_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static SyntheticGuideline ACCESS_RIGHTS = SyntheticGuideline . of ( " Access Rights " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_accessrights.html#dci-accessrights " , " F " ,
2023-08-02 11:58:45 +02:00
1 , RequirementLevel . MANDATORY , ACCESS_RIGHTS_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static SyntheticGuideline SOURCE = SyntheticGuideline . of ( " Source " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_source.html#dc-source " , " F " ,
1 , RequirementLevel . RECOMMENDED , SOURCE_SPEC ) ;
public static SyntheticGuideline SUBJECT = SyntheticGuideline . of ( " Subject " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_subject.html#dci-subject " , " F " ,
1 , RequirementLevel . MANDATORY_IF_APPLICABLE , SUBJECT_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline . of ( " License Condition " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_licensecondition.html#aire-licensecondition " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . RECOMMENDED , LICENSE_CONDITION_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline COVERAGE = SyntheticGuideline . of ( " Coverage " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_coverage.html#dc-coverage " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . RECOMMENDED , COVERAGE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline SIZE = SyntheticGuideline . of ( " Size " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_size.html#dci-size " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . OPTIONAL , SIZE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline GEO_LOCATION = SyntheticGuideline . of ( " Geo Location " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_geolocation.html#dci-geolocation " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . OPTIONAL , GEO_LOCATION_SPEC ) ;
/**
 * Guideline entry for the "Resource Version" field, built from
 * {@code RESOURCE_VERSION_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline RESOURCE_VERSION = SyntheticGuideline.of(
        " Resource Version ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_resourceversion.html#aire-version ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        RESOURCE_VERSION_SPEC);
/**
 * Guideline entry for the "File Location" field, built from
 * {@code FILE_LOCATION_SPEC} with {@code RequirementLevel.MANDATORY_IF_APPLICABLE}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline FILE_LOCATION = SyntheticGuideline.of(
        " File Location ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_filelocation.html#aire-file ",
        " F ",
        1,
        RequirementLevel.MANDATORY_IF_APPLICABLE,
        FILE_LOCATION_SPEC);
/**
 * Guideline entry for the "Citation Title" field, built from
 * {@code CITATION_TITLE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_TITLE = SyntheticGuideline.of(
        " Citation Title ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationtitle.html#aire-citationtitle ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_TITLE_SPEC);
/**
 * Guideline entry for the "Citation Volume" field, built from
 * {@code CITATION_VOLUME_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_VOLUME = SyntheticGuideline.of(
        " Citation Volume ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationvolume.html#aire-citationvolume ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_VOLUME_SPEC);
/**
 * Guideline entry for the "Citation Issue" field, built from
 * {@code CITATION_ISSUE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_ISSUE = SyntheticGuideline.of(
        " Citation Issue ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationissue.html#aire-citationissue ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_ISSUE_SPEC);
/**
 * Guideline entry for the "Citation Start Page" field, built from
 * {@code CITATION_START_PAGE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_START_PAGE = SyntheticGuideline.of(
        " Citation Start Page ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationstartpage.html#aire-citationstartpage ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_START_PAGE_SPEC);
/**
 * Guideline entry for the "Citation End Page" field, built from
 * {@code CITATION_END_PAGE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_END_PAGE = SyntheticGuideline.of(
        " Citation End Page ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationendpage.html#aire-citationendpage ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_END_PAGE_SPEC);
/**
 * Guideline entry for the "Citation Edition" field, built from
 * {@code CITATION_EDITION_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_EDITION = SyntheticGuideline.of(
        " Citation Edition ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationedition.html#aire-citationedition ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_EDITION_SPEC);
/**
 * Guideline entry for the "Citation Conference Place" field, built from
 * {@code CITATION_CONFERENCE_PLACE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_CONFERENCE_PLACE = SyntheticGuideline.of(
        " Citation Conference Place ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationconferenceplace.html#aire-citationconferenceplace ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_CONFERENCE_PLACE_SPEC);
/**
 * Guideline entry for the "Citation Conference Date" field, built from
 * {@code CITATION_CONFERENCE_DATE_SPEC} with {@code RequirementLevel.RECOMMENDED}.
 * The link targets the field's page in the OpenAIRE literature guidelines
 * v4.0.0 documentation; the description argument looks like a placeholder.
 */
public static SyntheticGuideline CITATION_CONFERENCE_DATE = SyntheticGuideline.of(
        " Citation Conference Date ",
        " description ",
        " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_citationconferencedate.html#aire-citationconferencedate ",
        " F ",
        1,
        RequirementLevel.RECOMMENDED,
        CITATION_CONFERENCE_DATE_SPEC);
2023-09-21 10:22:20 +02:00
public static SyntheticGuideline AUDIENCE = SyntheticGuideline . of ( " Audience " , " description " , " https://openaire-guidelines-for-literature-repository-managers.readthedocs.io/en/v4.0.0/field_audience.html#dct-audience " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . OPTIONAL , AUDIENCE_SPEC ) ;
2022-09-16 12:56:52 +02:00
/**
 * All guidelines of this profile, exposed as a read-only list.
 * The element references are captured once, when the class is initialized.
 * NOTE(review): at least some of the referenced fields (e.g. RESOURCE_IDENTIFIER)
 * are public and non-final; reassigning one of them later would not be
 * reflected in this list.
 */
private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(Arrays.asList(
        TITLE, CREATOR, CONTRIBUTOR, FUNDING_REFERENCE,
        ALTERNATE_IDENTIFIER, RELATED_IDENTIFIER,
        EMBARGO_PERIOD_DATE, LANGUAGE, PUBLISHER, PUBLICATION_DATE,
        RESOURCE_TYPE, DESCRIPTION, FORMAT,
        RESOURCE_IDENTIFIER, ACCESS_RIGHTS, SOURCE, SUBJECT,
        LICENSE_CONDITION, COVERAGE, SIZE, GEO_LOCATION,
        RESOURCE_VERSION, FILE_LOCATION,
        CITATION_TITLE, CITATION_VOLUME, CITATION_ISSUE,
        CITATION_START_PAGE, CITATION_END_PAGE, CITATION_EDITION,
        CITATION_CONFERENCE_PLACE, CITATION_CONFERENCE_DATE,
        AUDIENCE));
private static final Map < String , SyntheticGuideline > GUIDELINE_MAP = GUIDELINES .
stream ( ) .
collect ( Collectors . toMap ( SyntheticGuideline : : getName , ( guideline ) - > guideline ) ) ;
private static final int MAX_SCORE = GUIDELINES . stream ( ) . map ( SyntheticGuideline : : getWeight ) . reduce ( 0 , Integer : : sum ) ;
/**
 * Creates the profile, handing the profile name string to the
 * {@code AbstractOpenAireProfile} constructor.
 */
public LiteratureGuidelinesV4Profile() {
    super(" OpenAIRE Guidelines for Literature Repositories Profile v4 ");
}
/**
 * Returns the guidelines that make up this profile.
 *
 * @return the shared unmodifiable {@code GUIDELINES} list itself (no copy is made)
 */
@Override
public Collection<? extends Guideline<Document>> guidelines() {
    return GUIDELINES;
}
/**
 * Looks up one guideline of this profile by name.
 *
 * @param guidelineName key to look up in {@code GUIDELINE_MAP}, whose keys are
 *                      the {@code getName()} values of the profile's guidelines
 * @return the guideline stored under that name, or {@code null} when the map
 *         has no entry for it
 */
@Override
public SyntheticGuideline guideline(String guidelineName) {
    final SyntheticGuideline match = GUIDELINE_MAP.get(guidelineName);
    return match;
}
/**
 * Returns the highest score this profile can award.
 *
 * @return the precomputed {@code MAX_SCORE}, i.e. the summed weights of all
 *         guidelines in {@code GUIDELINES}
 */
@Override
public int maxScore() {
    return MAX_SCORE;
}
}