package eu.dnetlib.validator2.validation.guideline.openaire; import eu.dnetlib.validator2.engine.Rule; import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule; import eu.dnetlib.validator2.validation.guideline.*; import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate; import eu.dnetlib.validator2.validation.utils.RegexValuePredicate; import org.w3c.dom.Document; import java.util.*; import java.util.stream.Collectors; import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE; import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N; import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_ISO_8601_DATE_REG_EX; import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_YEAR_YYYY_REG_EX; public class DataArchiveGuidelinesV2Profile extends AbstractOpenAireProfile { private static final String[] identifierTypes = { "ARK", "DOI", "Handle", "PURL", "URN", "URL" }; private static final String[] titleTypes = { "AlternativeTitle", "Subtitle", "TranslatedTitle" }; private static final String[] contributorTypes = { "ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", "Editor", "Funder", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", "RegistrationAuthority", "RelatedPerson", "Researcher", "ResearchGroup", "RightsHolder", "Sponsor", "Supervisor", "WorkPackageLeader", "Other" }; private static final String[] dateTypes = { "Accepted", "Available", "Copyrighted", "Collected", "Created", "Issued", "Submitted", "Updated", "Valid" }; private static final String[] resourceTypeGeneralValues = { "Audiovisual", "Collection", "Dataset", "Event", "Image", "InteractiveResource", "Model", "PhysicalObject", "Service", "Software", "Sound", "Text", "Workflow", "Other" }; private static final String[] relatedIdentifierTypes = { "ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN", "ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN", "WOS" }; private static final String[] relationTypes = { "IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy", "Continues", "HasMetadata", "IsMetadataFor", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart", "IsReferencedBy", "References", "IsDocumentedBy", "Documents", "IsCompiledBy", "Compiles", "IsVariantFormOf", "IsOriginalFormOf", "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf" }; private static final String[] rightsURIList = { "info:eu-repo/semantics/closedAccess", "info:eu-repo/semantics/embargoedAccess", "info:eu-repo/semantics/restrictedAccess", "info:eu-repo/semantics/openAccess" }; private static final String[] descriptionTypes = { "Abstract", "Methods", "SeriesInformation", "TableOfContents", "Other" }; private static final ElementSpec IDENTIFIER_SPEC = Builders .forMandatoryElement("identifier", ONE).inContext("metadata", "oai_datacite", "payload", "resource") .withMandatoryAttribute("identifierType", identifierTypes) .build(); private static final ElementSpec CREATOR_SPEC = Builders .forMandatoryElement("creator", ONE_TO_N) .withSubElement(Builders.forMandatoryElement("creatorName", ONE)) .withSubElement(Builders .forRecommendedElement("nameIdentifier") .withRecommendedAttribute("nameIdentifierScheme") .withRecommendedAttribute("schemeURI") ) .withSubElement(Builders.forRecommendedRepeatableElement("affiliation")) .build(); private static final ElementSpec TITLE_SPEC = Builders .forMandatoryElement("title", ONE_TO_N) .withOptionalAttribute("titleType", titleTypes) .build(); private static final ElementSpec PUBLISHER_SPEC = Builders .forMandatoryElement("publisher", ONE) .build(); private static final ElementSpec PUBLICATION_YEAR_SPEC = Builders .forMandatoryElement("publicationYear", ONE).allowedValues(new RegexValuePredicate(COMPILED_YEAR_YYYY_REG_EX)) .build(); private static final ElementSpec SUBJECT_SPEC = Builders .forRecommendedRepeatableElement("subject") .withOptionalAttribute("subjectScheme") .withOptionalAttribute("schemeURI") .build(); //TODO mandatory if contributorType=Funder (for element and subelements/properties) private static final ElementSpec CONTRIBUTOR_SPEC = Builders .forMandatoryIfApplicableElement("contributor", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("contributor")) .withMandatoryIfApplicableAttribute("contributorType", appRuleForContributorType(), contributorTypes) //TODO .withSubElement(Builders .forMandatoryIfApplicableElement("contributorName", ONE, AbstractOpenAireProfile.elementIsPresent("contributor")) ) //TODO the RegexValueProdicate for info:eu-repo/grantAgreement must be applied if contributorType is Funder .withSubElement(Builders.forMandatoryIfApplicableElement("nameIdentifier", ONE, appRuleForContributorNameIdentifier()) .withMandatoryIfApplicableAttribute("nameIdentifierScheme", appRuleForContributorNameIdentifier()) .withOptionalAttribute("schemeURI ") ) .withSubElement(Builders.forOptionalRepeatableElement("affiliation")) .build(); //TODO: guideline is invalid (0..n). Since Date is mandatory 1..n //TODO how to enforce that available dateTypes appear only once? private static final ElementSpec DATE_SPEC = Builders .forMandatoryElement("date", ONE_TO_N).allowedValues(new RegexValuePredicate(COMPILED_ISO_8601_DATE_REG_EX)) .withMandatoryAttribute("dateType", dateTypes) .build(); private static final ElementSpec LANGUAGE_SPEC = Builders .forRecommendedRepeatableElement("language").allowedValues(new ISO639ValuePredicate()) .build(); private static final ElementSpec RESOURCE_TYPE_SPEC = Builders .forRecommendedElement("resourceType") .withRecommendedAttribute("resourceTypeGeneral", resourceTypeGeneralValues) .build(); private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders .forOptionalRepeatableElement("alternateIdentifier") .withOptionalAttribute("alternateIdentifierType") .build(); private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders .forMandatoryIfApplicableElement("relatedIdentifier", ONE_TO_N, appRuleForRelatedIdentifier()) .withMandatoryAttribute("relatedIdentifierType", relatedIdentifierTypes) .withMandatoryAttribute("relationType", relationTypes) //TODO use the following three attributes only when relationType=HasMetadata|IsMetadataFor .withOptionalAttribute("relatedMetadataScheme") .withOptionalAttribute("schemeURI") .withOptionalAttribute("schemeType") .build(); private static final ElementSpec SIZE_SPEC = Builders .forOptionalRepeatableElement("size") .build(); private static final ElementSpec FORMAT_SPEC = Builders .forOptionalRepeatableElement("format") .build(); private static final ElementSpec VERSION_SPEC = Builders .forOptionalRepeatableElement("version") .build(); private static final ElementSpec RIGHTS_SPEC = Builders .forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights")) .atPosition(ElementPosition.FIRST) .withMandatoryAttribute("rightsURI", rightsURIList) .build(); // private static final ElementSpec RIGHTS_SPEC2 = Builders // .forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights")) // .atPosition(ElementPosition.SECOND) // .withMandatoryAttribute("rightsURI") // .build(); private static final ElementSpec DESCRIPTION_SPEC = Builders .forMandatoryIfApplicableElement("description", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights")) .withMandatoryAttribute("descriptionType", descriptionTypes) .build(); private static final ElementSpec GEOLOCATION_SPEC = Builders .forOptionalRepeatableElement("geoLocation") .withSubElement(Builders.forOptionalElement("geoLocationPoint")) .withSubElement(Builders.forOptionalElement("geoLocationBox")) .withSubElement(Builders.forOptionalElement("geoLocationPlace")) .build(); private static Rule appRuleForContributorType() { //TODO return XMLCardinalityRule .builder() .setId(ElementSpec.APPLICABILITY_RULE_ID) .setXPathExpression("//*[name()='contributor']/@contributorType") .setIsInclusive(true).setRange(1,1).build(); } private static Rule appRuleForContributorNameIdentifier() { return XMLCardinalityRule .builder() .setId(ElementSpec.APPLICABILITY_RULE_ID) .setXPathExpression("//*[name()='contributor' and @contributorType='Funder']") .setIsInclusive(true).setRange(1,1).build(); } private static Rule appRuleForRelatedIdentifier() { //TODO return XMLCardinalityRule .builder() .setId(ElementSpec.APPLICABILITY_RULE_ID) .setXPathExpression("//*[name()='relatedIdentifier']") .setIsInclusive(true).setRange(1,Long.MAX_VALUE - 1).build(); } //TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 1. public static final SyntheticGuideline IDENTIFIER = SyntheticGuideline.of("Identifier", "description", "https://guidelines.openaire.eu/en/latest/literature/field_identifier.html", "F", 4, RequirementLevel.MANDATORY, IDENTIFIER_SPEC); public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", "description", "https://guidelines.openaire.eu/en/latest/data/field_creator.html", "F", 4, RequirementLevel.MANDATORY, CREATOR_SPEC); public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", "description", "https://guidelines.openaire.eu/en/latest/data/field_title.html", "F", 4, RequirementLevel.MANDATORY, TITLE_SPEC); public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", "description", "https://guidelines.openaire.eu/en/latest/data/field_publisher.html", "F", 4, RequirementLevel.MANDATORY, PUBLISHER_SPEC); public static final SyntheticGuideline PUBLICATION_YEAR = SyntheticGuideline.of("Publication Year", "description", "https://guidelines.openaire.eu/en/latest/data/field_publicationyear.html", "F", 4, RequirementLevel.MANDATORY, PUBLICATION_YEAR_SPEC); public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", "description", "https://guidelines.openaire.eu/en/latest/data/field_subject.html", "F", 4, RequirementLevel.RECOMMENDED, SUBJECT_SPEC); public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", "description", "https://guidelines.openaire.eu/en/latest/data/field_contributor.html", "F", 4, RequirementLevel.MANDATORY_IF_APPLICABLE, CONTRIBUTOR_SPEC); public static final SyntheticGuideline DATE = SyntheticGuideline.of("Date", "description", "https://guidelines.openaire.eu/en/latest/data/field_date.html", "F", 4, RequirementLevel.MANDATORY, DATE_SPEC); public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", "description", "https://guidelines.openaire.eu/en/latest/data/field_language.html", "F", 4, RequirementLevel.RECOMMENDED, LANGUAGE_SPEC); public static final SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of("Resource Type", "description", "https://guidelines.openaire.eu/en/latest/data/field_resourcetype.html", "F", 4, RequirementLevel.RECOMMENDED, RESOURCE_TYPE_SPEC); public static final SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of("Alternate Identifier", "description", "https://guidelines.openaire.eu/en/latest/data/field_alternateidentifier.html", "F", 4, RequirementLevel.OPTIONAL, ALTERNATE_IDENTIFIER_SPEC); public static final SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of("Related Identifier", "description", "https://guidelines.openaire.eu/en/latest/data/field_relatedidentifier.html", "F", 4, RequirementLevel.MANDATORY_IF_APPLICABLE, RELATED_IDENTIFIER_SPEC); public static final SyntheticGuideline SIZE = SyntheticGuideline.of("Size", "description", "https://guidelines.openaire.eu/en/latest/data/field_size.html", "F", 4, RequirementLevel.OPTIONAL, SIZE_SPEC); public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", "description", "https://guidelines.openaire.eu/en/latest/data/field_format.html", "F", 4, RequirementLevel.OPTIONAL, FORMAT_SPEC); public static final SyntheticGuideline VERSION = SyntheticGuideline.of("Version", "description", "https://guidelines.openaire.eu/en/latest/data/field_version.html", "F", 4, RequirementLevel.OPTIONAL, VERSION_SPEC); public static final SyntheticGuideline RIGHTS = SyntheticGuideline.of("Rights", "description", "https://guidelines.openaire.eu/en/latest/data/field_rights.html", "F", 4, RequirementLevel.MANDATORY_IF_APPLICABLE, RIGHTS_SPEC); public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", "description", "https://guidelines.openaire.eu/en/latest/data/field_description.html", "F", 4, RequirementLevel.MANDATORY_IF_APPLICABLE, DESCRIPTION_SPEC); public static final SyntheticGuideline GEOLOCATION = SyntheticGuideline.of("Geolocation", "description", "https://guidelines.openaire.eu/en/latest/data/field_geolocation.html", "F", 4, RequirementLevel.OPTIONAL, GEOLOCATION_SPEC); private static final List GUIDELINES = Collections.unmodifiableList( Arrays.asList( IDENTIFIER, CREATOR, TITLE, PUBLISHER, PUBLICATION_YEAR, SUBJECT, CONTRIBUTOR, DATE, LANGUAGE, RESOURCE_TYPE, ALTERNATE_IDENTIFIER, RELATED_IDENTIFIER, SIZE, FORMAT, VERSION, RIGHTS, DESCRIPTION, GEOLOCATION ) ); private static final Map GUIDELINE_MAP = GUIDELINES. stream(). collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline)); private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum); public DataArchiveGuidelinesV2Profile() { super("OpenAIRE Guidelines for Data Archives Profile v2"); } @Override public Collection> guidelines() { return GUIDELINES; } @Override public SyntheticGuideline guideline(String guidelineName) { return GUIDELINE_MAP.get(guidelineName); } @Override public int maxScore() { return MAX_SCORE; } }