package eu.dnetlib.validator2.validation.guideline.openaire; import eu.dnetlib.validator2.engine.Predicates; import eu.dnetlib.validator2.engine.Rule; import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule; import eu.dnetlib.validator2.validation.guideline.*; import eu.dnetlib.validator2.validation.utils.EmbargoedEndDateValuePredicate; import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate; import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate; import eu.dnetlib.validator2.validation.utils.RegexValuePredicate; import org.w3c.dom.Document; import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE; import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N; import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*; public final class LiteratureGuidelinesV3Profile extends AbstractOpenAireProfile { private static final String[] repoAccessTerms = { "info:eu-repo/semantics/closedAccess", "info:eu-repo/semantics/embargoedAccess", "info:eu-repo/semantics/restrictedAccess", "info:eu-repo/semantics/openAccess" }; private static final String[] publicationTypes = { "info:eu-repo/semantics/article", "info:eu-repo/semantics/bachelorThesis", "info:eu-repo/semantics/masterThesis", "info:eu-repo/semantics/doctoralThesis", "info:eu-repo/semantics/book", "info:eu-repo/semantics/bookPart", "info:eu-repo/semantics/review", "info:eu-repo/semantics/conferenceObject", "info:eu-repo/semantics/lecture", "info:eu-repo/semantics/workingPaper", "info:eu-repo/semantics/preprint", "info:eu-repo/semantics/report", "info:eu-repo/semantics/annotation", "info:eu-repo/semantics/contributionToPeriodical", "info:eu-repo/semantics/patent", "info:eu-repo/semantics/other" }; private static final String[] publicationVersions = { "info:eu-repo/semantics/draft", "info:eu-repo/semantics/submittedVersion", "info:eu-repo/semantics/acceptedVersion", "info:eu-repo/semantics/publishedVersion", "info:eu-repo/semantics/updateVersion" }; private static final String[] publicationTypesAndVersions = Stream .concat(Arrays.stream(publicationTypes), Arrays.stream(publicationVersions)) .toArray(String[]::new); private static final String[] audiences = { "Administrators", "Community Groups", "Counsellors", "Federal Funds Recipients and Applicants", "Librarians", "News Media", "Other", "Parents and Families", "Policymakers", "Researchers", "School Support Staff", "Student Financial Aid Providers", "Students", "Teachers" }; private static final ElementSpec TITLE_SPEC = Builders .forMandatoryElement("dc:title", ONE_TO_N) .build(); private static final ElementSpec CREATOR_SPEC = Builders .forMandatoryElement("dc:creator", ONE_TO_N) .build(); private static final ElementSpec PROJECT_IDENTIFIER_SPEC = Builders .forMandatoryIfApplicableElement("dc:relation", ONE, elementIsPresent("dc:relation")) .allowedValues(new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX)) .build(); private static final ElementSpec ACCESS_LEVEL_SPEC = Builders .forMandatoryElement("dc:rights", ONE).allowedValues(repoAccessTerms) .build(); private static final ElementSpec LICENSE_CONDITION_SPEC = Builders .forRecommendedRepeatableElement("dc:rights") .allowedValues(new RegexValuePredicate(COMPILED_LICENSE_CONDITION_REG_EX)) .build(); private static final ElementSpec EMBARGO_END_DATE_SPEC = Builders .forMandatoryIfApplicableElement("dc:date", ONE, applicabilityRuleForEmbargoEndDate()) .allowedValues(new EmbargoedEndDateValuePredicate()) .build(); private static final ElementSpec ALT_IDENTIFIER_SPEC = Builders .forRecommendedRepeatableElement("dc:relation") .allowedValues(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX)) .build(); private static final ElementSpec PUBLICATION_REF_SPEC = Builders .forRecommendedRepeatableElement("dc:relation") .allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX)) .build(); private static final ElementSpec DATASET_REF_SPEC = Builders .forRecommendedRepeatableElement("dc:relation") .allowedValues(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX)) .build(); //TODO value is either a keyword (free text) or a classification (info:eu-repo/classification) // v3 guideliness recommends ddc classification (Dewey Decimal Classification) private static final ElementSpec SUBJECT_SPEC = Builders .forMandatoryIfApplicableElement("dc:subject", ONE_TO_N, elementIsPresent("dc:subject")) .build(); private static final ElementSpec DESCRIPTION_SPEC = Builders .forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description")) .build(); private static final ElementSpec PUBLISHER_SPEC = Builders .forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher")) .build(); private static final ElementSpec CONTRIBUTOR_SPEC = Builders .forRecommendedRepeatableElement("dc:contributor") .build(); //TODO // Search element -> dc:date AND NOT embargoedDate private static final ElementSpec PUBLICATION_DATE_SPEC = Builders .forMandatoryElement("dc:date", ONE) .allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_DATE_REG_EX)) .build(); //TODO values from publication types // Search element -> dc:type AND values IN publicationTypes set //TODO: Check it is first occurrence private static final ElementSpec PUBLICATION_TYPE_M_SPEC = Builders .forMandatoryElement("dc:type", ONE) .atPosition(ElementPosition.FIRST) .allowedValues(publicationTypes) .build(); //TODO // Search element -> dc:type AND values NOT IN publicationTypes //TODO: check it is second occurrence private static final ElementSpec PUBLICATION_TYPE_O_SPEC = Builders .forOptionalElement("dc:type") .atPosition(ElementPosition.SECOND) .allowedValues(new Predicates.SetOfCaseInsensitiveAllowedValues(publicationTypesAndVersions).negate()) .build(); private static final ElementSpec PUBLICATION_VERSION_SPEC = Builders .forRecommendedElement("dc:type") .allowedValues(publicationVersions) .build(); private static final ElementSpec FORMAT_SPEC = Builders .forRecommendedRepeatableElement("dc:format") .allowedValues(new MediaTypesValuePredicate()) .build(); private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders .forMandatoryElement("dc:identifier", ONE_TO_N) .build(); private static final ElementSpec SOURCE_SPEC = Builders .forRecommendedRepeatableElement("dc:source") .build(); //TODO values from ISO 639-1 or 639-2 or 639-3 (recommended) private static final ElementSpec LANGUAGE_SPEC = Builders .forRecommendedRepeatableElement("dc:language") .allowedValues(new ISO639ValuePredicate()) .build(); //TODO: Should exclude other dc:relation elements e.g. !containsAllowedValuesOF -> Project Identifier (MA), Alternative Identifier (R), Publication Reference (R), Dataset Reference (R) private static final ElementSpec RELATION_SPEC = Builders .forOptionalRepeatableElement("dc:relation") .allowedValues(relationSpecAllowedValuesPredicate()) .build(); private static final ElementSpec COVERAGE_SPEC = Builders .forRecommendedRepeatableElement("dc:coverage") .build(); private static final ElementSpec AUDIENCE_SPEC = Builders .forRecommendedRepeatableElement("dc:audience") .allowedValues(audiences) .build(); private static Rule applicabilityRuleForEmbargoEndDate() { return XMLCardinalityRule.builder(). setId(ElementSpec.APPLICABILITY_RULE_ID). // first predicate count(...) makes sure there is only one Access Level set and then the second predicate verifies its value. setXPathExpression("//*[count(//*[name()='dc:rights' and starts-with(normalize-space(text()), 'info:eu-repo/semantics/')])=1][name()='dc:rights' and normalize-space(text())='info:eu-repo/semantics/embargoedAccess']"). setRange(1,1). setIsInclusive(true). build(); } private static Predicate relationSpecAllowedValuesPredicate() { return new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX).negate(). and(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX).negate(). and(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX).negate(). and(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX).negate()))); } public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", "description", "https://guidelines.openaire.eu/en/latest/literature/field_title.html", "F", 4, RequirementLevel.MANDATORY, TITLE_SPEC); public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", "description", "https://guidelines.openaire.eu/en/latest/literature/field_creator.html", "F", 4, RequirementLevel.MANDATORY, CREATOR_SPEC); public static final SyntheticGuideline PROJECT_IDENTIFIER = SyntheticGuideline.of("Project Identifier", "description", "https://guidelines.openaire.eu/en/latest/literature/field_projectid.html", "F", 5, RequirementLevel.MANDATORY_IF_APPLICABLE, PROJECT_IDENTIFIER_SPEC); public static final SyntheticGuideline ACCESS_LEVEL = SyntheticGuideline.of("Access Level", "description", "https://guidelines.openaire.eu/en/latest/literature/field_accesslevel.html", "F", 5, RequirementLevel.MANDATORY, ACCESS_LEVEL_SPEC); public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", "description", "https://guidelines.openaire.eu/en/latest/literature/field_licensecondition.html", "F,R", 1, RequirementLevel.RECOMMENDED, LICENSE_CONDITION_SPEC); public static final SyntheticGuideline EMBARGO_END_DATE = SyntheticGuideline.of("Embargo End Date", "description", "https://guidelines.openaire.eu/en/latest/literature/field_embargoenddate.html", "F", 5, RequirementLevel.MANDATORY_IF_APPLICABLE, EMBARGO_END_DATE_SPEC); public static final SyntheticGuideline ALTERNATIVE_IDENTIFIER = SyntheticGuideline.of("Alternative Identifier", "description", "https://guidelines.openaire.eu/en/latest/literature/field_alternativeidentifier.html", "F", 5, RequirementLevel.RECOMMENDED, ALT_IDENTIFIER_SPEC); public static final SyntheticGuideline PUBLICATION_REFERENCE = SyntheticGuideline.of("Publication Reference", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publicationreference.html", "F", 2, RequirementLevel.RECOMMENDED, PUBLICATION_REF_SPEC); public static final SyntheticGuideline DATASET_REFERENCE = SyntheticGuideline.of("Dataset Reference", "description", "https://guidelines.openaire.eu/en/latest/literature/field_datasetreference.html", "F", 2, RequirementLevel.RECOMMENDED, DATASET_REF_SPEC); public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", "description", "https://guidelines.openaire.eu/en/latest/literature/field_subject.html", "F", 5, RequirementLevel.MANDATORY_IF_APPLICABLE, SUBJECT_SPEC); public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", "description", "https://guidelines.openaire.eu/en/latest/literature/field_description.html", "F,R", 5, RequirementLevel.MANDATORY_IF_APPLICABLE, DESCRIPTION_SPEC); public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publisher.html", "F", 5, RequirementLevel.MANDATORY_IF_APPLICABLE, PUBLISHER_SPEC); public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", "description", "https://guidelines.openaire.eu/en/latest/literature/field_contributor.html", "F", 2, RequirementLevel.RECOMMENDED, CONTRIBUTOR_SPEC); public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publicationdate.html", "F", 5, RequirementLevel.MANDATORY, PUBLICATION_DATE_SPEC); public static final SyntheticGuideline PUBLICATION_TYPE_MANDATORY = SyntheticGuideline.of("Publication Type M", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publicationtype.html", "F,R", 5, RequirementLevel.MANDATORY, PUBLICATION_TYPE_M_SPEC); public static final SyntheticGuideline PUBLICATION_TYPE_OPTIONAL = SyntheticGuideline.of("Publication Type O", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publicationtype.html", "F,R", 1, RequirementLevel.OPTIONAL, PUBLICATION_TYPE_O_SPEC); public static final SyntheticGuideline PUBLICATION_VERSION = SyntheticGuideline.of("Publication Version", "description", "https://guidelines.openaire.eu/en/latest/literature/field_publicationversion.html", "F", 2, RequirementLevel.RECOMMENDED, PUBLICATION_VERSION_SPEC); public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", "description", "https://guidelines.openaire.eu/en/latest/literature/field_format.html", "F,R", 2, RequirementLevel.RECOMMENDED, FORMAT_SPEC); public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline.of("Resource Identifier", "description", "https://guidelines.openaire.eu/en/latest/literature/field_resourceidentifier.html", "F", 5, RequirementLevel.MANDATORY, RESOURCE_IDENTIFIER_SPEC); public static final SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", "description", "https://guidelines.openaire.eu/en/latest/literature/field_source.html", "F", 5, RequirementLevel.RECOMMENDED, SOURCE_SPEC); public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", "description", "https://guidelines.openaire.eu/en/latest/literature/field_language.html", "F", 2, RequirementLevel.RECOMMENDED, LANGUAGE_SPEC); public static final SyntheticGuideline RELATION = SyntheticGuideline.of("Relation", "description", "https://guidelines.openaire.eu/en/latest/literature/field_relation.html", "F", 1, RequirementLevel.OPTIONAL, RELATION_SPEC); public static final SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", "description", "https://guidelines.openaire.eu/en/latest/literature/field_coverage.html", "F,R", 1, RequirementLevel.RECOMMENDED, COVERAGE_SPEC); public static final SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", "description", "https://guidelines.openaire.eu/en/latest/literature/field_audience.html", "F,R", 2, RequirementLevel.RECOMMENDED, AUDIENCE_SPEC); private static final List GUIDELINES = Collections.unmodifiableList( Arrays.asList( TITLE, CREATOR, PROJECT_IDENTIFIER, ACCESS_LEVEL, LICENSE_CONDITION, EMBARGO_END_DATE, ALTERNATIVE_IDENTIFIER, PUBLICATION_REFERENCE, DATASET_REFERENCE, SUBJECT, DESCRIPTION, PUBLISHER, CONTRIBUTOR, PUBLICATION_DATE, PUBLICATION_TYPE_MANDATORY, PUBLICATION_TYPE_OPTIONAL, PUBLICATION_VERSION, FORMAT, RESOURCE_IDENTIFIER, SOURCE, LANGUAGE, RELATION, COVERAGE, AUDIENCE ) ); private static final Map GUIDELINE_MAP = GUIDELINES. stream(). collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline)); private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum); public LiteratureGuidelinesV3Profile() { super("OpenAIRE Guidelines for Literature Repositories Profile v3"); } @Override public Collection> guidelines() { return GUIDELINES; } @Override public SyntheticGuideline guideline(String guidelineName) { return GUIDELINE_MAP.get(guidelineName); } @Override public int maxScore() { return MAX_SCORE; } }