// uoa-validator-engine2/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV3Profi...
//
// 299 lines
// 14 KiB
// Java

package eu.dnetlib.validator2.validation.guideline.openaire;
import eu.dnetlib.validator2.engine.Predicates;
import eu.dnetlib.validator2.engine.Rule;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule;
import eu.dnetlib.validator2.validation.guideline.*;
import eu.dnetlib.validator2.validation.utils.EmbargoedEndDateValuePredicate;
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate;
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate;
import org.w3c.dom.Document;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*;
/**
 * Validation profile registered under the name
 * "OpenAIRE Guidelines for Literature Repositories Profile v3".
 *
 * <p>The class is a static catalogue: each Dublin Core element checked by the
 * profile is described by an {@link ElementSpec}, wrapped in a named
 * {@link SyntheticGuideline} together with an integer that is summed into
 * {@link #maxScore()} (read back via {@code SyntheticGuideline::getWeight}).
 *
 * <p>NOTE: the declaration order of the static fields matters. Later constants
 * are initialised from earlier ones (vocabularies, then specs, then guidelines,
 * then the guideline list, then the lookup map and score).
 */
public final class LiteratureGuidelinesV3Profile extends AbstractOpenAireProfile {

    // ---------------------------------------------------------------------
    // Controlled vocabularies
    // ---------------------------------------------------------------------

    /** Allowed {@code dc:rights} values for the access level. */
    private static final String[] repoAccessTerms = {
        "info:eu-repo/semantics/closedAccess",
        "info:eu-repo/semantics/embargoedAccess",
        "info:eu-repo/semantics/restrictedAccess",
        "info:eu-repo/semantics/openAccess"
    };

    /** Allowed {@code dc:type} values for the mandatory publication type. */
    private static final String[] publicationTypes = {
        "info:eu-repo/semantics/article",
        "info:eu-repo/semantics/bachelorThesis",
        "info:eu-repo/semantics/masterThesis",
        "info:eu-repo/semantics/doctoralThesis",
        "info:eu-repo/semantics/book",
        "info:eu-repo/semantics/bookPart",
        "info:eu-repo/semantics/review",
        "info:eu-repo/semantics/conferenceObject",
        "info:eu-repo/semantics/lecture",
        "info:eu-repo/semantics/workingPaper",
        "info:eu-repo/semantics/preprint",
        "info:eu-repo/semantics/report",
        "info:eu-repo/semantics/annotation",
        "info:eu-repo/semantics/contributionToPeriodical",
        "info:eu-repo/semantics/patent",
        "info:eu-repo/semantics/other"
    };

    /**
     * Allowed {@code dc:type} values for the publication version.
     *
     * <p>The last term is {@code updatedVersion} (with a "d"), as defined by the
     * info:eu-repo version vocabulary; it was previously misspelled
     * {@code updateVersion}, which rejected conformant records.
     */
    private static final String[] publicationVersions = {
        "info:eu-repo/semantics/draft",
        "info:eu-repo/semantics/submittedVersion",
        "info:eu-repo/semantics/acceptedVersion",
        "info:eu-repo/semantics/publishedVersion",
        "info:eu-repo/semantics/updatedVersion"
    };

    /** Concatenation of {@link #publicationTypes} followed by {@link #publicationVersions}. */
    private static final String[] publicationTypesAndVersions = Stream
        .concat(Arrays.stream(publicationTypes), Arrays.stream(publicationVersions))
        .toArray(String[]::new);

    /** Allowed {@code dc:audience} values. */
    private static final String[] audiences = {
        "Administrators",
        "Community Groups",
        "Counsellors",
        "Federal Funds Recipients and Applicants",
        "Librarians",
        "News Media",
        "Other",
        "Parents and Families",
        "Policymakers",
        "Researchers",
        "School Support Staff",
        "Student Financial Aid Providers",
        "Students",
        "Teachers"
    };

    // ---------------------------------------------------------------------
    // Element specifications
    // ---------------------------------------------------------------------

    /** {@code dc:title}: mandatory, one or more. */
    private static final ElementSpec TITLE_SPEC = Builders
        .forMandatoryElement("dc:title", ONE_TO_N)
        .build();

    /** {@code dc:creator}: mandatory, one or more. */
    private static final ElementSpec CREATOR_SPEC = Builders
        .forMandatoryElement("dc:creator", ONE_TO_N)
        .build();

    /**
     * {@code dc:relation} carrying a project identifier: mandatory if applicable,
     * where the applicability rule is "a {@code dc:relation} element is present".
     */
    private static final ElementSpec PROJECT_IDENTIFIER_SPEC = Builders
        .forMandatoryIfApplicableElement("dc:relation", ONE, elementIsPresent("dc:relation"))
        .allowedValues(new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX))
        .build();

    /** {@code dc:rights} carrying the access level: mandatory, exactly one, from {@link #repoAccessTerms}. */
    private static final ElementSpec ACCESS_LEVEL_SPEC = Builders
        .forMandatoryElement("dc:rights", ONE).allowedValues(repoAccessTerms)
        .build();

    /** {@code dc:rights} carrying a license condition: recommended, repeatable. */
    private static final ElementSpec LICENSE_CONDITION_SPEC = Builders
        .forRecommendedRepeatableElement("dc:rights")
        .allowedValues(new RegexValuePredicate(COMPILED_LICENSE_CONDITION_REG_EX))
        .build();

    /**
     * {@code dc:date} carrying the embargo end date: mandatory if applicable,
     * see {@link #applicabilityRuleForEmbargoEndDate()}.
     */
    private static final ElementSpec EMBARGO_END_DATE_SPEC = Builders
        .forMandatoryIfApplicableElement("dc:date", ONE, applicabilityRuleForEmbargoEndDate())
        .allowedValues(new EmbargoedEndDateValuePredicate())
        .build();

    /** {@code dc:relation} carrying an alternative identifier: recommended, repeatable. */
    private static final ElementSpec ALT_IDENTIFIER_SPEC = Builders
        .forRecommendedRepeatableElement("dc:relation")
        .allowedValues(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX))
        .build();

    /** {@code dc:relation} carrying a publication reference: recommended, repeatable. */
    private static final ElementSpec PUBLICATION_REF_SPEC = Builders
        .forRecommendedRepeatableElement("dc:relation")
        .allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX))
        .build();

    /** {@code dc:relation} carrying a dataset reference: recommended, repeatable. */
    private static final ElementSpec DATASET_REF_SPEC = Builders
        .forRecommendedRepeatableElement("dc:relation")
        .allowedValues(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX))
        .build();

    //TODO value is either a keyword (free text) or a classification (info:eu-repo/classification)
    // The v3 guidelines recommend DDC classification (Dewey Decimal Classification)
    /** {@code dc:subject}: mandatory if applicable (i.e. if present), one or more; values are not constrained here. */
    private static final ElementSpec SUBJECT_SPEC = Builders
        .forMandatoryIfApplicableElement("dc:subject", ONE_TO_N, elementIsPresent("dc:subject"))
        .build();

    /** {@code dc:description}: mandatory if applicable (i.e. if present), one or more. */
    private static final ElementSpec DESCRIPTION_SPEC = Builders
        .forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description"))
        .build();

    /** {@code dc:publisher}: mandatory if applicable (i.e. if present), one or more. */
    private static final ElementSpec PUBLISHER_SPEC = Builders
        .forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher"))
        .build();

    /** {@code dc:contributor}: recommended, repeatable. */
    private static final ElementSpec CONTRIBUTOR_SPEC = Builders
        .forRecommendedRepeatableElement("dc:contributor")
        .build();

    //TODO
    // Search element -> dc:date AND NOT embargoedDate
    /** {@code dc:date} carrying the publication date: mandatory, exactly one. */
    private static final ElementSpec PUBLICATION_DATE_SPEC = Builders
        .forMandatoryElement("dc:date", ONE)
        .allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_DATE_REG_EX))
        .build();

    //TODO values from publication types
    // Search element -> dc:type AND values IN publicationTypes set
    //TODO: Check it is first occurrence
    /** First {@code dc:type}: mandatory, exactly one, from {@link #publicationTypes}. */
    private static final ElementSpec PUBLICATION_TYPE_M_SPEC = Builders
        .forMandatoryElement("dc:type", ONE)
        .atPosition(ElementPosition.FIRST)
        .allowedValues(publicationTypes)
        .build();

    //TODO
    // Search element -> dc:type AND values NOT IN publicationTypes
    //TODO: check it is second occurrence
    /**
     * Second {@code dc:type}: optional; the allowed-values predicate is the negation
     * of the set built from {@link #publicationTypesAndVersions}, so controlled
     * type/version terms are not accepted here.
     */
    private static final ElementSpec PUBLICATION_TYPE_O_SPEC = Builders
        .forOptionalElement("dc:type")
        .atPosition(ElementPosition.SECOND)
        .allowedValues(new Predicates.SetOfCaseInsensitiveAllowedValues(publicationTypesAndVersions).negate())
        .build();

    /** {@code dc:type} carrying the publication version: recommended, from {@link #publicationVersions}. */
    private static final ElementSpec PUBLICATION_VERSION_SPEC = Builders
        .forRecommendedElement("dc:type")
        .allowedValues(publicationVersions)
        .build();

    /** {@code dc:format}: recommended, repeatable, validated by {@link MediaTypesValuePredicate}. */
    private static final ElementSpec FORMAT_SPEC = Builders
        .forRecommendedRepeatableElement("dc:format")
        .allowedValues(new MediaTypesValuePredicate())
        .build();

    /** {@code dc:identifier}: mandatory, one or more. */
    private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders
        .forMandatoryElement("dc:identifier", ONE_TO_N)
        .build();

    /** {@code dc:source}: recommended, repeatable. */
    private static final ElementSpec SOURCE_SPEC = Builders
        .forRecommendedRepeatableElement("dc:source")
        .build();

    //TODO values from ISO 639-1 or 639-2 or 639-3 (recommended)
    /** {@code dc:language}: recommended, repeatable, validated by {@link ISO639ValuePredicate}. */
    private static final ElementSpec LANGUAGE_SPEC = Builders
        .forRecommendedRepeatableElement("dc:language")
        .allowedValues(new ISO639ValuePredicate())
        .build();

    //TODO: Should exclude other dc:relation elements e.g. !containsAllowedValuesOF -> Project Identifier (MA), Alternative Identifier (R), Publication Reference (R), Dataset Reference (R)
    /**
     * Generic {@code dc:relation}: optional, repeatable; values are restricted by
     * {@link #relationSpecAllowedValuesPredicate()}.
     */
    private static final ElementSpec RELATION_SPEC = Builders
        .forOptionalRepeatableElement("dc:relation")
        .allowedValues(relationSpecAllowedValuesPredicate())
        .build();

    /** {@code dc:coverage}: recommended, repeatable. */
    private static final ElementSpec COVERAGE_SPEC = Builders
        .forRecommendedRepeatableElement("dc:coverage")
        .build();

    /** {@code dc:audience}: recommended, repeatable, from {@link #audiences}. */
    private static final ElementSpec AUDIENCE_SPEC = Builders
        .forRecommendedRepeatableElement("dc:audience")
        .allowedValues(audiences)
        .build();

    // ---------------------------------------------------------------------
    // Helpers used while initialising the specs above
    // ---------------------------------------------------------------------

    /**
     * Builds the applicability rule for the embargo end date.
     *
     * <p>The XPath selects {@code dc:rights} elements whose normalized text is
     * {@code info:eu-repo/semantics/embargoedAccess}, but only when the document
     * contains exactly one {@code dc:rights} whose text starts with
     * {@code info:eu-repo/semantics/}. The rule is configured with the range
     * {@code [1, 1]} and the inclusive flag set.
     *
     * @return the cardinality rule registered under {@code ElementSpec.APPLICABILITY_RULE_ID}
     */
    private static Rule<Document> applicabilityRuleForEmbargoEndDate() {
        return XMLCardinalityRule.builder().
            setId(ElementSpec.APPLICABILITY_RULE_ID).
            // first predicate count(...) makes sure there is only one Access Level set and then the second predicate verifies its value.
            setXPathExpression("//*[count(//*[name()='dc:rights' and starts-with(normalize-space(text()), 'info:eu-repo/semantics/')])=1][name()='dc:rights' and normalize-space(text())='info:eu-repo/semantics/embargoedAccess']").
            setRange(1,1).
            setIsInclusive(true).
            build();
    }

    /**
     * Builds the value predicate for the generic relation spec: a value is accepted
     * only if it matches none of the project-identifier, alternative-identifier,
     * publication-reference and dataset-reference patterns.
     *
     * @return the conjunction of the four negated regex predicates
     */
    private static Predicate<String> relationSpecAllowedValuesPredicate() {
        return new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX).negate().
            and(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX).negate().
            and(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX).negate().
            and(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX).negate())));
    }

    // ---------------------------------------------------------------------
    // Guidelines: (name, weight, spec). The name is the lookup key used by
    // guideline(String); the integer is what getWeight() contributes to maxScore().
    // ---------------------------------------------------------------------

    public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", 4, TITLE_SPEC);
    public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", 4, CREATOR_SPEC);
    public static final SyntheticGuideline PROJECT_IDENTIFIER = SyntheticGuideline.of("Project Identifier", 5, PROJECT_IDENTIFIER_SPEC);
    public static final SyntheticGuideline ACCESS_LEVEL = SyntheticGuideline.of("Access Level", 5, ACCESS_LEVEL_SPEC);
    public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", 1, LICENSE_CONDITION_SPEC);
    public static final SyntheticGuideline EMBARGO_END_DATE = SyntheticGuideline.of("Embargo End Date", 5, EMBARGO_END_DATE_SPEC);
    public static final SyntheticGuideline ALTERNATIVE_IDENTIFIER = SyntheticGuideline.of("Alternative Identifier", 5, ALT_IDENTIFIER_SPEC);
    public static final SyntheticGuideline PUBLICATION_REFERENCE = SyntheticGuideline.of("Publication Reference", 2, PUBLICATION_REF_SPEC);
    public static final SyntheticGuideline DATASET_REFERENCE = SyntheticGuideline.of("Dataset Reference", 2, DATASET_REF_SPEC);
    public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", 5, SUBJECT_SPEC);
    public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", 5, DESCRIPTION_SPEC);
    public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", 5, PUBLISHER_SPEC);
    public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", 2, CONTRIBUTOR_SPEC);
    public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", 5, PUBLICATION_DATE_SPEC);
    public static final SyntheticGuideline PUBLICATION_TYPE_MANDATORY = SyntheticGuideline.of("Publication Type M", 5, PUBLICATION_TYPE_M_SPEC);
    public static final SyntheticGuideline PUBLICATION_TYPE_OPTIONAL = SyntheticGuideline.of("Publication Type O", 1, PUBLICATION_TYPE_O_SPEC);
    public static final SyntheticGuideline PUBLICATION_VERSION = SyntheticGuideline.of("Publication Version", 2, PUBLICATION_VERSION_SPEC);
    public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", 2, FORMAT_SPEC);
    public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline.of("Resource Identifier", 5, RESOURCE_IDENTIFIER_SPEC);
    public static final SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", 5, SOURCE_SPEC);
    public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", 2, LANGUAGE_SPEC);
    public static final SyntheticGuideline RELATION = SyntheticGuideline.of("Relation", 1, RELATION_SPEC);
    public static final SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", 1, COVERAGE_SPEC);
    public static final SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", 2, AUDIENCE_SPEC);

    /** All guidelines of this profile, in declaration order; unmodifiable. */
    private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(
        Arrays.asList(
            TITLE,
            CREATOR,
            PROJECT_IDENTIFIER,
            ACCESS_LEVEL,
            LICENSE_CONDITION,
            EMBARGO_END_DATE,
            ALTERNATIVE_IDENTIFIER,
            PUBLICATION_REFERENCE,
            DATASET_REFERENCE,
            SUBJECT,
            DESCRIPTION,
            PUBLISHER,
            CONTRIBUTOR,
            PUBLICATION_DATE,
            PUBLICATION_TYPE_MANDATORY,
            PUBLICATION_TYPE_OPTIONAL,
            PUBLICATION_VERSION,
            FORMAT,
            RESOURCE_IDENTIFIER,
            SOURCE,
            LANGUAGE,
            RELATION,
            COVERAGE,
            AUDIENCE
        )
    );

    /**
     * Guidelines indexed by {@code SyntheticGuideline::getName}; unmodifiable.
     * {@code Collectors.toMap} throws on duplicate keys, so guideline names must stay unique.
     */
    private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = Collections.unmodifiableMap(
        GUIDELINES.
            stream().
            collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline))
    );

    /** Sum of the weights of all guidelines in {@link #GUIDELINES}. */
    private static final int MAX_SCORE = GUIDELINES.stream().mapToInt(SyntheticGuideline::getWeight).sum();

    /** Creates the profile under the name "OpenAIRE Guidelines for Literature Repositories Profile v3". */
    public LiteratureGuidelinesV3Profile() {
        super("OpenAIRE Guidelines for Literature Repositories Profile v3");
    }

    /**
     * @return the unmodifiable list of all guidelines of this profile
     */
    @Override
    public Collection<? extends Guideline<Document>> guidelines() {
        return GUIDELINES;
    }

    /**
     * Looks a guideline up by its name (e.g. {@code "Title"}, {@code "Publication Type M"}).
     *
     * @param guidelineName the exact (case-sensitive) guideline name
     * @return the matching guideline, or {@code null} if no guideline has that name
     */
    @Override
    public SyntheticGuideline guideline(String guidelineName) {
        return GUIDELINE_MAP.get(guidelineName);
    }

    /**
     * @return the sum of the weights of all guidelines of this profile
     */
    @Override
    public int maxScore() {
        return MAX_SCORE;
    }
}