299 lines
14 KiB
Java
299 lines
14 KiB
Java
package eu.dnetlib.validator2.validation.guideline.openaire;
|
|
|
|
import eu.dnetlib.validator2.engine.Predicates;
|
|
import eu.dnetlib.validator2.engine.Rule;
|
|
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule;
|
|
import eu.dnetlib.validator2.validation.guideline.*;
|
|
import eu.dnetlib.validator2.validation.utils.EmbargoedEndDateValuePredicate;
|
|
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate;
|
|
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate;
|
|
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate;
|
|
import org.w3c.dom.Document;
|
|
|
|
import java.util.*;
|
|
import java.util.function.Predicate;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Stream;
|
|
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;
|
|
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*;
|
|
|
|
public final class LiteratureGuidelinesV3Profile extends AbstractOpenAireProfile {
|
|
|
|
private static final String[] repoAccessTerms = {
|
|
"info:eu-repo/semantics/closedAccess",
|
|
"info:eu-repo/semantics/embargoedAccess",
|
|
"info:eu-repo/semantics/restrictedAccess",
|
|
"info:eu-repo/semantics/openAccess"
|
|
};
|
|
|
|
private static final String[] publicationTypes = {
|
|
"info:eu-repo/semantics/article",
|
|
"info:eu-repo/semantics/bachelorThesis",
|
|
"info:eu-repo/semantics/masterThesis",
|
|
"info:eu-repo/semantics/doctoralThesis",
|
|
"info:eu-repo/semantics/book",
|
|
"info:eu-repo/semantics/bookPart",
|
|
"info:eu-repo/semantics/review",
|
|
"info:eu-repo/semantics/conferenceObject",
|
|
"info:eu-repo/semantics/lecture",
|
|
"info:eu-repo/semantics/workingPaper",
|
|
"info:eu-repo/semantics/preprint",
|
|
"info:eu-repo/semantics/report",
|
|
"info:eu-repo/semantics/annotation",
|
|
"info:eu-repo/semantics/contributionToPeriodical",
|
|
"info:eu-repo/semantics/patent",
|
|
"info:eu-repo/semantics/other"
|
|
};
|
|
|
|
private static final String[] publicationVersions = {
|
|
"info:eu-repo/semantics/draft",
|
|
"info:eu-repo/semantics/submittedVersion",
|
|
"info:eu-repo/semantics/acceptedVersion",
|
|
"info:eu-repo/semantics/publishedVersion",
|
|
"info:eu-repo/semantics/updateVersion"
|
|
};
|
|
|
|
private static final String[] publicationTypesAndVersions = Stream
|
|
.concat(Arrays.stream(publicationTypes), Arrays.stream(publicationVersions))
|
|
.toArray(String[]::new);
|
|
|
|
private static final String[] audiences = {
|
|
"Administrators",
|
|
"Community Groups",
|
|
"Counsellors",
|
|
"Federal Funds Recipients and Applicants",
|
|
"Librarians",
|
|
"News Media",
|
|
"Other",
|
|
"Parents and Families",
|
|
"Policymakers",
|
|
"Researchers",
|
|
"School Support Staff",
|
|
"Student Financial Aid Providers",
|
|
"Students",
|
|
"Teachers"
|
|
};
|
|
|
|
private static final ElementSpec TITLE_SPEC = Builders
|
|
.forMandatoryElement("dc:title", ONE_TO_N)
|
|
.build();
|
|
|
|
private static final ElementSpec CREATOR_SPEC = Builders
|
|
.forMandatoryElement("dc:creator", ONE_TO_N)
|
|
.build();
|
|
|
|
private static final ElementSpec PROJECT_IDENTIFIER_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("dc:relation", ONE, elementIsPresent("dc:relation"))
|
|
.allowedValues(new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX))
|
|
.build();
|
|
|
|
private static final ElementSpec ACCESS_LEVEL_SPEC = Builders
|
|
.forMandatoryElement("dc:rights", ONE).allowedValues(repoAccessTerms)
|
|
.build();
|
|
|
|
private static final ElementSpec LICENSE_CONDITION_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:rights")
|
|
.allowedValues(new RegexValuePredicate(COMPILED_LICENSE_CONDITION_REG_EX))
|
|
.build();
|
|
|
|
private static final ElementSpec EMBARGO_END_DATE_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("dc:date", ONE, applicabilityRuleForEmbargoEndDate())
|
|
.allowedValues(new EmbargoedEndDateValuePredicate())
|
|
.build();
|
|
|
|
private static final ElementSpec ALT_IDENTIFIER_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:relation")
|
|
.allowedValues(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX))
|
|
.build();
|
|
|
|
private static final ElementSpec PUBLICATION_REF_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:relation")
|
|
.allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX))
|
|
.build();
|
|
|
|
private static final ElementSpec DATASET_REF_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:relation")
|
|
.allowedValues(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX))
|
|
.build();
|
|
|
|
//TODO value is either a keyword (free text) or a classification (info:eu-repo/classification)
|
|
// v3 guideliness recommends ddc classification (Dewey Decimal Classification)
|
|
private static final ElementSpec SUBJECT_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("dc:subject", ONE_TO_N, elementIsPresent("dc:subject"))
|
|
.build();
|
|
|
|
private static final ElementSpec DESCRIPTION_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description"))
|
|
.build();
|
|
|
|
private static final ElementSpec PUBLISHER_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher"))
|
|
.build();
|
|
|
|
private static final ElementSpec CONTRIBUTOR_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:contributor")
|
|
.build();
|
|
|
|
//TODO
|
|
// Search element -> dc:date AND NOT embargoedDate
|
|
private static final ElementSpec PUBLICATION_DATE_SPEC = Builders
|
|
.forMandatoryElement("dc:date", ONE)
|
|
.allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_DATE_REG_EX))
|
|
.build();
|
|
|
|
//TODO values from publication types
|
|
// Search element -> dc:type AND values IN publicationTypes set
|
|
//TODO: Check it is first occurrence
|
|
private static final ElementSpec PUBLICATION_TYPE_M_SPEC = Builders
|
|
.forMandatoryElement("dc:type", ONE)
|
|
.atPosition(ElementPosition.FIRST)
|
|
.allowedValues(publicationTypes)
|
|
.build();
|
|
|
|
//TODO
|
|
// Search element -> dc:type AND values NOT IN publicationTypes
|
|
//TODO: check it is second occurrence
|
|
private static final ElementSpec PUBLICATION_TYPE_O_SPEC = Builders
|
|
.forOptionalElement("dc:type")
|
|
.atPosition(ElementPosition.SECOND)
|
|
.allowedValues(new Predicates.SetOfCaseInsensitiveAllowedValues(publicationTypesAndVersions).negate())
|
|
.build();
|
|
|
|
private static final ElementSpec PUBLICATION_VERSION_SPEC = Builders
|
|
.forRecommendedElement("dc:type")
|
|
.allowedValues(publicationVersions)
|
|
.build();
|
|
|
|
private static final ElementSpec FORMAT_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:format")
|
|
.allowedValues(new MediaTypesValuePredicate())
|
|
.build();
|
|
|
|
private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders
|
|
.forMandatoryElement("dc:identifier", ONE_TO_N)
|
|
.build();
|
|
|
|
private static final ElementSpec SOURCE_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:source")
|
|
.build();
|
|
|
|
//TODO values from ISO 639-1 or 639-2 or 639-3 (recommended)
|
|
private static final ElementSpec LANGUAGE_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:language")
|
|
.allowedValues(new ISO639ValuePredicate())
|
|
.build();
|
|
|
|
//TODO: Should exclude other dc:relation elements e.g. !containsAllowedValuesOF -> Project Identifier (MA), Alternative Identifier (R), Publication Reference (R), Dataset Reference (R)
|
|
private static final ElementSpec RELATION_SPEC = Builders
|
|
.forOptionalRepeatableElement("dc:relation")
|
|
.allowedValues(relationSpecAllowedValuesPredicate())
|
|
.build();
|
|
|
|
private static final ElementSpec COVERAGE_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:coverage")
|
|
.build();
|
|
|
|
private static final ElementSpec AUDIENCE_SPEC = Builders
|
|
.forRecommendedRepeatableElement("dc:audience")
|
|
.allowedValues(audiences)
|
|
.build();
|
|
|
|
private static Rule<Document> applicabilityRuleForEmbargoEndDate() {
|
|
return XMLCardinalityRule.builder().
|
|
setId(ElementSpec.APPLICABILITY_RULE_ID).
|
|
// first predicate count(...) makes sure there is only one Access Level set and then the second predicate verifies its value.
|
|
setXPathExpression("//*[count(//*[name()='dc:rights' and starts-with(normalize-space(text()), 'info:eu-repo/semantics/')])=1][name()='dc:rights' and normalize-space(text())='info:eu-repo/semantics/embargoedAccess']").
|
|
setRange(1,1).
|
|
setIsInclusive(true).
|
|
build();
|
|
}
|
|
|
|
private static Predicate<String> relationSpecAllowedValuesPredicate() {
|
|
return new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX).negate().
|
|
and(new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX).negate().
|
|
and(new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX).negate().
|
|
and(new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX).negate())));
|
|
}
|
|
|
|
public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", 4, TITLE_SPEC);
|
|
public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", 4, CREATOR_SPEC);
|
|
public static final SyntheticGuideline PROJECT_IDENTIFIER = SyntheticGuideline.of("Project Identifier", 5, PROJECT_IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline ACCESS_LEVEL = SyntheticGuideline.of("Access Level", 5, ACCESS_LEVEL_SPEC);
|
|
public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", 1, LICENSE_CONDITION_SPEC);
|
|
public static final SyntheticGuideline EMBARGO_END_DATE = SyntheticGuideline.of("Embargo End Date", 5, EMBARGO_END_DATE_SPEC);
|
|
public static final SyntheticGuideline ALTERNATIVE_IDENTIFIER = SyntheticGuideline.of("Alternative Identifier", 5, ALT_IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_REFERENCE = SyntheticGuideline.of("Publication Reference", 2, PUBLICATION_REF_SPEC);
|
|
public static final SyntheticGuideline DATASET_REFERENCE = SyntheticGuideline.of("Dataset Reference", 2, DATASET_REF_SPEC);
|
|
public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", 5, SUBJECT_SPEC);
|
|
public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", 5, DESCRIPTION_SPEC);
|
|
public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", 5, PUBLISHER_SPEC);
|
|
public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", 2, CONTRIBUTOR_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", 5, PUBLICATION_DATE_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_TYPE_MANDATORY = SyntheticGuideline.of("Publication Type M", 5, PUBLICATION_TYPE_M_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_TYPE_OPTIONAL = SyntheticGuideline.of("Publication Type O", 1, PUBLICATION_TYPE_O_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_VERSION = SyntheticGuideline.of("Publication Version", 2, PUBLICATION_VERSION_SPEC);
|
|
public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", 2, FORMAT_SPEC);
|
|
public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline.of("Resource Identifier", 5, RESOURCE_IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", 5, SOURCE_SPEC);
|
|
public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", 2, LANGUAGE_SPEC);
|
|
public static final SyntheticGuideline RELATION = SyntheticGuideline.of("Relation", 1, RELATION_SPEC);
|
|
public static final SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", 1, COVERAGE_SPEC);
|
|
public static final SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", 2, AUDIENCE_SPEC);
|
|
|
|
private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(
|
|
Arrays.asList(
|
|
TITLE,
|
|
CREATOR,
|
|
PROJECT_IDENTIFIER,
|
|
ACCESS_LEVEL,
|
|
LICENSE_CONDITION,
|
|
EMBARGO_END_DATE,
|
|
ALTERNATIVE_IDENTIFIER,
|
|
PUBLICATION_REFERENCE,
|
|
DATASET_REFERENCE,
|
|
SUBJECT,
|
|
DESCRIPTION,
|
|
PUBLISHER,
|
|
CONTRIBUTOR,
|
|
PUBLICATION_DATE,
|
|
PUBLICATION_TYPE_MANDATORY,
|
|
PUBLICATION_TYPE_OPTIONAL,
|
|
PUBLICATION_VERSION,
|
|
FORMAT,
|
|
RESOURCE_IDENTIFIER,
|
|
SOURCE,
|
|
LANGUAGE,
|
|
RELATION,
|
|
COVERAGE,
|
|
AUDIENCE
|
|
)
|
|
);
|
|
|
|
private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = GUIDELINES.
|
|
stream().
|
|
collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline));
|
|
|
|
private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum);
|
|
|
|
public LiteratureGuidelinesV3Profile() {
|
|
super("OpenAIRE Guidelines for Literature Repositories Profile v3");
|
|
}
|
|
|
|
@Override
|
|
public Collection<? extends Guideline<Document>> guidelines() {
|
|
return GUIDELINES;
|
|
}
|
|
|
|
@Override
|
|
public SyntheticGuideline guideline(String guidelineName) {
|
|
return GUIDELINE_MAP.get(guidelineName);
|
|
}
|
|
|
|
@Override
|
|
public int maxScore() {
|
|
return MAX_SCORE;
|
|
}
|
|
}
|