2022-09-16 12:56:52 +02:00
package eu.dnetlib.validator2.validation.guideline.openaire ;
import eu.dnetlib.validator2.engine.Predicates ;
import eu.dnetlib.validator2.engine.Rule ;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule ;
import eu.dnetlib.validator2.validation.guideline.* ;
import eu.dnetlib.validator2.validation.utils.EmbargoedEndDateValuePredicate ;
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate ;
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate ;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate ;
import org.w3c.dom.Document ;
import java.util.* ;
import java.util.function.Predicate ;
import java.util.stream.Collectors ;
import java.util.stream.Stream ;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE ;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N ;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.* ;
public final class LiteratureGuidelinesV3Profile extends AbstractOpenAireProfile {
/**
 * Access-rights vocabulary terms; supplied as the allowed values of
 * {@code ACCESS_LEVEL_SPEC}.
 *
 * NOTE(review): every literal carries a leading and a trailing space.
 * They are kept exactly as found - confirm whether the padding is intended.
 */
private static final String[] repoAccessTerms = {
    " info:eu-repo/semantics/closedAccess ",
    " info:eu-repo/semantics/embargoedAccess ",
    " info:eu-repo/semantics/restrictedAccess ",
    " info:eu-repo/semantics/openAccess "
};
/**
 * Publication-type vocabulary terms; supplied as the allowed values of
 * {@code PUBLICATION_TYPE_M_SPEC} and merged into
 * {@code publicationTypesAndVersions}.
 */
private static final String[] publicationTypes = {
    " info:eu-repo/semantics/article ",
    " info:eu-repo/semantics/bachelorThesis ",
    " info:eu-repo/semantics/masterThesis ",
    " info:eu-repo/semantics/doctoralThesis ",
    " info:eu-repo/semantics/book ",
    " info:eu-repo/semantics/bookPart ",
    " info:eu-repo/semantics/review ",
    " info:eu-repo/semantics/conferenceObject ",
    " info:eu-repo/semantics/lecture ",
    " info:eu-repo/semantics/workingPaper ",
    " info:eu-repo/semantics/preprint ",
    " info:eu-repo/semantics/report ",
    " info:eu-repo/semantics/annotation ",
    " info:eu-repo/semantics/contributionToPeriodical ",
    " info:eu-repo/semantics/patent ",
    " info:eu-repo/semantics/other "
};
/**
 * Publication-version vocabulary terms; supplied as the allowed values of
 * {@code PUBLICATION_VERSION_SPEC} and merged into
 * {@code publicationTypesAndVersions}.
 */
private static final String[] publicationVersions = {
    " info:eu-repo/semantics/draft ",
    " info:eu-repo/semantics/submittedVersion ",
    " info:eu-repo/semantics/acceptedVersion ",
    " info:eu-repo/semantics/publishedVersion ",
    " info:eu-repo/semantics/updateVersion "
};
private static final String [ ] publicationTypesAndVersions = Stream
. concat ( Arrays . stream ( publicationTypes ) , Arrays . stream ( publicationVersions ) )
. toArray ( String [ ] : : new ) ;
/** Audience terms; supplied as the allowed values of {@code AUDIENCE_SPEC}. */
private static final String[] audiences = {
    " Administrators ",
    " Community Groups ",
    " Counsellors ",
    " Federal Funds Recipients and Applicants ",
    " Librarians ",
    " News Media ",
    " Other ",
    " Parents and Families ",
    " Policymakers ",
    " Researchers ",
    " School Support Staff ",
    " Student Financial Aid Providers ",
    " Students ",
    " Teachers "
};
private static final ElementSpec TITLE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:title " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec CREATOR_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:creator " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec PROJECT_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:relation " , ONE , elementIsPresent ( " dc:relation " ) )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PROJECT_IDENTIFIER_REGEX ) )
. build ( ) ;
private static final ElementSpec ACCESS_LEVEL_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:rights " , ONE ) . allowedValues ( repoAccessTerms )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec LICENSE_CONDITION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:rights " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_LICENSE_CONDITION_REG_EX ) )
. build ( ) ;
private static final ElementSpec EMBARGO_END_DATE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:date " , ONE , applicabilityRuleForEmbargoEndDate ( ) )
2022-09-16 12:56:52 +02:00
. allowedValues ( new EmbargoedEndDateValuePredicate ( ) )
. build ( ) ;
private static final ElementSpec ALT_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_ALT_IDENTIFIER_REG_EX ) )
. build ( ) ;
private static final ElementSpec PUBLICATION_REF_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PUBLICATION_REFERENCE_REG_EX ) )
. build ( ) ;
private static final ElementSpec DATASET_REF_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_DATASET_REFERENCE_REG_EX ) )
. build ( ) ;
//TODO value is either a keyword (free text) or a classification (info:eu-repo/classification)
// v3 guideliness recommends ddc classification (Dewey Decimal Classification)
private static final ElementSpec SUBJECT_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:subject " , ONE_TO_N , elementIsPresent ( " dc:subject " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec DESCRIPTION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:description " , ONE_TO_N , elementIsPresent ( " dc:description " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec PUBLISHER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:publisher " , ONE_TO_N , elementIsPresent ( " dc:publisher " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec CONTRIBUTOR_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:contributor " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
//TODO
// Search element -> dc:date AND NOT embargoedDate
private static final ElementSpec PUBLICATION_DATE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:date " , ONE )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PUBLICATION_DATE_REG_EX ) )
. build ( ) ;
//TODO values from publication types
// Search element -> dc:type AND values IN publicationTypes set
//TODO: Check it is first occurrence
private static final ElementSpec PUBLICATION_TYPE_M_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:type " , ONE )
2022-09-16 12:56:52 +02:00
. atPosition ( ElementPosition . FIRST )
. allowedValues ( publicationTypes )
. build ( ) ;
//TODO
// Search element -> dc:type AND values NOT IN publicationTypes
//TODO: check it is second occurrence
private static final ElementSpec PUBLICATION_TYPE_O_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forOptionalElement ( " dc:type " )
2022-09-16 12:56:52 +02:00
. atPosition ( ElementPosition . SECOND )
. allowedValues ( new Predicates . SetOfCaseInsensitiveAllowedValues ( publicationTypesAndVersions ) . negate ( ) )
. build ( ) ;
private static final ElementSpec PUBLICATION_VERSION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedElement ( " dc:type " )
2022-09-16 12:56:52 +02:00
. allowedValues ( publicationVersions )
. build ( ) ;
private static final ElementSpec FORMAT_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:format " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new MediaTypesValuePredicate ( ) )
. build ( ) ;
private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:identifier " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec SOURCE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:source " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
//TODO values from ISO 639-1 or 639-2 or 639-3 (recommended)
private static final ElementSpec LANGUAGE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:language " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new ISO639ValuePredicate ( ) )
. build ( ) ;
//TODO: Should exclude other dc:relation elements e.g. !containsAllowedValuesOF -> Project Identifier (MA), Alternative Identifier (R), Publication Reference (R), Dataset Reference (R)
private static final ElementSpec RELATION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forOptionalRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( relationSpecAllowedValuesPredicate ( ) )
. build ( ) ;
private static final ElementSpec COVERAGE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:coverage " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec AUDIENCE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:audience " )
2022-09-16 12:56:52 +02:00
. allowedValues ( audiences )
. build ( ) ;
private static Rule < Document > applicabilityRuleForEmbargoEndDate ( ) {
return XMLCardinalityRule . builder ( ) .
setId ( ElementSpec . APPLICABILITY_RULE_ID ) .
// first predicate count(...) makes sure there is only one Access Level set and then the second predicate verifies its value.
2023-07-27 13:41:00 +02:00
setXPathExpression ( " //*[count(//*[name()='dc:rights' and starts-with(normalize-space(text()), 'info:eu-repo/semantics/')])=1][name()='dc:rights' and normalize-space(text())='info:eu-repo/semantics/embargoedAccess'] " ) .
2022-09-16 12:56:52 +02:00
setRange ( 1 , 1 ) .
setIsInclusive ( true ) .
build ( ) ;
}
/**
 * Builds the value predicate used by {@code RELATION_SPEC}.
 *
 * <p>A value is accepted only when it matches none of the four specialised
 * dc:relation patterns (project identifier, alternative identifier,
 * publication reference, dataset reference). The checks run in that order
 * and stop at the first pattern that matches.
 *
 * @return the conjunction of the four negated regex predicates
 */
private static Predicate<String> relationSpecAllowedValuesPredicate() {
    final Predicate<String> notProjectIdentifier =
        new RegexValuePredicate(COMPILED_PROJECT_IDENTIFIER_REGEX).negate();
    final Predicate<String> notAltIdentifier =
        new RegexValuePredicate(COMPILED_ALT_IDENTIFIER_REG_EX).negate();
    final Predicate<String> notPublicationReference =
        new RegexValuePredicate(COMPILED_PUBLICATION_REFERENCE_REG_EX).negate();
    final Predicate<String> notDatasetReference =
        new RegexValuePredicate(COMPILED_DATASET_REFERENCE_REG_EX).negate();

    return notProjectIdentifier
        .and(notAltIdentifier)
        .and(notPublicationReference)
        .and(notDatasetReference);
}
2023-08-02 14:15:42 +02:00
public static final SyntheticGuideline TITLE = SyntheticGuideline . of ( " Title " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_title.html " , " F " ,
2023-08-02 11:58:45 +02:00
4 , RequirementLevel . MANDATORY , TITLE_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static final SyntheticGuideline CREATOR = SyntheticGuideline . of ( " Creator " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_creator.html " , " F " ,
2023-08-02 11:58:45 +02:00
4 , RequirementLevel . MANDATORY , CREATOR_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static final SyntheticGuideline PROJECT_IDENTIFIER = SyntheticGuideline . of ( " Project Identifier " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_projectid.html " , " F " ,
5 , RequirementLevel . MANDATORY_IF_APPLICABLE , PROJECT_IDENTIFIER_SPEC ) ;
public static final SyntheticGuideline ACCESS_LEVEL = SyntheticGuideline . of ( " Access Level " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_accesslevel.html " , " F " ,
2023-08-02 11:58:45 +02:00
5 , RequirementLevel . MANDATORY , ACCESS_LEVEL_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline . of ( " License Condition " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_licensecondition.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . RECOMMENDED , LICENSE_CONDITION_SPEC ) ;
public static final SyntheticGuideline EMBARGO_END_DATE = SyntheticGuideline . of ( " Embargo End Date " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_embargoenddate.html " , " F " ,
5 , RequirementLevel . MANDATORY_IF_APPLICABLE , EMBARGO_END_DATE_SPEC ) ;
public static final SyntheticGuideline ALTERNATIVE_IDENTIFIER = SyntheticGuideline . of ( " Alternative Identifier " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_alternativeidentifier.html " , " F " ,
5 , RequirementLevel . RECOMMENDED , ALT_IDENTIFIER_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_REFERENCE = SyntheticGuideline . of ( " Publication Reference " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publicationreference.html " , " F " ,
2 , RequirementLevel . RECOMMENDED , PUBLICATION_REF_SPEC ) ;
public static final SyntheticGuideline DATASET_REFERENCE = SyntheticGuideline . of ( " Dataset Reference " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_datasetreference.html " , " F " ,
2 , RequirementLevel . RECOMMENDED , DATASET_REF_SPEC ) ;
public static final SyntheticGuideline SUBJECT = SyntheticGuideline . of ( " Subject " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_subject.html " , " F " ,
5 , RequirementLevel . MANDATORY_IF_APPLICABLE , SUBJECT_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline . of ( " Description " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_description.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
5 , RequirementLevel . MANDATORY_IF_APPLICABLE , DESCRIPTION_SPEC ) ;
public static final SyntheticGuideline PUBLISHER = SyntheticGuideline . of ( " Publisher " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publisher.html " , " F " ,
5 , RequirementLevel . MANDATORY_IF_APPLICABLE , PUBLISHER_SPEC ) ;
public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline . of ( " Contributor " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_contributor.html " , " F " ,
2 , RequirementLevel . RECOMMENDED , CONTRIBUTOR_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline . of ( " Publication Date " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publicationdate.html " , " F " ,
2023-08-02 11:58:45 +02:00
5 , RequirementLevel . MANDATORY , PUBLICATION_DATE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline PUBLICATION_TYPE_MANDATORY = SyntheticGuideline . of ( " Publication Type M " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publicationtype.html " , " F, R " ,
2023-08-02 11:58:45 +02:00
5 , RequirementLevel . MANDATORY , PUBLICATION_TYPE_M_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline PUBLICATION_TYPE_OPTIONAL = SyntheticGuideline . of ( " Publication Type O " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publicationtype.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . OPTIONAL , PUBLICATION_TYPE_O_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_VERSION = SyntheticGuideline . of ( " Publication Version " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_publicationversion.html " , " F " ,
2 , RequirementLevel . RECOMMENDED , PUBLICATION_VERSION_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline FORMAT = SyntheticGuideline . of ( " Format " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_format.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
2 , RequirementLevel . RECOMMENDED , FORMAT_SPEC ) ;
public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline . of ( " Resource Identifier " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_resourceidentifier.html " , " F " ,
2023-08-02 11:58:45 +02:00
5 , RequirementLevel . MANDATORY , RESOURCE_IDENTIFIER_SPEC ) ;
2023-08-02 14:15:42 +02:00
public static final SyntheticGuideline SOURCE = SyntheticGuideline . of ( " Source " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_source.html " , " F " ,
5 , RequirementLevel . RECOMMENDED , SOURCE_SPEC ) ;
public static final SyntheticGuideline LANGUAGE = SyntheticGuideline . of ( " Language " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_language.html " , " F " ,
2 , RequirementLevel . RECOMMENDED , LANGUAGE_SPEC ) ;
public static final SyntheticGuideline RELATION = SyntheticGuideline . of ( " Relation " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_relation.html " , " F " ,
1 , RequirementLevel . OPTIONAL , RELATION_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline COVERAGE = SyntheticGuideline . of ( " Coverage " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_coverage.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
1 , RequirementLevel . RECOMMENDED , COVERAGE_SPEC ) ;
2023-09-21 10:22:20 +02:00
public static final SyntheticGuideline AUDIENCE = SyntheticGuideline . of ( " Audience " , " description " , " https://guidelines.openaire.eu/en/latest/literature/field_audience.html " , " F, R " ,
2023-08-02 14:15:42 +02:00
2 , RequirementLevel . RECOMMENDED , AUDIENCE_SPEC ) ;
2022-09-16 12:56:52 +02:00
/**
 * Unmodifiable list of every guideline in this profile, in the order listed
 * here. It is what {@code guidelines()} returns and the source for
 * {@code GUIDELINE_MAP} and {@code MAX_SCORE}.
 */
private static final List<SyntheticGuideline> GUIDELINES =
    Collections.unmodifiableList(Arrays.asList(
        // identification, access and rights
        TITLE, CREATOR, PROJECT_IDENTIFIER, ACCESS_LEVEL, LICENSE_CONDITION, EMBARGO_END_DATE,
        // references to other resources
        ALTERNATIVE_IDENTIFIER, PUBLICATION_REFERENCE, DATASET_REFERENCE,
        // descriptive elements
        SUBJECT, DESCRIPTION, PUBLISHER, CONTRIBUTOR, PUBLICATION_DATE,
        // dc:type variants
        PUBLICATION_TYPE_MANDATORY, PUBLICATION_TYPE_OPTIONAL, PUBLICATION_VERSION,
        // remaining elements
        FORMAT, RESOURCE_IDENTIFIER, SOURCE, LANGUAGE, RELATION, COVERAGE, AUDIENCE));
2023-11-07 14:37:33 +01:00
private static final Map < String , SyntheticGuideline > GUIDELINE_MAP = GUIDELINES . stream ( ) .
2022-09-16 12:56:52 +02:00
collect ( Collectors . toMap ( SyntheticGuideline : : getName , ( guideline ) - > guideline ) ) ;
private static final int MAX_SCORE = GUIDELINES . stream ( ) . map ( SyntheticGuideline : : getWeight ) . reduce ( 0 , Integer : : sum ) ;
public LiteratureGuidelinesV3Profile ( ) {
super ( " OpenAIRE Guidelines for Literature Repositories Profile v3 " ) ;
}
/**
 * Returns all guidelines of this profile.
 *
 * @return the shared, unmodifiable {@code GUIDELINES} list
 */
@Override
public Collection<? extends Guideline<Document>> guidelines() {
    final Collection<? extends Guideline<Document>> all = GUIDELINES;
    return all;
}
/**
 * Looks up a guideline by name.
 *
 * @param guidelineName the name to look up, compared against the keys of
 *                      {@code GUIDELINE_MAP}
 * @return the matching guideline, or {@code null} when the map has no entry
 *         for that name
 */
@Override
public SyntheticGuideline guideline(String guidelineName) {
    final SyntheticGuideline match = GUIDELINE_MAP.get(guidelineName);
    return match;
}
/**
 * Returns the maximum score of this profile.
 *
 * @return the precomputed {@code MAX_SCORE}, i.e. the sum of the weights of
 *         all guidelines in {@code GUIDELINES}
 */
@Override
public int maxScore() {
    final int total = MAX_SCORE;
    return total;
}
}