// 2022-09-16 12:56:52 +02:00
package eu.dnetlib.validator2.validation.guideline.openaire ;
import eu.dnetlib.validator2.engine.Predicates ;
import eu.dnetlib.validator2.engine.Rule ;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule ;
import eu.dnetlib.validator2.validation.guideline.* ;
import eu.dnetlib.validator2.validation.utils.EmbargoedEndDateValuePredicate ;
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate ;
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate ;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate ;
import org.w3c.dom.Document ;
import java.util.* ;
import java.util.function.Predicate ;
import java.util.stream.Collectors ;
import java.util.stream.Stream ;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE ;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N ;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.* ;
public final class LiteratureGuidelinesV3Profile extends AbstractOpenAireProfile {
private static final String [ ] repoAccessTerms = {
" info:eu-repo/semantics/closedAccess " ,
" info:eu-repo/semantics/embargoedAccess " ,
" info:eu-repo/semantics/restrictedAccess " ,
" info:eu-repo/semantics/openAccess "
} ;
private static final String [ ] publicationTypes = {
" info:eu-repo/semantics/article " ,
" info:eu-repo/semantics/bachelorThesis " ,
" info:eu-repo/semantics/masterThesis " ,
" info:eu-repo/semantics/doctoralThesis " ,
" info:eu-repo/semantics/book " ,
" info:eu-repo/semantics/bookPart " ,
" info:eu-repo/semantics/review " ,
" info:eu-repo/semantics/conferenceObject " ,
" info:eu-repo/semantics/lecture " ,
" info:eu-repo/semantics/workingPaper " ,
" info:eu-repo/semantics/preprint " ,
" info:eu-repo/semantics/report " ,
" info:eu-repo/semantics/annotation " ,
" info:eu-repo/semantics/contributionToPeriodical " ,
" info:eu-repo/semantics/patent " ,
" info:eu-repo/semantics/other "
} ;
private static final String [ ] publicationVersions = {
" info:eu-repo/semantics/draft " ,
" info:eu-repo/semantics/submittedVersion " ,
" info:eu-repo/semantics/acceptedVersion " ,
" info:eu-repo/semantics/publishedVersion " ,
" info:eu-repo/semantics/updateVersion "
} ;
private static final String [ ] publicationTypesAndVersions = Stream
. concat ( Arrays . stream ( publicationTypes ) , Arrays . stream ( publicationVersions ) )
. toArray ( String [ ] : : new ) ;
private static final String [ ] audiences = {
" Administrators " ,
" Community Groups " ,
" Counsellors " ,
" Federal Funds Recipients and Applicants " ,
" Librarians " ,
" News Media " ,
" Other " ,
" Parents and Families " ,
" Policymakers " ,
" Researchers " ,
" School Support Staff " ,
" Student Financial Aid Providers " ,
" Students " ,
" Teachers "
} ;
private static final ElementSpec TITLE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:title " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec CREATOR_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:creator " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec PROJECT_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:relation " , ONE , elementIsPresent ( " dc:relation " ) )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PROJECT_IDENTIFIER_REGEX ) )
. build ( ) ;
private static final ElementSpec ACCESS_LEVEL_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:rights " , ONE ) . allowedValues ( repoAccessTerms )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec LICENSE_CONDITION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:rights " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_LICENSE_CONDITION_REG_EX ) )
. build ( ) ;
private static final ElementSpec EMBARGO_END_DATE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:date " , ONE , applicabilityRuleForEmbargoEndDate ( ) )
2022-09-16 12:56:52 +02:00
. allowedValues ( new EmbargoedEndDateValuePredicate ( ) )
. build ( ) ;
private static final ElementSpec ALT_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_ALT_IDENTIFIER_REG_EX ) )
. build ( ) ;
private static final ElementSpec PUBLICATION_REF_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PUBLICATION_REFERENCE_REG_EX ) )
. build ( ) ;
private static final ElementSpec DATASET_REF_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_DATASET_REFERENCE_REG_EX ) )
. build ( ) ;
//TODO value is either a keyword (free text) or a classification (info:eu-repo/classification)
// v3 guideliness recommends ddc classification (Dewey Decimal Classification)
private static final ElementSpec SUBJECT_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:subject " , ONE_TO_N , elementIsPresent ( " dc:subject " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec DESCRIPTION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:description " , ONE_TO_N , elementIsPresent ( " dc:description " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec PUBLISHER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryIfApplicableElement ( " dc:publisher " , ONE_TO_N , elementIsPresent ( " dc:publisher " ) )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec CONTRIBUTOR_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:contributor " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
//TODO
// Search element -> dc:date AND NOT embargoedDate
private static final ElementSpec PUBLICATION_DATE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:date " , ONE )
2022-09-16 12:56:52 +02:00
. allowedValues ( new RegexValuePredicate ( COMPILED_PUBLICATION_DATE_REG_EX ) )
. build ( ) ;
//TODO values from publication types
// Search element -> dc:type AND values IN publicationTypes set
//TODO: Check it is first occurrence
private static final ElementSpec PUBLICATION_TYPE_M_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:type " , ONE )
2022-09-16 12:56:52 +02:00
. atPosition ( ElementPosition . FIRST )
. allowedValues ( publicationTypes )
. build ( ) ;
//TODO
// Search element -> dc:type AND values NOT IN publicationTypes
//TODO: check it is second occurrence
private static final ElementSpec PUBLICATION_TYPE_O_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forOptionalElement ( " dc:type " )
2022-09-16 12:56:52 +02:00
. atPosition ( ElementPosition . SECOND )
. allowedValues ( new Predicates . SetOfCaseInsensitiveAllowedValues ( publicationTypesAndVersions ) . negate ( ) )
. build ( ) ;
private static final ElementSpec PUBLICATION_VERSION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedElement ( " dc:type " )
2022-09-16 12:56:52 +02:00
. allowedValues ( publicationVersions )
. build ( ) ;
private static final ElementSpec FORMAT_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:format " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new MediaTypesValuePredicate ( ) )
. build ( ) ;
private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forMandatoryElement ( " dc:identifier " , ONE_TO_N )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec SOURCE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:source " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
//TODO values from ISO 639-1 or 639-2 or 639-3 (recommended)
private static final ElementSpec LANGUAGE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:language " )
2022-09-16 12:56:52 +02:00
. allowedValues ( new ISO639ValuePredicate ( ) )
. build ( ) ;
//TODO: Should exclude other dc:relation elements e.g. !containsAllowedValuesOF -> Project Identifier (MA), Alternative Identifier (R), Publication Reference (R), Dataset Reference (R)
private static final ElementSpec RELATION_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forOptionalRepeatableElement ( " dc:relation " )
2022-09-16 12:56:52 +02:00
. allowedValues ( relationSpecAllowedValuesPredicate ( ) )
. build ( ) ;
private static final ElementSpec COVERAGE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:coverage " )
2022-09-16 12:56:52 +02:00
. build ( ) ;
private static final ElementSpec AUDIENCE_SPEC = Builders
2023-07-27 13:41:00 +02:00
. forRecommendedRepeatableElement ( " dc:audience " )
2022-09-16 12:56:52 +02:00
. allowedValues ( audiences )
. build ( ) ;
private static Rule < Document > applicabilityRuleForEmbargoEndDate ( ) {
return XMLCardinalityRule . builder ( ) .
setId ( ElementSpec . APPLICABILITY_RULE_ID ) .
// first predicate count(...) makes sure there is only one Access Level set and then the second predicate verifies its value.
2023-07-27 13:41:00 +02:00
setXPathExpression ( " //*[count(//*[name()='dc:rights' and starts-with(normalize-space(text()), 'info:eu-repo/semantics/')])=1][name()='dc:rights' and normalize-space(text())='info:eu-repo/semantics/embargoedAccess'] " ) .
2022-09-16 12:56:52 +02:00
setRange ( 1 , 1 ) .
setIsInclusive ( true ) .
build ( ) ;
}
private static Predicate < String > relationSpecAllowedValuesPredicate ( ) {
return new RegexValuePredicate ( COMPILED_PROJECT_IDENTIFIER_REGEX ) . negate ( ) .
and ( new RegexValuePredicate ( COMPILED_ALT_IDENTIFIER_REG_EX ) . negate ( ) .
and ( new RegexValuePredicate ( COMPILED_PUBLICATION_REFERENCE_REG_EX ) . negate ( ) .
and ( new RegexValuePredicate ( COMPILED_DATASET_REFERENCE_REG_EX ) . negate ( ) ) ) ) ;
}
public static final SyntheticGuideline TITLE = SyntheticGuideline . of ( " Title " , 4 , TITLE_SPEC ) ;
public static final SyntheticGuideline CREATOR = SyntheticGuideline . of ( " Creator " , 4 , CREATOR_SPEC ) ;
public static final SyntheticGuideline PROJECT_IDENTIFIER = SyntheticGuideline . of ( " Project Identifier " , 5 , PROJECT_IDENTIFIER_SPEC ) ;
public static final SyntheticGuideline ACCESS_LEVEL = SyntheticGuideline . of ( " Access Level " , 5 , ACCESS_LEVEL_SPEC ) ;
public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline . of ( " License Condition " , 1 , LICENSE_CONDITION_SPEC ) ;
public static final SyntheticGuideline EMBARGO_END_DATE = SyntheticGuideline . of ( " Embargo End Date " , 5 , EMBARGO_END_DATE_SPEC ) ;
public static final SyntheticGuideline ALTERNATIVE_IDENTIFIER = SyntheticGuideline . of ( " Alternative Identifier " , 5 , ALT_IDENTIFIER_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_REFERENCE = SyntheticGuideline . of ( " Publication Reference " , 2 , PUBLICATION_REF_SPEC ) ;
public static final SyntheticGuideline DATASET_REFERENCE = SyntheticGuideline . of ( " Dataset Reference " , 2 , DATASET_REF_SPEC ) ;
public static final SyntheticGuideline SUBJECT = SyntheticGuideline . of ( " Subject " , 5 , SUBJECT_SPEC ) ;
public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline . of ( " Description " , 5 , DESCRIPTION_SPEC ) ;
public static final SyntheticGuideline PUBLISHER = SyntheticGuideline . of ( " Publisher " , 5 , PUBLISHER_SPEC ) ;
public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline . of ( " Contributor " , 2 , CONTRIBUTOR_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline . of ( " Publication Date " , 5 , PUBLICATION_DATE_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_TYPE_MANDATORY = SyntheticGuideline . of ( " Publication Type M " , 5 , PUBLICATION_TYPE_M_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_TYPE_OPTIONAL = SyntheticGuideline . of ( " Publication Type O " , 1 , PUBLICATION_TYPE_O_SPEC ) ;
public static final SyntheticGuideline PUBLICATION_VERSION = SyntheticGuideline . of ( " Publication Version " , 2 , PUBLICATION_VERSION_SPEC ) ;
public static final SyntheticGuideline FORMAT = SyntheticGuideline . of ( " Format " , 2 , FORMAT_SPEC ) ;
public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline . of ( " Resource Identifier " , 5 , RESOURCE_IDENTIFIER_SPEC ) ;
public static final SyntheticGuideline SOURCE = SyntheticGuideline . of ( " Source " , 5 , SOURCE_SPEC ) ;
public static final SyntheticGuideline LANGUAGE = SyntheticGuideline . of ( " Language " , 2 , LANGUAGE_SPEC ) ;
public static final SyntheticGuideline RELATION = SyntheticGuideline . of ( " Relation " , 1 , RELATION_SPEC ) ;
public static final SyntheticGuideline COVERAGE = SyntheticGuideline . of ( " Coverage " , 1 , COVERAGE_SPEC ) ;
public static final SyntheticGuideline AUDIENCE = SyntheticGuideline . of ( " Audience " , 2 , AUDIENCE_SPEC ) ;
private static final List < SyntheticGuideline > GUIDELINES = Collections . unmodifiableList (
Arrays . asList (
TITLE ,
CREATOR ,
PROJECT_IDENTIFIER ,
ACCESS_LEVEL ,
LICENSE_CONDITION ,
EMBARGO_END_DATE ,
ALTERNATIVE_IDENTIFIER ,
PUBLICATION_REFERENCE ,
DATASET_REFERENCE ,
SUBJECT ,
DESCRIPTION ,
PUBLISHER ,
CONTRIBUTOR ,
PUBLICATION_DATE ,
PUBLICATION_TYPE_MANDATORY ,
PUBLICATION_TYPE_OPTIONAL ,
PUBLICATION_VERSION ,
FORMAT ,
RESOURCE_IDENTIFIER ,
SOURCE ,
LANGUAGE ,
RELATION ,
COVERAGE ,
AUDIENCE
)
) ;
private static final Map < String , SyntheticGuideline > GUIDELINE_MAP = GUIDELINES .
stream ( ) .
collect ( Collectors . toMap ( SyntheticGuideline : : getName , ( guideline ) - > guideline ) ) ;
private static final int MAX_SCORE = GUIDELINES . stream ( ) . map ( SyntheticGuideline : : getWeight ) . reduce ( 0 , Integer : : sum ) ;
public LiteratureGuidelinesV3Profile ( ) {
super ( " OpenAIRE Guidelines for Literature Repositories Profile v3 " ) ;
}
@Override
public Collection < ? extends Guideline < Document > > guidelines ( ) {
return GUIDELINES ;
}
@Override
public SyntheticGuideline guideline ( String guidelineName ) {
return GUIDELINE_MAP . get ( guidelineName ) ;
}
@Override
public int maxScore ( ) {
return MAX_SCORE ;
}
}