337 lines
16 KiB
Java
337 lines
16 KiB
Java
package eu.dnetlib.validator2.validation.guideline.openaire;
|
|
|
|
import eu.dnetlib.validator2.engine.Rule;
|
|
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule;
|
|
import eu.dnetlib.validator2.validation.guideline.*;
|
|
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate;
|
|
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate;
|
|
import org.w3c.dom.Document;
|
|
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
|
|
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;
|
|
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_ISO_8601_DATE_REG_EX;
|
|
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_YEAR_YYYY_REG_EX;
|
|
|
|
public class DataArchiveGuidelinesV2Profile extends AbstractOpenAireProfile {
|
|
|
|
private static final String[] identifierTypes = {
|
|
"ARK",
|
|
"DOI",
|
|
"Handle",
|
|
"PURL",
|
|
"URN",
|
|
"URL"
|
|
};
|
|
|
|
private static final String[] titleTypes = {
|
|
"AlternativeTitle",
|
|
"Subtitle",
|
|
"TranslatedTitle"
|
|
};
|
|
|
|
private static final String[] contributorTypes = {
|
|
"ContactPerson", "DataCollector", "DataCurator",
|
|
"DataManager", "Distributor", "Editor", "Funder",
|
|
"HostingInstitution", "Producer", "ProjectLeader",
|
|
"ProjectManager", "ProjectMember", "RegistrationAgency",
|
|
"RegistrationAuthority", "RelatedPerson", "Researcher",
|
|
"ResearchGroup", "RightsHolder", "Sponsor",
|
|
"Supervisor", "WorkPackageLeader", "Other"
|
|
};
|
|
|
|
private static final String[] dateTypes = {
|
|
"Accepted",
|
|
"Available",
|
|
"Copyrighted",
|
|
"Collected",
|
|
"Created",
|
|
"Issued",
|
|
"Submitted",
|
|
"Updated",
|
|
"Valid"
|
|
};
|
|
|
|
private static final String[] resourceTypeGeneralValues = {
|
|
"Audiovisual",
|
|
"Collection",
|
|
"Dataset",
|
|
"Event",
|
|
"Image",
|
|
"InteractiveResource",
|
|
"Model",
|
|
"PhysicalObject",
|
|
"Service",
|
|
"Software",
|
|
"Sound",
|
|
"Text",
|
|
"Workflow",
|
|
"Other"
|
|
};
|
|
|
|
private static final String[] relatedIdentifierTypes = {
|
|
"ARK", "arXiv", "bibcode", "DOI", "EAN13",
|
|
"EISSN", "Handle", "IGSN", "ISBN", "ISSN",
|
|
"ISTC", "LISSN", "LSID", "PISSN", "PMID",
|
|
"PURL", "UPC", "URL", "URN", "WOS"
|
|
};
|
|
|
|
private static final String[] relationTypes = {
|
|
"IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy",
|
|
"IsContinuedBy", "Continues", "HasMetadata", "IsMetadataFor",
|
|
"IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart",
|
|
"IsReferencedBy", "References", "IsDocumentedBy", "Documents",
|
|
"IsCompiledBy", "Compiles", "IsVariantFormOf", "IsOriginalFormOf",
|
|
"IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom",
|
|
"IsSourceOf"
|
|
};
|
|
|
|
private static final String[] rightsURIList = {
|
|
"info:eu-repo/semantics/closedAccess",
|
|
"info:eu-repo/semantics/embargoedAccess",
|
|
"info:eu-repo/semantics/restrictedAccess",
|
|
"info:eu-repo/semantics/openAccess"
|
|
};
|
|
|
|
private static final String[] descriptionTypes = {
|
|
"Abstract",
|
|
"Methods",
|
|
"SeriesInformation",
|
|
"TableOfContents",
|
|
"Other"
|
|
};
|
|
|
|
private static final ElementSpec IDENTIFIER_SPEC = Builders
|
|
.forMandatoryElement("identifier", ONE).inContext("metadata", "oai_datacite", "payload", "resource")
|
|
.withMandatoryAttribute("identifierType", identifierTypes)
|
|
.build();
|
|
|
|
private static final ElementSpec CREATOR_SPEC = Builders
|
|
.forMandatoryElement("creator", ONE_TO_N)
|
|
.withSubElement(Builders.forMandatoryElement("creatorName", ONE))
|
|
.withSubElement(Builders
|
|
.forRecommendedElement("nameIdentifier")
|
|
.withRecommendedAttribute("nameIdentifierScheme")
|
|
.withRecommendedAttribute("schemeURI")
|
|
)
|
|
.withSubElement(Builders.forRecommendedRepeatableElement("affiliation"))
|
|
.build();
|
|
|
|
private static final ElementSpec TITLE_SPEC = Builders
|
|
.forMandatoryElement("title", ONE_TO_N)
|
|
.withOptionalAttribute("titleType", titleTypes)
|
|
.build();
|
|
|
|
private static final ElementSpec PUBLISHER_SPEC = Builders
|
|
.forMandatoryElement("publisher", ONE)
|
|
.build();
|
|
|
|
private static final ElementSpec PUBLICATION_YEAR_SPEC = Builders
|
|
.forMandatoryElement("publicationYear", ONE).allowedValues(new RegexValuePredicate(COMPILED_YEAR_YYYY_REG_EX))
|
|
.build();
|
|
|
|
private static final ElementSpec SUBJECT_SPEC = Builders
|
|
.forRecommendedRepeatableElement("subject")
|
|
.withOptionalAttribute("subjectScheme")
|
|
.withOptionalAttribute("schemeURI")
|
|
.build();
|
|
|
|
|
|
//TODO mandatory if contributorType=Funder (for element and subelements/properties)
|
|
private static final ElementSpec CONTRIBUTOR_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("contributor", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("contributor"))
|
|
.withMandatoryIfApplicableAttribute("contributorType", appRuleForContributorType(), contributorTypes) //TODO
|
|
.withSubElement(Builders
|
|
.forMandatoryIfApplicableElement("contributorName", ONE, AbstractOpenAireProfile.elementIsPresent("contributor"))
|
|
)
|
|
//TODO the RegexValueProdicate for info:eu-repo/grantAgreement must be applied if contributorType is Funder
|
|
.withSubElement(Builders.forMandatoryIfApplicableElement("nameIdentifier", ONE, appRuleForContributorNameIdentifier())
|
|
.withMandatoryIfApplicableAttribute("nameIdentifierScheme", appRuleForContributorNameIdentifier())
|
|
.withOptionalAttribute("schemeURI ")
|
|
)
|
|
.withSubElement(Builders.forOptionalRepeatableElement("affiliation"))
|
|
.build();
|
|
|
|
//TODO: guideline is invalid (0..n). Since Date is mandatory 1..n
|
|
//TODO how to enforce that available dateTypes appear only once?
|
|
private static final ElementSpec DATE_SPEC = Builders
|
|
.forMandatoryElement("date", ONE_TO_N).allowedValues(new RegexValuePredicate(COMPILED_ISO_8601_DATE_REG_EX))
|
|
.withMandatoryAttribute("dateType", dateTypes)
|
|
.build();
|
|
|
|
private static final ElementSpec LANGUAGE_SPEC = Builders
|
|
.forRecommendedRepeatableElement("language").allowedValues(new ISO639ValuePredicate())
|
|
.build();
|
|
|
|
private static final ElementSpec RESOURCE_TYPE_SPEC = Builders
|
|
.forRecommendedElement("resourceType")
|
|
.withRecommendedAttribute("resourceTypeGeneral", resourceTypeGeneralValues)
|
|
.build();
|
|
|
|
private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders
|
|
.forOptionalRepeatableElement("alternateIdentifier")
|
|
.withOptionalAttribute("alternateIdentifierType")
|
|
.build();
|
|
|
|
private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("relatedIdentifier", ONE_TO_N, appRuleForRelatedIdentifier())
|
|
.withMandatoryAttribute("relatedIdentifierType", relatedIdentifierTypes)
|
|
.withMandatoryAttribute("relationType", relationTypes)
|
|
//TODO use the following three attributes only when relationType=HasMetadata|IsMetadataFor
|
|
.withOptionalAttribute("relatedMetadataScheme")
|
|
.withOptionalAttribute("schemeURI")
|
|
.withOptionalAttribute("schemeType")
|
|
.build();
|
|
|
|
private static final ElementSpec SIZE_SPEC = Builders
|
|
.forOptionalRepeatableElement("size")
|
|
.build();
|
|
|
|
private static final ElementSpec FORMAT_SPEC = Builders
|
|
.forOptionalRepeatableElement("format")
|
|
.build();
|
|
|
|
private static final ElementSpec VERSION_SPEC = Builders
|
|
.forOptionalRepeatableElement("version")
|
|
.build();
|
|
|
|
private static final ElementSpec RIGHTS_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights"))
|
|
.atPosition(ElementPosition.FIRST)
|
|
.withMandatoryAttribute("rightsURI", rightsURIList)
|
|
.build();
|
|
|
|
// private static final ElementSpec RIGHTS_SPEC2 = Builders
|
|
// .forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights"))
|
|
// .atPosition(ElementPosition.SECOND)
|
|
// .withMandatoryAttribute("rightsURI")
|
|
// .build();
|
|
|
|
private static final ElementSpec DESCRIPTION_SPEC = Builders
|
|
.forMandatoryIfApplicableElement("description", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights"))
|
|
.withMandatoryAttribute("descriptionType", descriptionTypes)
|
|
.build();
|
|
|
|
private static final ElementSpec GEOLOCATION_SPEC = Builders
|
|
.forOptionalRepeatableElement("geoLocation")
|
|
.withSubElement(Builders.forOptionalElement("geoLocationPoint"))
|
|
.withSubElement(Builders.forOptionalElement("geoLocationBox"))
|
|
.withSubElement(Builders.forOptionalElement("geoLocationPlace"))
|
|
.build();
|
|
|
|
private static Rule<Document> appRuleForContributorType() {
|
|
//TODO
|
|
return XMLCardinalityRule
|
|
.builder()
|
|
.setId(ElementSpec.APPLICABILITY_RULE_ID)
|
|
.setXPathExpression("//*[name()='contributor']/@contributorType")
|
|
.setIsInclusive(true).setRange(1,1).build();
|
|
}
|
|
|
|
private static Rule<Document> appRuleForContributorNameIdentifier() {
|
|
return XMLCardinalityRule
|
|
.builder()
|
|
.setId(ElementSpec.APPLICABILITY_RULE_ID)
|
|
.setXPathExpression("//*[name()='contributor' and @contributorType='Funder']")
|
|
.setIsInclusive(true).setRange(1,1).build();
|
|
}
|
|
|
|
private static Rule<Document> appRuleForRelatedIdentifier() {
|
|
//TODO
|
|
return XMLCardinalityRule
|
|
.builder()
|
|
.setId(ElementSpec.APPLICABILITY_RULE_ID)
|
|
.setXPathExpression("//*[name()='relatedIdentifier']")
|
|
.setIsInclusive(true).setRange(1,Long.MAX_VALUE - 1).build();
|
|
}
|
|
|
|
//TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 1.
|
|
public static final SyntheticGuideline IDENTIFIER = SyntheticGuideline.of("Identifier", "description", "https://guidelines.openaire.eu/en/latest/literature/field_identifier.html", "F",
|
|
4, RequirementLevel.MANDATORY, IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", "description", "https://guidelines.openaire.eu/en/latest/data/field_creator.html", "F",
|
|
4, RequirementLevel.MANDATORY, CREATOR_SPEC);
|
|
public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", "description", "https://guidelines.openaire.eu/en/latest/data/field_title.html", "F",
|
|
4, RequirementLevel.MANDATORY, TITLE_SPEC);
|
|
public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", "description", "https://guidelines.openaire.eu/en/latest/data/field_publisher.html", "F",
|
|
4, RequirementLevel.MANDATORY, PUBLISHER_SPEC);
|
|
public static final SyntheticGuideline PUBLICATION_YEAR = SyntheticGuideline.of("Publication Year", "description", "https://guidelines.openaire.eu/en/latest/data/field_publicationyear.html", "F",
|
|
4, RequirementLevel.MANDATORY, PUBLICATION_YEAR_SPEC);
|
|
public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", "description", "https://guidelines.openaire.eu/en/latest/data/field_subject.html", "F",
|
|
4, RequirementLevel.RECOMMENDED, SUBJECT_SPEC);
|
|
public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", "description", "https://guidelines.openaire.eu/en/latest/data/field_contributor.html", "F",
|
|
4, RequirementLevel.MANDATORY_IF_APPLICABLE, CONTRIBUTOR_SPEC);
|
|
public static final SyntheticGuideline DATE = SyntheticGuideline.of("Date", "description", "https://guidelines.openaire.eu/en/latest/data/field_date.html", "F",
|
|
4, RequirementLevel.MANDATORY, DATE_SPEC);
|
|
public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", "description", "https://guidelines.openaire.eu/en/latest/data/field_language.html", "F",
|
|
4, RequirementLevel.RECOMMENDED, LANGUAGE_SPEC);
|
|
public static final SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of("Resource Type", "description", "https://guidelines.openaire.eu/en/latest/data/field_resourcetype.html", "F",
|
|
4, RequirementLevel.RECOMMENDED, RESOURCE_TYPE_SPEC);
|
|
public static final SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of("Alternate Identifier", "description", "https://guidelines.openaire.eu/en/latest/data/field_alternateidentifier.html", "F",
|
|
4, RequirementLevel.OPTIONAL, ALTERNATE_IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of("Related Identifier", "description", "https://guidelines.openaire.eu/en/latest/data/field_relatedidentifier.html", "F",
|
|
4, RequirementLevel.MANDATORY_IF_APPLICABLE, RELATED_IDENTIFIER_SPEC);
|
|
public static final SyntheticGuideline SIZE = SyntheticGuideline.of("Size", "description", "https://guidelines.openaire.eu/en/latest/data/field_size.html", "F",
|
|
4, RequirementLevel.OPTIONAL, SIZE_SPEC);
|
|
public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", "description", "https://guidelines.openaire.eu/en/latest/data/field_format.html", "F",
|
|
4, RequirementLevel.OPTIONAL, FORMAT_SPEC);
|
|
public static final SyntheticGuideline VERSION = SyntheticGuideline.of("Version", "description", "https://guidelines.openaire.eu/en/latest/data/field_version.html", "F",
|
|
4, RequirementLevel.OPTIONAL, VERSION_SPEC);
|
|
public static final SyntheticGuideline RIGHTS = SyntheticGuideline.of("Rights", "description", "https://guidelines.openaire.eu/en/latest/data/field_rights.html", "F",
|
|
4, RequirementLevel.MANDATORY_IF_APPLICABLE, RIGHTS_SPEC);
|
|
public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", "description", "https://guidelines.openaire.eu/en/latest/data/field_description.html", "F",
|
|
4, RequirementLevel.MANDATORY_IF_APPLICABLE, DESCRIPTION_SPEC);
|
|
public static final SyntheticGuideline GEOLOCATION = SyntheticGuideline.of("Geolocation", "description", "https://guidelines.openaire.eu/en/latest/data/field_geolocation.html", "F",
|
|
4, RequirementLevel.OPTIONAL, GEOLOCATION_SPEC);
|
|
|
|
private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(
|
|
Arrays.asList(
|
|
IDENTIFIER,
|
|
CREATOR,
|
|
TITLE,
|
|
PUBLISHER,
|
|
PUBLICATION_YEAR,
|
|
SUBJECT,
|
|
CONTRIBUTOR,
|
|
DATE,
|
|
LANGUAGE,
|
|
RESOURCE_TYPE,
|
|
ALTERNATE_IDENTIFIER,
|
|
RELATED_IDENTIFIER,
|
|
SIZE,
|
|
FORMAT,
|
|
VERSION,
|
|
RIGHTS,
|
|
DESCRIPTION,
|
|
GEOLOCATION
|
|
)
|
|
);
|
|
|
|
private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = GUIDELINES.
|
|
stream().
|
|
collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline));
|
|
|
|
private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum);
|
|
|
|
|
|
public DataArchiveGuidelinesV2Profile() {
|
|
super("OpenAIRE Guidelines for Data Archives Profile v2");
|
|
}
|
|
|
|
@Override
|
|
public Collection<? extends Guideline<Document>> guidelines() {
|
|
return GUIDELINES;
|
|
}
|
|
|
|
@Override
|
|
public SyntheticGuideline guideline(String guidelineName) {
|
|
return GUIDELINE_MAP.get(guidelineName);
|
|
}
|
|
|
|
@Override
|
|
public int maxScore() {
|
|
return MAX_SCORE;
|
|
}
|
|
}
|