uoa-validator-engine2/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/DataArchiveGuidelinesV2Prof...

337 lines
16 KiB
Java

package eu.dnetlib.validator2.validation.guideline.openaire;
import eu.dnetlib.validator2.engine.Rule;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule;
import eu.dnetlib.validator2.validation.guideline.*;
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate;
import org.w3c.dom.Document;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.ONE_TO_N;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_ISO_8601_DATE_REG_EX;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_YEAR_YYYY_REG_EX;
/**
 * Validation profile registered under the name
 * "OpenAIRE Guidelines for Data Archives Profile v2".
 *
 * <p>The class declares one {@link ElementSpec} per metadata element (identifier, creator,
 * title, ...), wraps each spec in a {@link SyntheticGuideline}, and exposes the resulting
 * guidelines both as an ordered list and by name. The maximum score of the profile is the sum
 * of the weights of all guidelines.
 */
public class DataArchiveGuidelinesV2Profile extends AbstractOpenAireProfile {

    // ------------------------------------------------------------------
    // Allowed values for the attributes checked by the element specs.
    // ------------------------------------------------------------------

    /** Allowed values of {@code identifier/@identifierType}. */
    private static final String[] identifierTypes = {
        "ARK",
        "DOI",
        "Handle",
        "PURL",
        "URN",
        "URL"
    };

    /** Allowed values of {@code title/@titleType}. */
    private static final String[] titleTypes = {
        "AlternativeTitle",
        "Subtitle",
        "TranslatedTitle"
    };

    /** Allowed values of {@code contributor/@contributorType}. */
    private static final String[] contributorTypes = {
        "ContactPerson", "DataCollector", "DataCurator",
        "DataManager", "Distributor", "Editor", "Funder",
        "HostingInstitution", "Producer", "ProjectLeader",
        "ProjectManager", "ProjectMember", "RegistrationAgency",
        "RegistrationAuthority", "RelatedPerson", "Researcher",
        "ResearchGroup", "RightsHolder", "Sponsor",
        "Supervisor", "WorkPackageLeader", "Other"
    };

    /** Allowed values of {@code date/@dateType}. */
    private static final String[] dateTypes = {
        "Accepted",
        "Available",
        "Copyrighted",
        "Collected",
        "Created",
        "Issued",
        "Submitted",
        "Updated",
        "Valid"
    };

    /** Allowed values of {@code resourceType/@resourceTypeGeneral}. */
    private static final String[] resourceTypeGeneralValues = {
        "Audiovisual",
        "Collection",
        "Dataset",
        "Event",
        "Image",
        "InteractiveResource",
        "Model",
        "PhysicalObject",
        "Service",
        "Software",
        "Sound",
        "Text",
        "Workflow",
        "Other"
    };

    /** Allowed values of {@code relatedIdentifier/@relatedIdentifierType}. */
    private static final String[] relatedIdentifierTypes = {
        "ARK", "arXiv", "bibcode", "DOI", "EAN13",
        "EISSN", "Handle", "IGSN", "ISBN", "ISSN",
        "ISTC", "LISSN", "LSID", "PISSN", "PMID",
        "PURL", "UPC", "URL", "URN", "WOS"
    };

    /** Allowed values of {@code relatedIdentifier/@relationType}. */
    private static final String[] relationTypes = {
        "IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy",
        "IsContinuedBy", "Continues", "HasMetadata", "IsMetadataFor",
        "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart",
        "IsReferencedBy", "References", "IsDocumentedBy", "Documents",
        "IsCompiledBy", "Compiles", "IsVariantFormOf", "IsOriginalFormOf",
        "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom",
        "IsSourceOf"
    };

    /** Allowed values of {@code rights/@rightsURI}. */
    private static final String[] rightsURIList = {
        "info:eu-repo/semantics/closedAccess",
        "info:eu-repo/semantics/embargoedAccess",
        "info:eu-repo/semantics/restrictedAccess",
        "info:eu-repo/semantics/openAccess"
    };

    /** Allowed values of {@code description/@descriptionType}. */
    private static final String[] descriptionTypes = {
        "Abstract",
        "Methods",
        "SeriesInformation",
        "TableOfContents",
        "Other"
    };

    // ------------------------------------------------------------------
    // Element specifications.
    // ------------------------------------------------------------------

    /** Exactly one {@code identifier}, looked up in the metadata/oai_datacite/payload/resource context. */
    private static final ElementSpec IDENTIFIER_SPEC = Builders
        .forMandatoryElement("identifier", ONE)
        .inContext("metadata", "oai_datacite", "payload", "resource")
        .withMandatoryAttribute("identifierType", identifierTypes)
        .build();

    /** One or more {@code creator} elements, each with a single mandatory {@code creatorName}. */
    private static final ElementSpec CREATOR_SPEC = Builders
        .forMandatoryElement("creator", ONE_TO_N)
        .withSubElement(Builders.forMandatoryElement("creatorName", ONE))
        .withSubElement(Builders
            .forRecommendedElement("nameIdentifier")
            .withRecommendedAttribute("nameIdentifierScheme")
            .withRecommendedAttribute("schemeURI")
        )
        .withSubElement(Builders.forRecommendedRepeatableElement("affiliation"))
        .build();

    /** One or more {@code title} elements with an optional {@code titleType}. */
    private static final ElementSpec TITLE_SPEC = Builders
        .forMandatoryElement("title", ONE_TO_N)
        .withOptionalAttribute("titleType", titleTypes)
        .build();

    /** Exactly one {@code publisher}. */
    private static final ElementSpec PUBLISHER_SPEC = Builders
        .forMandatoryElement("publisher", ONE)
        .build();

    /** Exactly one {@code publicationYear} whose value must match the YYYY year pattern. */
    private static final ElementSpec PUBLICATION_YEAR_SPEC = Builders
        .forMandatoryElement("publicationYear", ONE)
        .allowedValues(new RegexValuePredicate(COMPILED_YEAR_YYYY_REG_EX))
        .build();

    /** Recommended, repeatable {@code subject}. */
    private static final ElementSpec SUBJECT_SPEC = Builders
        .forRecommendedRepeatableElement("subject")
        .withOptionalAttribute("subjectScheme")
        .withOptionalAttribute("schemeURI")
        .build();

    //TODO mandatory if contributorType=Funder (for element and subelements/properties)
    /**
     * {@code contributor} is checked only when a contributor element is present.
     * The {@code nameIdentifier} sub-element and its scheme are governed by
     * {@link #appRuleForContributorNameIdentifier()}.
     */
    private static final ElementSpec CONTRIBUTOR_SPEC = Builders
        .forMandatoryIfApplicableElement("contributor", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("contributor"))
        .withMandatoryIfApplicableAttribute("contributorType", appRuleForContributorType(), contributorTypes) //TODO
        .withSubElement(Builders
            .forMandatoryIfApplicableElement("contributorName", ONE, AbstractOpenAireProfile.elementIsPresent("contributor"))
        )
        //TODO the RegexValuePredicate for info:eu-repo/grantAgreement must be applied if contributorType is Funder
        .withSubElement(Builders.forMandatoryIfApplicableElement("nameIdentifier", ONE, appRuleForContributorNameIdentifier())
            .withMandatoryIfApplicableAttribute("nameIdentifierScheme", appRuleForContributorNameIdentifier())
            // Attribute name must not carry stray whitespace, otherwise it can never match "schemeURI".
            .withOptionalAttribute("schemeURI")
        )
        .withSubElement(Builders.forOptionalRepeatableElement("affiliation"))
        .build();

    //TODO: the published guideline states cardinality 0..n, which looks wrong: Date is mandatory, hence 1..n here.
    //TODO how to enforce that available dateTypes appear only once?
    /** One or more {@code date} elements whose value must match the ISO 8601 date pattern. */
    private static final ElementSpec DATE_SPEC = Builders
        .forMandatoryElement("date", ONE_TO_N)
        .allowedValues(new RegexValuePredicate(COMPILED_ISO_8601_DATE_REG_EX))
        .withMandatoryAttribute("dateType", dateTypes)
        .build();

    /** Recommended, repeatable {@code language}, validated by {@link ISO639ValuePredicate}. */
    private static final ElementSpec LANGUAGE_SPEC = Builders
        .forRecommendedRepeatableElement("language")
        .allowedValues(new ISO639ValuePredicate())
        .build();

    /** Recommended {@code resourceType} with a recommended {@code resourceTypeGeneral}. */
    private static final ElementSpec RESOURCE_TYPE_SPEC = Builders
        .forRecommendedElement("resourceType")
        .withRecommendedAttribute("resourceTypeGeneral", resourceTypeGeneralValues)
        .build();

    /** Optional, repeatable {@code alternateIdentifier}. */
    private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders
        .forOptionalRepeatableElement("alternateIdentifier")
        .withOptionalAttribute("alternateIdentifierType")
        .build();

    /** {@code relatedIdentifier}, applicable according to {@link #appRuleForRelatedIdentifier()}. */
    private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders
        .forMandatoryIfApplicableElement("relatedIdentifier", ONE_TO_N, appRuleForRelatedIdentifier())
        .withMandatoryAttribute("relatedIdentifierType", relatedIdentifierTypes)
        .withMandatoryAttribute("relationType", relationTypes)
        //TODO use the following three attributes only when relationType=HasMetadata|IsMetadataFor
        .withOptionalAttribute("relatedMetadataScheme")
        .withOptionalAttribute("schemeURI")
        .withOptionalAttribute("schemeType")
        .build();

    /** Optional, repeatable {@code size}. */
    private static final ElementSpec SIZE_SPEC = Builders
        .forOptionalRepeatableElement("size")
        .build();

    /** Optional, repeatable {@code format}. */
    private static final ElementSpec FORMAT_SPEC = Builders
        .forOptionalRepeatableElement("format")
        .build();

    /** Optional, repeatable {@code version}. */
    private static final ElementSpec VERSION_SPEC = Builders
        .forOptionalRepeatableElement("version")
        .build();

    /**
     * {@code rights}, checked only when a rights element is present. The spec is pinned to
     * {@link ElementPosition#FIRST} and requires {@code rightsURI} to be one of the access-level URIs.
     */
    private static final ElementSpec RIGHTS_SPEC = Builders
        .forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights"))
        .atPosition(ElementPosition.FIRST)
        .withMandatoryAttribute("rightsURI", rightsURIList)
        .build();

    // Not enabled: a spec for the rights element at the second position.
    // private static final ElementSpec RIGHTS_SPEC2 = Builders
    //     .forMandatoryIfApplicableElement("rights", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("rights"))
    //     .atPosition(ElementPosition.SECOND)
    //     .withMandatoryAttribute("rightsURI")
    //     .build();

    /**
     * {@code description}, checked only when a description element is present.
     * Applicability is keyed on the description element itself, in the same way that
     * the contributor and rights specs key on their own element.
     */
    private static final ElementSpec DESCRIPTION_SPEC = Builders
        .forMandatoryIfApplicableElement("description", ONE_TO_N, AbstractOpenAireProfile.elementIsPresent("description"))
        .withMandatoryAttribute("descriptionType", descriptionTypes)
        .build();

    /** Optional, repeatable {@code geoLocation} with optional point, box and place sub-elements. */
    private static final ElementSpec GEOLOCATION_SPEC = Builders
        .forOptionalRepeatableElement("geoLocation")
        .withSubElement(Builders.forOptionalElement("geoLocationPoint"))
        .withSubElement(Builders.forOptionalElement("geoLocationBox"))
        .withSubElement(Builders.forOptionalElement("geoLocationPlace"))
        .build();

    // ------------------------------------------------------------------
    // Applicability rules.
    // ------------------------------------------------------------------

    /**
     * Builds the applicability rule used for {@code contributor/@contributorType}.
     *
     * <p>The XPath selects the {@code contributorType} attribute of every element named
     * {@code contributor}; the cardinality range is 1..1.
     * NOTE(review): with this range a document carrying several typed contributors presumably
     * does not satisfy the rule - confirm against XMLCardinalityRule before relying on it.
     *
     * @return the cardinality rule, identified by {@link ElementSpec#APPLICABILITY_RULE_ID}
     */
    private static Rule<Document> appRuleForContributorType() {
        //TODO
        return XMLCardinalityRule
            .builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression("//*[name()='contributor']/@contributorType")
            .setIsInclusive(true)
            .setRange(1, 1)
            .build();
    }

    /**
     * Builds the applicability rule used for a contributor's {@code nameIdentifier} and its
     * {@code nameIdentifierScheme}.
     *
     * <p>The XPath selects elements named {@code contributor} whose {@code contributorType}
     * is {@code Funder}; the cardinality range is 1..1.
     * NOTE(review): records with more than one Funder presumably fall outside this range -
     * confirm whether 1..n was intended.
     *
     * @return the cardinality rule, identified by {@link ElementSpec#APPLICABILITY_RULE_ID}
     */
    private static Rule<Document> appRuleForContributorNameIdentifier() {
        return XMLCardinalityRule
            .builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression("//*[name()='contributor' and @contributorType='Funder']")
            .setIsInclusive(true)
            .setRange(1, 1)
            .build();
    }

    /**
     * Builds the applicability rule used for {@code relatedIdentifier}.
     *
     * <p>The XPath selects every element named {@code relatedIdentifier}; the cardinality
     * range runs from 1 up to {@code Long.MAX_VALUE - 1}.
     *
     * @return the cardinality rule, identified by {@link ElementSpec#APPLICABILITY_RULE_ID}
     */
    private static Rule<Document> appRuleForRelatedIdentifier() {
        //TODO
        return XMLCardinalityRule
            .builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression("//*[name()='relatedIdentifier']")
            .setIsInclusive(true)
            .setRange(1, Long.MAX_VALUE - 1)
            .build();
    }

    // ------------------------------------------------------------------
    // Guidelines.
    // ------------------------------------------------------------------

    //TODO: weights for guidelines haven't been finalized. Every guideline currently gets the same placeholder value (4).
    public static final SyntheticGuideline IDENTIFIER = SyntheticGuideline.of(
        "Identifier", "description",
        // Links to the data-archive guidelines, like every other guideline of this profile.
        "https://guidelines.openaire.eu/en/latest/data/field_identifier.html", "F",
        4, RequirementLevel.MANDATORY, IDENTIFIER_SPEC);

    public static final SyntheticGuideline CREATOR = SyntheticGuideline.of(
        "Creator", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_creator.html", "F",
        4, RequirementLevel.MANDATORY, CREATOR_SPEC);

    public static final SyntheticGuideline TITLE = SyntheticGuideline.of(
        "Title", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_title.html", "F",
        4, RequirementLevel.MANDATORY, TITLE_SPEC);

    public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of(
        "Publisher", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_publisher.html", "F",
        4, RequirementLevel.MANDATORY, PUBLISHER_SPEC);

    public static final SyntheticGuideline PUBLICATION_YEAR = SyntheticGuideline.of(
        "Publication Year", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_publicationyear.html", "F",
        4, RequirementLevel.MANDATORY, PUBLICATION_YEAR_SPEC);

    public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of(
        "Subject", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_subject.html", "F",
        4, RequirementLevel.RECOMMENDED, SUBJECT_SPEC);

    public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of(
        "Contributor", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_contributor.html", "F",
        4, RequirementLevel.MANDATORY_IF_APPLICABLE, CONTRIBUTOR_SPEC);

    public static final SyntheticGuideline DATE = SyntheticGuideline.of(
        "Date", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_date.html", "F",
        4, RequirementLevel.MANDATORY, DATE_SPEC);

    public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of(
        "Language", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_language.html", "F",
        4, RequirementLevel.RECOMMENDED, LANGUAGE_SPEC);

    public static final SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of(
        "Resource Type", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_resourcetype.html", "F",
        4, RequirementLevel.RECOMMENDED, RESOURCE_TYPE_SPEC);

    public static final SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of(
        "Alternate Identifier", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_alternateidentifier.html", "F",
        4, RequirementLevel.OPTIONAL, ALTERNATE_IDENTIFIER_SPEC);

    public static final SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of(
        "Related Identifier", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_relatedidentifier.html", "F",
        4, RequirementLevel.MANDATORY_IF_APPLICABLE, RELATED_IDENTIFIER_SPEC);

    public static final SyntheticGuideline SIZE = SyntheticGuideline.of(
        "Size", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_size.html", "F",
        4, RequirementLevel.OPTIONAL, SIZE_SPEC);

    public static final SyntheticGuideline FORMAT = SyntheticGuideline.of(
        "Format", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_format.html", "F",
        4, RequirementLevel.OPTIONAL, FORMAT_SPEC);

    public static final SyntheticGuideline VERSION = SyntheticGuideline.of(
        "Version", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_version.html", "F",
        4, RequirementLevel.OPTIONAL, VERSION_SPEC);

    public static final SyntheticGuideline RIGHTS = SyntheticGuideline.of(
        "Rights", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_rights.html", "F",
        4, RequirementLevel.MANDATORY_IF_APPLICABLE, RIGHTS_SPEC);

    public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of(
        "Description", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_description.html", "F",
        4, RequirementLevel.MANDATORY_IF_APPLICABLE, DESCRIPTION_SPEC);

    public static final SyntheticGuideline GEOLOCATION = SyntheticGuideline.of(
        "Geolocation", "description",
        "https://guidelines.openaire.eu/en/latest/data/field_geolocation.html", "F",
        4, RequirementLevel.OPTIONAL, GEOLOCATION_SPEC);

    /** All guidelines of this profile, in declaration order; read-only. */
    private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(
        Arrays.asList(
            IDENTIFIER,
            CREATOR,
            TITLE,
            PUBLISHER,
            PUBLICATION_YEAR,
            SUBJECT,
            CONTRIBUTOR,
            DATE,
            LANGUAGE,
            RESOURCE_TYPE,
            ALTERNATE_IDENTIFIER,
            RELATED_IDENTIFIER,
            SIZE,
            FORMAT,
            VERSION,
            RIGHTS,
            DESCRIPTION,
            GEOLOCATION
        )
    );

    /** Guidelines indexed by {@link SyntheticGuideline#getName()}; read-only, like {@link #GUIDELINES}. */
    private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = Collections.unmodifiableMap(
        GUIDELINES
            .stream()
            .collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline))
    );

    /** Sum of the weights of all guidelines in {@link #GUIDELINES}. */
    private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum);

    /** Creates the profile with the name "OpenAIRE Guidelines for Data Archives Profile v2". */
    public DataArchiveGuidelinesV2Profile() {
        super("OpenAIRE Guidelines for Data Archives Profile v2");
    }

    /**
     * @return the read-only list of all guidelines of this profile
     */
    @Override
    public Collection<? extends Guideline<Document>> guidelines() {
        return GUIDELINES;
    }

    /**
     * Looks up a guideline by its name.
     *
     * @param guidelineName the guideline name, e.g. {@code "Identifier"}
     * @return the matching guideline, or {@code null} when no guideline has that name
     */
    @Override
    public SyntheticGuideline guideline(String guidelineName) {
        return GUIDELINE_MAP.get(guidelineName);
    }

    /**
     * @return the sum of the weights of all guidelines of this profile
     */
    @Override
    public int maxScore() {
        return MAX_SCORE;
    }
}