From 8f6f94c971f7692015c0ea466e6c5548f52d27e9 Mon Sep 17 00:00:00 2001 From: pispis Date: Wed, 15 Mar 2023 17:02:14 +0200 Subject: [PATCH] Added v4 guidelines class without prefixes in the namespaces and kept the original. Added a commented-out warnings.isEmpty() if statement for debugging the warnings. Added Test_v4.java for testing the v4 guidelines validation. --- .../validation/guideline/StandardResult.java | 7 + .../LiteratureGuidelinesV4Profile.java | 150 ++--- ...rofile_with_prefixes_in_element_names.java | 511 ++++++++++++++++++ .../eu/dnetlib/validator2/engine/Test_v4.java | 62 +++ ...terature_all_invalid_guidelines_record.xml | 2 +- 5 files changed, 662 insertions(+), 70 deletions(-) create mode 100644 src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java create mode 100644 src/test/java/eu/dnetlib/validator2/engine/Test_v4.java diff --git a/src/main/java/eu/dnetlib/validator2/validation/guideline/StandardResult.java b/src/main/java/eu/dnetlib/validator2/validation/guideline/StandardResult.java index 583bbce..2178c92 100644 --- a/src/main/java/eu/dnetlib/validator2/validation/guideline/StandardResult.java +++ b/src/main/java/eu/dnetlib/validator2/validation/guideline/StandardResult.java @@ -55,6 +55,13 @@ public final class StandardResult implements Guideline.Result { public static StandardResult forSuccess(int score, List warnings) { return new StandardResult(score, Status.SUCCESS, sanitize(warnings), EMPTY, null); + // LEONIDAS +// if (warnings.isEmpty()) { +// return new StandardResult(score, Status.SUCCESS, sanitize(warnings), EMPTY, null); +// } +// else { +// return new StandardResult(0, Status.SUCCESS, sanitize(warnings), EMPTY, null); +// } } public static StandardResult forFailure(List warnings, List errors) { diff --git a/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile.java 
b/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile.java index 84feebd..fd9bab1 100644 --- a/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile.java +++ b/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile.java @@ -16,9 +16,7 @@ import java.util.*; import java.util.stream.Collectors; import static eu.dnetlib.validator2.validation.guideline.Cardinality.*; -import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_BCP47_LANG_TAGS_REG_EX; -import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_YYYY_MM_DD_RANGE_REGEX; -import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.COMPILED_YYYY_MM_DD_REGEX; +import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*; public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile { @@ -107,6 +105,11 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile "ARK", "DOI", "Handle", "PURL", "URL", "URN" }; + private static final String[] ACCESS_RIGHTS_TYPES = { + "open access", "embargoed access", + "restricted access", "metadata only access" + }; + private static final String[] ACCESS_RIGHTS_URIS = { "http://purl.org/coar/access_right/c_abf2", "http://purl.org/coar/access_right/c_f1cf", "http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_14cb" @@ -134,61 +137,61 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile }; private static final ElementSpec TITLE_SPEC = Builders. - forMandatoryElement("datacite:title", ONE_TO_N). + forMandatoryElement("title", ONE_TO_N). withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). withOptionalAttribute("titleType", TITLE_TYPES). build(); private static final ElementSpec CREATOR_SPEC = Builders. - forMandatoryElement("datacite:creator", ONE_TO_N). 
+ forMandatoryElement("creator", ONE_TO_N). withSubElement(Builders. - forMandatoryElement("datacite:creatorName", ONE). + forMandatoryElement("creatorName", ONE). withRecommendedAttribute("nameType", NAME_TYPES)). withSubElement(Builders. - forRecommendedElement("datacite:givenName")). + forRecommendedElement("givenName")). withSubElement(Builders. - forRecommendedElement("datacite:familyName")). + forRecommendedElement("familyName")). withSubElement(Builders. - forRecommendedRepeatableElement("datacite:nameIdentifier"). + forRecommendedRepeatableElement("nameIdentifier"). withMandatoryAttribute("nameIdentifierScheme"). withRecommendedAttribute("schemeURI")). withSubElement(Builders. - forRecommendedRepeatableElement("datacite:affiliation")). + forRecommendedRepeatableElement("affiliation")). build(); private static final ElementSpec CONTRIBUTOR_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:contributor", ONE_TO_N, elementIsPresent("datacite:contributor")). + forMandatoryIfApplicableElement("contributor", ONE_TO_N, elementIsPresent("contributor")). withMandatoryAttribute("contributorType", CONTRIBUTOR_TYPES). withSubElement(Builders. - forMandatoryElement("datacite:contributorName", ONE). + forMandatoryElement("contributorName", ONE). withRecommendedAttribute("nameType", NAME_TYPES)). withSubElement(Builders. - forOptionalElement("datacite:familyName")). + forOptionalElement("familyName")). withSubElement(Builders. - forOptionalElement("datacite:givenName")). + forOptionalElement("givenName")). withSubElement(Builders. - forRecommendedRepeatableElement("datacite:nameIdentifier"). + forRecommendedRepeatableElement("nameIdentifier"). withMandatoryAttribute("nameIdentifierScheme"). withRecommendedAttribute("schemeURI")). withSubElement(Builders. - forRecommendedRepeatableElement("datacite:affiliation")). + forRecommendedRepeatableElement("affiliation")). 
build(); //This property has some issues/annotations in documentation private static final ElementSpec FUNDING_REFERENCE_SPEC = Builders. - forMandatoryIfApplicableElement("oaire:fundingReference", ONE_TO_N, elementIsPresent("oaire:fundingReference")). + forMandatoryIfApplicableElement("fundingReference", ONE_TO_N, elementIsPresent("fundingReference")). withSubElement(Builders. - forMandatoryElement("oaire:funderName", ONE)). + forMandatoryElement("funderName", ONE)). withSubElement(Builders. - forRecommendedElement("oaire:funderIdentifier"). + forRecommendedElement("funderIdentifier"). withRecommendedAttribute("funderIdentifierType", FUNDER_IDENTIFIER_TYPES)). withSubElement(Builders. - forOptionalElement("oaire:fundingStream")). + forOptionalElement("fundingStream")). withSubElement(Builders. - forMandatoryIfApplicableElement("oaire:awardNumber", ONE, elementIsPresent("oaire:awardNumber")). + forMandatoryIfApplicableElement("awardNumber", ONE, elementIsPresent("awardNumber")). withRecommendedAttribute("awardURI")). withSubElement(Builders. - forRecommendedElement("oaire:awardTitle")). + forRecommendedElement("awardTitle")). build(); //TODO: Allowed values are referred as "suggested" in the documentation, but then a controlled list is given. @@ -196,12 +199,12 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile // https://bitbucket.org/saikos/openaire-validator/issues/40 // https://bitbucket.org/saikos/openaire-validator/issues/32/ private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders. - forRecommendedRepeatableElement("datacite:alternateIdentifier"). + forRecommendedRepeatableElement("alternateIdentifier"). withMandatoryAttribute("alternateIdentifierType", IDENTIFIER_TYPES). build(); private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders. - forRecommendedRepeatableElement("datacite:relatedIdentifier"). + forRecommendedRepeatableElement("relatedIdentifier"). 
withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES). withMandatoryAttribute("relationType", RELATION_TYPES). //TODO: For following 3 attributes. Need a way to target relationType attribute of current element @@ -218,21 +221,28 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile Date encoding "YYYY-MM-DD" is referred as best practice. Should introduce in allowed values? */ //TODO: Implement proper applicability rule + //LEONIDAS: The withMandatoryAttribute fails when another date element, e.g. for Publication Date, exists private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:date", TWO, applicabilityRuleForEmbargoPeriodDate()). + forMandatoryIfApplicableElement("date", ONE_TO_N, applicabilityRuleForEmbargoPeriodDate()). withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES). + allowedValues(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX).or(new RegexValuePredicate(COMPILED_YEAR_YYYY_REG_EX))). build(); +// private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders. +// forMandatoryIfApplicableElement("date", TWO, applicabilityRuleForEmbargoPeriodDate()). +// withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES). +// build(); + /* There are no "strict" allowed values. Recommendations are IETF BCP 47 and ISO 639-x */ private static final ElementSpec LANGUAGE_SPEC = Builders. - forMandatoryIfApplicableElement("dc:language", ONE_TO_N, elementIsPresent("dc:language")). + forMandatoryIfApplicableElement("language", ONE_TO_N, elementIsPresent("language")). allowedValues(new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX).or(new ISO639ValuePredicate())). build(); private static final ElementSpec PUBLISHER_SPEC = Builders. - forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher")). + forMandatoryIfApplicableElement("publisher", ONE_TO_N, elementIsPresent("publisher")). 
build(); /* @@ -241,96 +251,98 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile "Recommended" best practice for encoding the date value is ISO 8601 [W3CDTF] (YYYY-MM-DD) (YYYY mandatory) */ private static final ElementSpec PUBLICATION_DATE_SPEC = Builders. - forMandatoryElement("datacite:date", ONE). + forMandatoryElement("date", ONE). withMandatoryAttribute("dateType", PUBLICATION_DATE_TYPE). + allowedValues(new RegexValuePredicate(COMPILED_PUBLICATION_DATE_REG_EX).or(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX).or(new RegexValuePredicate(COMPILED_YEAR_YYYY_REG_EX)))). build(); private static final ElementSpec RESOURCE_TYPE_SPEC = Builders. - forMandatoryElement("oaire:resourceType", ONE). + forMandatoryElement("resourceType", ONE). withMandatoryAttribute("resourceTypeGeneral", RESOURCE_GENERAL_TYPES). withMandatoryAttribute("uri", RESOURCE_CONCEPT_URIS). build(); private static final ElementSpec DESCRIPTION_SPEC = Builders. - forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description")). + forMandatoryIfApplicableElement("description", ONE_TO_N, elementIsPresent("description")). withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). build(); private static final ElementSpec FORMAT_SPEC = Builders. - forRecommendedRepeatableElement("dc:format"). + forRecommendedRepeatableElement("format"). allowedValues(new MediaTypesValuePredicate()). build(); private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders. - forMandatoryElement("datacite:identifier", ONE). + forMandatoryElement("identifier", ONE). withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES). build(); private static final ElementSpec ACCESS_RIGHTS_SPEC = Builders. - forMandatoryElement("datacite:rights", ONE). + forMandatoryElement("rights", ONE). withMandatoryAttribute("uri", ACCESS_RIGHTS_URIS). + allowedValues(ACCESS_RIGHTS_TYPES). 
build(); private static final ElementSpec SOURCE_SPEC = Builders. - forRecommendedRepeatableElement("dc:source"). + forRecommendedRepeatableElement("source"). build(); //TODO: Should we check URI attribute values are valid? private static final ElementSpec SUBJECT_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:subject", ONE_TO_N, elementIsPresent("datacite:subject")). + forMandatoryIfApplicableElement("subject", ONE_TO_N, elementIsPresent("subject")). withOptionalAttribute("subjectScheme"). withOptionalAttribute("schemeURI"). withOptionalAttribute("valueURI"). build(); private static final ElementSpec LICENSE_CONDITION_SPEC = Builders. - forRecommendedElement("oaire:licenseCondition"). - withMandatoryIfApplicableAttribute("uri", elementIsPresent("oaire:licenseCondition")). - withMandatoryIfApplicableAttribute("startDate", elementIsPresent("oaire:licenseCondition")). + forRecommendedElement("licenseCondition"). + withMandatoryIfApplicableAttribute("uri", elementIsPresent("licenseCondition")). + withMandatoryIfApplicableAttribute("startDate", elementIsPresent("licenseCondition")). build(); private static final ElementSpec COVERAGE_SPEC = Builders. - forRecommendedRepeatableElement("dc:coverage"). + forRecommendedRepeatableElement("coverage"). build(); private static final ElementSpec SIZE_SPEC = Builders. - forOptionalRepeatableElement("datacite:size"). + forOptionalRepeatableElement("size"). build(); private static final ElementSpec GEO_LOCATION_SPEC = Builders. - forOptionalRepeatableElement("datacite:geoLocation"). + forOptionalRepeatableElement("geoLocation"). withSubElement(Builders. - forOptionalElement("datacite:geoLocationPoint"). + forOptionalElement("geoLocationPoint"). withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). + forMandatoryElement("pointLongitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE))). + forMandatoryElement("pointLatitude", ONE))). 
withSubElement(Builders. - forOptionalElement("datacite:geoLocationBox"). + forOptionalElement("geoLocationBox"). withSubElement(Builders. - forMandatoryElement("datacite:westBoundLongitude", ONE)). + forMandatoryElement("westBoundLongitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:eastBoundLongitude", ONE)). + forMandatoryElement("eastBoundLongitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:southBoundLatitude", ONE)). + forMandatoryElement("southBoundLatitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:northBoundLatitude", ONE))). + forMandatoryElement("northBoundLatitude", ONE))). withSubElement(Builders. - forOptionalElement("datacite:geoLocationPlace")). + forOptionalElement("geoLocationPlace")). withSubElement(Builders. - forOptionalRepeatableElement("datacite:geoLocationPolygon"). + forOptionalRepeatableElement("geoLocationPolygon"). withSubElement(Builders. - forMandatoryElement("datacite:polygonPoint", FOUR_TO_N). + forMandatoryElement("polygonPoint", FOUR_TO_N). withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). + forMandatoryElement("pointLongitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE))). + forMandatoryElement("pointLatitude", ONE))). withSubElement(Builders. - forOptionalElement("datacite:inPolygonPoint"). + forOptionalElement("inPolygonPoint"). withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). + forMandatoryElement("pointLongitude", ONE)). withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE)))). + forMandatoryElement("pointLatitude", ONE)))). build(); /* @@ -342,57 +354,57 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile TODO: Should we cross-check attribute and element value are relevant? */ private static final ElementSpec RESOURCE_VERSION_SPEC = Builders. - forRecommendedElement("oaire:version"). 
+ forRecommendedElement("version"). withMandatoryIfApplicableAttribute("uri", applicabilityRuleForURIAttributeOfResourceVersion(), RESOURCE_VERSION_URIS). build(); //TODO: Has annotation/issue: accessRightsURI attribute values also appears on ACCESS_RIGHTS_SPEC. Should check it's the same? private static final ElementSpec FILE_LOCATION_SPEC = Builders. - forMandatoryIfApplicableElement("oaire:file", ONE_TO_N, elementIsPresent("oaire:file")). + forMandatoryIfApplicableElement("file", ONE_TO_N, elementIsPresent("file")). withRecommendedAttribute("accessRightsURI", ACCESS_RIGHTS_URIS). withRecommendedAttribute("mimeType", new MediaTypesValuePredicate()). withRecommendedAttribute("objectType", FILE_OBJECT_TYPES). build(); private static final ElementSpec CITATION_TITLE_SPEC = Builders. - forRecommendedElement("oaire:citationTitle"). + forRecommendedElement("citationTitle"). build(); private static final ElementSpec CITATION_VOLUME_SPEC = Builders. - forRecommendedElement("oaire:citationVolume"). + forRecommendedElement("citationVolume"). build(); private static final ElementSpec CITATION_ISSUE_SPEC = Builders. - forRecommendedElement("oaire:citationIssue"). + forRecommendedElement("citationIssue"). build(); private static final ElementSpec CITATION_START_PAGE_SPEC = Builders. - forRecommendedElement("oaire:citationStartPage"). + forRecommendedElement("citationStartPage"). build(); private static final ElementSpec CITATION_END_PAGE_SPEC = Builders. - forRecommendedElement("oaire:citationEndPage"). + forRecommendedElement("citationEndPage"). build(); private static final ElementSpec CITATION_EDITION_SPEC = Builders. - forRecommendedElement("oaire:citationEdition"). + forRecommendedElement("citationEdition"). build(); private static final ElementSpec CITATION_CONFERENCE_PLACE_SPEC = Builders. - forRecommendedElement("oaire:citationConferencePlace"). + forRecommendedElement("citationConferencePlace"). 
build(); //TODO: Implement regex/allowedValuesPredicate // Date has recommended best practice ISO 8601 [W3CDTF], and two [single date] [start date - end date] formats private static final ElementSpec CITATION_CONFERENCE_DATE_SPEC = Builders. - forRecommendedElement("oaire:citationConferenceDate"). + forRecommendedElement("citationConferenceDate"). allowedValues(new RegexValuePredicate(COMPILED_YYYY_MM_DD_REGEX).or(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX))). build(); //TODO: A non-exhaustive list is provided for values, derived from the Common Education Data Standards vocabulary // Should we add it? private static final ElementSpec AUDIENCE_SPEC = Builders. - forOptionalRepeatableElement("dcterms:audience"). + forOptionalRepeatableElement("audience"). allowedValues(AUDIENCE_VOCABULARY). build(); @@ -400,7 +412,7 @@ public final class LiteratureGuidelinesV4Profile extends AbstractOpenAireProfile return XMLCardinalityRule.builder(). setId(ElementSpec.APPLICABILITY_RULE_ID). // first predicate count(...) makes sure there is only one "Access Rights" element, and the second predicate verifies its value. - setXPathExpression("//*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access']"). + setXPathExpression("//*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access']"). setRange(1,1). setIsInclusive(true). 
build(); diff --git a/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java b/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java new file mode 100644 index 0000000..2828e2a --- /dev/null +++ b/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java @@ -0,0 +1,511 @@ +package eu.dnetlib.validator2.validation.guideline.openaire; + +import eu.dnetlib.validator2.engine.Rule; +import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule; +import eu.dnetlib.validator2.engine.builtins.XMLVocabularyRule; +import eu.dnetlib.validator2.validation.guideline.Builders; +import eu.dnetlib.validator2.validation.guideline.ElementSpec; +import eu.dnetlib.validator2.validation.guideline.Guideline; +import eu.dnetlib.validator2.validation.guideline.SyntheticGuideline; +import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate; +import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate; +import eu.dnetlib.validator2.validation.utils.RegexValuePredicate; +import org.w3c.dom.Document; + +import java.util.*; +import java.util.stream.Collectors; + +import static eu.dnetlib.validator2.validation.guideline.Cardinality.*; +import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*; + +public final class LiteratureGuidelinesV4Profile_with_prefixes_in_element_names extends AbstractOpenAireProfile { + + private static final String[] TITLE_TYPES = { + "AlternativeTitle", "Subtitle", "TranslatedTitle", "Other" + }; + + private static final String[] NAME_TYPES = { + "Organizational", "Personal" + }; + + private static final String[] CONTRIBUTOR_TYPES = { + "ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", + "Editor", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", 
+ "RegistrationAgency", "RegistrationAuthority", "RelatedPerson", "Researcher", "ResearchGroup", + "RightsHolder", "Sponsor", "Supervisor", "WorkPackageLeader", "Other" + }; + + private static final String[] FUNDER_IDENTIFIER_TYPES = { + "ISNI", "GRID", "Crossref Funder" + }; + + private static final String[] IDENTIFIER_TYPES = { + "ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN", + "ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN", "WOS", + }; + + private static final String[] RELATION_TYPES = { + "IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy", + "Continues", "IsDescribedBy", "Describes", "HasMetadata", "IsMetadataFor", "HasVersion", + "IsVersionOf", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart", "IsReferencedBy", + "References", "IsDocumentedBy", "Documents", "IsCompiledBy", "Compiles", "IsVariantFormOf", + "IsOriginalFormOf", "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf", + "IsRequiredBy", "Requires" + }; + + private static final String[] RELATED_RESOURCE_GENERAL_TYPES = { + "Audiovisual", "Collection", "DataPaper", "Dataset", "Event", "Image", "InteractiveResource", + "Model", "PhysicalObject", "Service", "Software", "Sound", "Text", "Workflow", "Other" + }; + + private static final String[] EMBARGO_DATE_TYPES = { + "Accepted", "Available" + }; + + private static final String[] PUBLICATION_DATE_TYPE = { + "Issued" + }; + + private static final String[] RESOURCE_GENERAL_TYPES = { + "literature", "dataset", "software", "other research product" + }; + + private static final String[] RESOURCE_CONCEPT_URIS = { + "http://purl.org/coar/resource_type/c_1162", "http://purl.org/coar/resource_type/c_6501", + "http://purl.org/coar/resource_type/c_545b", "http://purl.org/coar/resource_type/c_b239", + "http://purl.org/coar/resource_type/c_2df8fbb1", "http://purl.org/coar/resource_type/c_dcae04bc", + "http://purl.org/coar/resource_type/c_beb9", 
"http://purl.org/coar/resource_type/c_3e5a", + "http://purl.org/coar/resource_type/c_ba08", "http://purl.org/coar/resource_type/c_3248", + "http://purl.org/coar/resource_type/c_2f33", "http://purl.org/coar/resource_type/c_86bc", + "http://purl.org/coar/resource_type/c_816b", "http://purl.org/coar/resource_type/c_8042", + "http://purl.org/coar/resource_type/c_71bd", "http://purl.org/coar/resource_type/c_18gh", + "http://purl.org/coar/resource_type/c_18ws", "http://purl.org/coar/resource_type/c_18hj", + "http://purl.org/coar/resource_type/c_18op", "http://purl.org/coar/resource_type/c_186u", + "http://purl.org/coar/resource_type/c_18wq", "http://purl.org/coar/resource_type/c_18wz", + "http://purl.org/coar/resource_type/c_18ww", "http://purl.org/coar/resource_type/c_efa0", + "http://purl.org/coar/resource_type/c_baaf", "http://purl.org/coar/resource_type/c_ba1f", + "http://purl.org/coar/resource_type/c_93fc", "http://purl.org/coar/resource_type/c_15cd", + "http://purl.org/coar/resource_type/c_18co", "http://purl.org/coar/resource_type/c_18cp", + "http://purl.org/coar/resource_type/c_6670", "http://purl.org/coar/resource_type/c_5794", + "http://purl.org/coar/resource_type/c_c94f", "http://purl.org/coar/resource_type/c_f744", + "http://purl.org/coar/resource_type/c_7a1f", "http://purl.org/coar/resource_type/c_bdcc", + "http://purl.org/coar/resource_type/c_db06", "http://purl.org/coar/resource_type/c_46ec", + "http://purl.org/coar/resource_type/c_0857", "http://purl.org/coar/resource_type/c_8544", + "http://purl.org/coar/resource_type/c_18cf", "http://purl.org/coar/resource_type/c_18cw", + "http://purl.org/coar/resource_type/c_18cd", "http://purl.org/coar/resource_type/c_18cc", + "http://purl.org/coar/resource_type/c_12ce", "http://purl.org/coar/resource_type/c_8a7e", + "http://purl.org/coar/resource_type/c_ecc8", "http://purl.org/coar/resource_type/c_c513", + "http://purl.org/coar/resource_type/c_12cd", "http://purl.org/coar/resource_type/c_12cc", + 
"http://purl.org/coar/resource_type/c_5ce6", "http://purl.org/coar/resource_type/c_ddb1", + "http://purl.org/coar/resource_type/c_e9a0", "http://purl.org/coar/resource_type/c_7ad9", + "http://purl.org/coar/resource_type/c_393c", "http://purl.org/coar/resource_type/c_1843" + }; + + private static final String[] RESOURCE_IDENTIFIER_TYPES = { + "ARK", "DOI", "Handle", "PURL", "URL", "URN" + }; + + private static final String[] ACCESS_RIGHTS_URIS = { + "http://purl.org/coar/access_right/c_abf2", "http://purl.org/coar/access_right/c_f1cf", + "http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_14cb" + }; + + private static final String[] RESOURCE_VERSION_URIS = { + "http://purl.org/coar/version/c_b1a7d7d4d402bcce", "http://purl.org/coar/version/c_71e4c1898caa6e32", + "http://purl.org/coar/version/c_ab4af688f83e57aa", "http://purl.org/coar/version/c_fa2ee174bc00049f", + "http://purl.org/coar/version/c_970fb48d4fbd8a85", "http://purl.org/coar/version/c_e19f295774971610", + "http://purl.org/coar/version/c_dc82b40f9837b551", "http://purl.org/coar/version/c_be7fb7dd8ff6fe43" + }; + + private static final String[] RESOURCE_VERSION_LABELS = { + "AO", "SMUR", "AM", "P", "VoR", "CVoR", "EVoR", "NA" + }; + + private static final String[] FILE_OBJECT_TYPES = { + "fulltext", "dataset", "software", "other" + }; + + private static final String[] AUDIENCE_VOCABULARY = { + "Administrators", "Community Groups", "Counsellors", "Federal Funds Recipients and Applicants", + "Librarians", "News Media", "Other", "Parents and Families", "Policymakers", "Researchers", + "School Support Staff", "Student Financial Aid Providers", "Students", "Teachers" + }; + + private static final ElementSpec TITLE_SPEC = Builders. + forMandatoryElement("datacite:title", ONE_TO_N). + withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). + withOptionalAttribute("titleType", TITLE_TYPES). 
+ build(); + + private static final ElementSpec CREATOR_SPEC = Builders. + forMandatoryElement("datacite:creator", ONE_TO_N). + withSubElement(Builders. + forMandatoryElement("datacite:creatorName", ONE). + withRecommendedAttribute("nameType", NAME_TYPES)). + withSubElement(Builders. + forRecommendedElement("datacite:givenName")). + withSubElement(Builders. + forRecommendedElement("datacite:familyName")). + withSubElement(Builders. + forRecommendedRepeatableElement("datacite:nameIdentifier"). + withMandatoryAttribute("nameIdentifierScheme"). + withRecommendedAttribute("schemeURI")). + withSubElement(Builders. + forRecommendedRepeatableElement("datacite:affiliation")). + build(); + + private static final ElementSpec CONTRIBUTOR_SPEC = Builders. + forMandatoryIfApplicableElement("datacite:contributor", ONE_TO_N, elementIsPresent("datacite:contributor")). + withMandatoryAttribute("contributorType", CONTRIBUTOR_TYPES). + withSubElement(Builders. + forMandatoryElement("datacite:contributorName", ONE). + withRecommendedAttribute("nameType", NAME_TYPES)). + withSubElement(Builders. + forOptionalElement("datacite:familyName")). + withSubElement(Builders. + forOptionalElement("datacite:givenName")). + withSubElement(Builders. + forRecommendedRepeatableElement("datacite:nameIdentifier"). + withMandatoryAttribute("nameIdentifierScheme"). + withRecommendedAttribute("schemeURI")). + withSubElement(Builders. + forRecommendedRepeatableElement("datacite:affiliation")). + build(); + + //This property has some issues/annotations in documentation + private static final ElementSpec FUNDING_REFERENCE_SPEC = Builders. + forMandatoryIfApplicableElement("oaire:fundingReference", ONE_TO_N, elementIsPresent("oaire:fundingReference")). + withSubElement(Builders. + forMandatoryElement("oaire:funderName", ONE)). + withSubElement(Builders. + forRecommendedElement("oaire:funderIdentifier"). + withRecommendedAttribute("funderIdentifierType", FUNDER_IDENTIFIER_TYPES)). + withSubElement(Builders. 
+ forOptionalElement("oaire:fundingStream")). + withSubElement(Builders. + forMandatoryIfApplicableElement("oaire:awardNumber", ONE, elementIsPresent("oaire:awardNumber")). + withRecommendedAttribute("awardURI")). + withSubElement(Builders. + forRecommendedElement("oaire:awardTitle")). + build(); + + //TODO: Allowed values are referred as "suggested" in the documentation, but then a controlled list is given. + // Relevant issues: + // https://bitbucket.org/saikos/openaire-validator/issues/40 + // https://bitbucket.org/saikos/openaire-validator/issues/32/ + private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders. + forRecommendedRepeatableElement("datacite:alternateIdentifier"). + withMandatoryAttribute("alternateIdentifierType", IDENTIFIER_TYPES). + build(); + + private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders. + forRecommendedRepeatableElement("datacite:relatedIdentifier"). + withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES). + withMandatoryAttribute("relationType", RELATION_TYPES). + //TODO: For following 3 attributes. Need a way to target relationType attribute of current element + // - Should be used only with relation type (HasMetadata/IsMetadataFor). + withOptionalAttribute("relatedMetadataScheme"). + withOptionalAttribute("schemeURI"). + withOptionalAttribute("schemeType"). + withOptionalAttribute("resourceTypeGeneral", RELATED_RESOURCE_GENERAL_TYPES). + build(); + + /* + Applicable when Access Rights is set to: + embargoed access + Date encoding "YYYY-MM-DD" is referred as best practice. Should introduce in allowed values? + */ + //TODO: Implement proper applicability rule + private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders. + forMandatoryIfApplicableElement("datacite:date", TWO, applicabilityRuleForEmbargoPeriodDate()). + withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES). + build(); + + /* + There are no "strict" allowed values. 
Recommendations are IETF BCP 47 and ISO 639-x + */ + private static final ElementSpec LANGUAGE_SPEC = Builders. + forMandatoryIfApplicableElement("dc:language", ONE_TO_N, elementIsPresent("dc:language")). + allowedValues(new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX).or(new ISO639ValuePredicate())). + build(); + + private static final ElementSpec PUBLISHER_SPEC = Builders. + forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher")). + build(); + + /* + TODO: Same name as EMBARGO_PERIOD_DATE_SPEC above, with different attribute allowed value. + Should probably revisit, take that into consideration, when making relevant rules. + "Recommended" best practice for encoding the date value is ISO 8601 [W3CDTF] (YYYY-MM-DD) (YYYY mandatory) + */ + private static final ElementSpec PUBLICATION_DATE_SPEC = Builders. + forMandatoryElement("datacite:date", ONE). + withMandatoryAttribute("dateType", PUBLICATION_DATE_TYPE). + build(); + + private static final ElementSpec RESOURCE_TYPE_SPEC = Builders. + forMandatoryElement("oaire:resourceType", ONE). + withMandatoryAttribute("resourceTypeGeneral", RESOURCE_GENERAL_TYPES). + withMandatoryAttribute("uri", RESOURCE_CONCEPT_URIS). + build(); + + private static final ElementSpec DESCRIPTION_SPEC = Builders. + forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description")). + withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). + build(); + + private static final ElementSpec FORMAT_SPEC = Builders. + forRecommendedRepeatableElement("dc:format"). + allowedValues(new MediaTypesValuePredicate()). + build(); + + private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders. + forMandatoryElement("datacite:identifier", ONE). + withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES). + build(); + + private static final ElementSpec ACCESS_RIGHTS_SPEC = Builders. + forMandatoryElement("datacite:rights", ONE). 
+            withMandatoryAttribute("uri", ACCESS_RIGHTS_URIS).
+            build();
+
+    private static final ElementSpec SOURCE_SPEC = Builders.
+            forRecommendedRepeatableElement("dc:source").
+            build();
+
+    //TODO: Should we check that the URI attribute values are valid?
+    private static final ElementSpec SUBJECT_SPEC = Builders.
+            forMandatoryIfApplicableElement("datacite:subject", ONE_TO_N, elementIsPresent("datacite:subject")).
+            withOptionalAttribute("subjectScheme").
+            withOptionalAttribute("schemeURI").
+            withOptionalAttribute("valueURI").
+            build();
+
+    private static final ElementSpec LICENSE_CONDITION_SPEC = Builders.
+            forRecommendedElement("oaire:licenseCondition").
+            withMandatoryIfApplicableAttribute("uri", elementIsPresent("oaire:licenseCondition")).
+            withMandatoryIfApplicableAttribute("startDate", elementIsPresent("oaire:licenseCondition")).
+            build();
+
+    private static final ElementSpec COVERAGE_SPEC = Builders.
+            forRecommendedRepeatableElement("dc:coverage").
+            build();
+
+    private static final ElementSpec SIZE_SPEC = Builders.
+            forOptionalRepeatableElement("datacite:size").
+            build();
+
+    private static final ElementSpec GEO_LOCATION_SPEC = Builders.
+            forOptionalRepeatableElement("datacite:geoLocation").
+            withSubElement(Builders.
+                    forOptionalElement("datacite:geoLocationPoint").
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:pointLongitude", ONE)).
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:pointLatitude", ONE))).
+            withSubElement(Builders.
+                    forOptionalElement("datacite:geoLocationBox").
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:westBoundLongitude", ONE)).
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:eastBoundLongitude", ONE)).
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:southBoundLatitude", ONE)).
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:northBoundLatitude", ONE))).
+            withSubElement(Builders.
+                    forOptionalElement("datacite:geoLocationPlace")).
+            withSubElement(Builders.
+                    forOptionalRepeatableElement("datacite:geoLocationPolygon").
+                    withSubElement(Builders.
+                            forMandatoryElement("datacite:polygonPoint", FOUR_TO_N).
+                            withSubElement(Builders.
+                                    forMandatoryElement("datacite:pointLongitude", ONE)).
+                            withSubElement(Builders.
+                                    forMandatoryElement("datacite:pointLatitude", ONE))).
+                    withSubElement(Builders.
+                            forOptionalElement("datacite:inPolygonPoint").
+                            withSubElement(Builders.
+                                    forMandatoryElement("datacite:pointLongitude", ONE)).
+                            withSubElement(Builders.
+                                    forMandatoryElement("datacite:pointLatitude", ONE)))).
+            build();
+
+    /*
+     TODO: RequirementLevel.RECOMMENDED, Cardinality.ONE?
+     If the uri attribute is present, the element also has relevant controlled allowed values mapped to its value.
+     The uri attribute is applicable when the element value is one of the controlled values.
+     It must be the relevant value of [AO, SMUR, AM, P, VoR, CVoR, EVoR, NA].
+     Otherwise the element value can be a number.
+     TODO: Should we cross-check that the attribute and the element value are relevant to each other?
+     */
+    private static final ElementSpec RESOURCE_VERSION_SPEC = Builders.
+            forRecommendedElement("oaire:version").
+            withMandatoryIfApplicableAttribute("uri", applicabilityRuleForURIAttributeOfResourceVersion(), RESOURCE_VERSION_URIS).
+            build();
+
+    //TODO: Has annotation/issue: accessRightsURI attribute values also appear on ACCESS_RIGHTS_SPEC. Should we check they are the same?
+    private static final ElementSpec FILE_LOCATION_SPEC = Builders.
+            forMandatoryIfApplicableElement("oaire:file", ONE_TO_N, elementIsPresent("oaire:file")).
+            withRecommendedAttribute("accessRightsURI", ACCESS_RIGHTS_URIS).
+            withRecommendedAttribute("mimeType", new MediaTypesValuePredicate()).
+            withRecommendedAttribute("objectType", FILE_OBJECT_TYPES).
+            build();
+
+    private static final ElementSpec CITATION_TITLE_SPEC = Builders.
+            forRecommendedElement("oaire:citationTitle").
+            build();
+
+    private static final ElementSpec CITATION_VOLUME_SPEC = Builders.
+            forRecommendedElement("oaire:citationVolume").
+            build();
+
+    private static final ElementSpec CITATION_ISSUE_SPEC = Builders.
+            forRecommendedElement("oaire:citationIssue").
+            build();
+
+    private static final ElementSpec CITATION_START_PAGE_SPEC = Builders.
+            forRecommendedElement("oaire:citationStartPage").
+            build();
+
+    private static final ElementSpec CITATION_END_PAGE_SPEC = Builders.
+            forRecommendedElement("oaire:citationEndPage").
+            build();
+
+    private static final ElementSpec CITATION_EDITION_SPEC = Builders.
+            forRecommendedElement("oaire:citationEdition").
+            build();
+
+    private static final ElementSpec CITATION_CONFERENCE_PLACE_SPEC = Builders.
+            forRecommendedElement("oaire:citationConferencePlace").
+            build();
+
+    //TODO: Implement regex/allowedValuesPredicate
+    // The date has a recommended best practice of ISO 8601 [W3CDTF], and two formats: [single date] and [start date - end date]
+    private static final ElementSpec CITATION_CONFERENCE_DATE_SPEC = Builders.
+            forRecommendedElement("oaire:citationConferenceDate").
+            allowedValues(new RegexValuePredicate(COMPILED_YYYY_MM_DD_REGEX).or(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX))).
+            build();
+
+    //TODO: A non-exhaustive list is provided for values, derived from the Common Education Data Standards vocabulary
+    // Should we add it?
+    private static final ElementSpec AUDIENCE_SPEC = Builders.
+            forOptionalRepeatableElement("dcterms:audience").
+            allowedValues(AUDIENCE_VOCABULARY).
+            build();
+
+    private static Rule applicabilityRuleForEmbargoPeriodDate() {
+        return XMLCardinalityRule.builder().
+                setId(ElementSpec.APPLICABILITY_RULE_ID).
+                // The first predicate, count(...), makes sure there is only one "Access Rights" element; the second predicate verifies its uri and text value.
+                setXPathExpression("//*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access']").
+                setRange(1,1).
+                setIsInclusive(true).
+                build();
+    }
+
+    private static Rule applicabilityRuleForURIAttributeOfResourceVersion() {
+        return XMLVocabularyRule.builder().
+                setId(ElementSpec.APPLICABILITY_RULE_ID).
+                setXPathExpression("//*[name()='oaire:version']/text()").
+                setNodeListAction("1").
+                setVocabularyTermsAndTermsType(String.join(", ", RESOURCE_VERSION_LABELS), "whitelist").
+                build();
+    }
+
+    //TODO: Weights for guidelines haven't been finalized; each has been given an arbitrary value of 1. (Constants are final so callers cannot reassign them.)
+    public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", 1, TITLE_SPEC);
+    public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", 1, CREATOR_SPEC);
+    public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", 1, CONTRIBUTOR_SPEC);
+    public static final SyntheticGuideline FUNDING_REFERENCE = SyntheticGuideline.of("Funding Reference", 1, FUNDING_REFERENCE_SPEC);
+    public static final SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of("Alternate Identifier", 1, ALTERNATE_IDENTIFIER_SPEC);
+    public static final SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of("Related Identifier", 1, RELATED_IDENTIFIER_SPEC);
+    public static final SyntheticGuideline EMBARGO_PERIOD_DATE = SyntheticGuideline.of("Embargo Period Date", 1, EMBARGO_PERIOD_DATE_SPEC);
+    public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", 1, LANGUAGE_SPEC);
+    public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", 1, PUBLISHER_SPEC);
+    public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", 1, PUBLICATION_DATE_SPEC);
+    public static final SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of("Resource Type", 1, RESOURCE_TYPE_SPEC);
+    public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", 1, DESCRIPTION_SPEC);
+    public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", 1, FORMAT_SPEC);
+    public static final SyntheticGuideline RESOURCE_IDENTIFIER =
SyntheticGuideline.of("Resource Identifier", 1, RESOURCE_IDENTIFIER_SPEC);
+    public static final SyntheticGuideline ACCESS_RIGHTS = SyntheticGuideline.of("Access Rights", 1, ACCESS_RIGHTS_SPEC);
+    public static final SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", 1, SOURCE_SPEC);
+    public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", 1, SUBJECT_SPEC);
+    public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", 1, LICENSE_CONDITION_SPEC);
+    public static final SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", 1, COVERAGE_SPEC);
+    public static final SyntheticGuideline SIZE = SyntheticGuideline.of("Size", 1, SIZE_SPEC);
+    public static final SyntheticGuideline GEO_LOCATION = SyntheticGuideline.of("Geo Location", 1, GEO_LOCATION_SPEC);
+    public static final SyntheticGuideline RESOURCE_VERSION = SyntheticGuideline.of("Resource Version", 1, RESOURCE_VERSION_SPEC);
+    public static final SyntheticGuideline FILE_LOCATION = SyntheticGuideline.of("File Location", 1, FILE_LOCATION_SPEC);
+    public static final SyntheticGuideline CITATION_TITLE = SyntheticGuideline.of("Citation Title", 1, CITATION_TITLE_SPEC);
+    public static final SyntheticGuideline CITATION_VOLUME = SyntheticGuideline.of("Citation Volume", 1, CITATION_VOLUME_SPEC);
+    public static final SyntheticGuideline CITATION_ISSUE = SyntheticGuideline.of("Citation Issue", 1, CITATION_ISSUE_SPEC);
+    public static final SyntheticGuideline CITATION_START_PAGE = SyntheticGuideline.of("Citation Start Page", 1, CITATION_START_PAGE_SPEC);
+    public static final SyntheticGuideline CITATION_END_PAGE = SyntheticGuideline.of("Citation End Page", 1, CITATION_END_PAGE_SPEC);
+    public static final SyntheticGuideline CITATION_EDITION = SyntheticGuideline.of("Citation Edition", 1, CITATION_EDITION_SPEC);
+    public static final SyntheticGuideline CITATION_CONFERENCE_PLACE = SyntheticGuideline.of("Citation Conference Place", 1, CITATION_CONFERENCE_PLACE_SPEC);
+    public static final SyntheticGuideline CITATION_CONFERENCE_DATE =
SyntheticGuideline.of("Citation Conference Date", 1, CITATION_CONFERENCE_DATE_SPEC);
+    public static final SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", 1, AUDIENCE_SPEC);
+
+    private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(
+            Arrays.asList(
+                    TITLE,
+                    CREATOR,
+                    CONTRIBUTOR,
+                    FUNDING_REFERENCE,
+                    ALTERNATE_IDENTIFIER,
+                    RELATED_IDENTIFIER,
+                    EMBARGO_PERIOD_DATE,
+                    LANGUAGE,
+                    PUBLISHER,
+                    PUBLICATION_DATE,
+                    RESOURCE_TYPE,
+                    DESCRIPTION,
+                    FORMAT,
+                    RESOURCE_IDENTIFIER,
+                    ACCESS_RIGHTS,
+                    SOURCE,
+                    SUBJECT,
+                    LICENSE_CONDITION,
+                    COVERAGE,
+                    SIZE,
+                    GEO_LOCATION,
+                    RESOURCE_VERSION,
+                    FILE_LOCATION,
+                    CITATION_TITLE,
+                    CITATION_VOLUME,
+                    CITATION_ISSUE,
+                    CITATION_START_PAGE,
+                    CITATION_END_PAGE,
+                    CITATION_EDITION,
+                    CITATION_CONFERENCE_PLACE,
+                    CITATION_CONFERENCE_DATE,
+                    AUDIENCE
+            )
+    );
+
+    private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = GUIDELINES.
+            stream().
+            collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline));
+
+    private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum);
+
+    public LiteratureGuidelinesV4Profile_with_prefixes_in_element_names() {
+        super("OpenAIRE Guidelines for Literature Repositories Profile v4");
+    }
+
+    @Override
+    public Collection<? extends Guideline<Document>> guidelines() { // NOTE(review): type arguments were lost in extraction and are reconstructed - confirm against the overridden method
+        return GUIDELINES;
+    }
+
+    @Override
+    public SyntheticGuideline guideline(String guidelineName) {
+        return GUIDELINE_MAP.get(guidelineName);
+    }
+
+    @Override
+    public int maxScore() {
+        return MAX_SCORE;
+    }
+}
diff --git a/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java b/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java
new file mode 100644
index 0000000..cc1f561
--- /dev/null
+++ b/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java
@@ -0,0 +1,62 @@
+package eu.dnetlib.validator2.engine;
+
+import eu.dnetlib.validator2.validation.XMLApplicationProfile;
+import eu.dnetlib.validator2.validation.guideline.Guideline;
+import
eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
+import groovy.xml.DOMBuilder;
+import org.w3c.dom.Document;
+
+import java.io.FileReader;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class Test_v4 {
+
+    private static final String[] FILES = new String[] { // resolved against the working directory; run from the project root
+            "src/test/resources/openaireguidelinesV4/v4_literature_all_invalid_guidelines_record.xml",
+//            "src/test/resources/openaireguidelinesV4/v4_literature_all_guidelines_record.xml",
+//            "src/test/resources/openaireguidelinesV4/oai_mediarep_org_doc_2534.xml",
+//            "src/test/resources/openaireguidelinesV4/01_gv4.xml"
+    };
+
+    public static void main(String[] args) {
+        // Ad-hoc harness: validates every entry of FILES against the v4 profile and prints the results of guidelines whose name contains "Date".
+        LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
+        System.out.println("Max score: " + profile.maxScore());
+        Map<String, Double> scorePerDoc = new LinkedHashMap<>(); // NOTE(review): value type reconstructed after extraction - assumes score() returns double
+        for (String file: FILES) {
+            System.out.println("Processing " + file);
+            try (FileReader reader = new FileReader(file)) { // the reader is closed even when parsing or validation fails
+                Document doc = DOMBuilder.parse(reader, false, true, true);
+                XMLApplicationProfile.ValidationResult result = profile.validate(file, doc);
+                scorePerDoc.put(file, result.score());
+                Map<String, Guideline.Result> results = result.results();
+                for (Map.Entry<String, Guideline.Result> entry : results.entrySet()) {
+                    if (entry.getKey().contains("Date")) {
+                        System.out.println(entry.getValue().warnings().toString());
+                        System.out.println(entry.getValue().errors().toString());
+                        System.out.println(entry.getKey() + " = " + entry.getValue());
+                        System.out.println("\n");
+                    }
+//                    System.out.println(entry.getKey() + " = " + entry.getValue());
+                }
+            }
+            catch(Exception e) {
+                // Best effort: report the failing file and continue with the next one.
+                System.err.println("Failed to validate " + file + ": " + e);
+                e.printStackTrace();
+            }
+        }
+        // Individual scores
+        String printout = scorePerDoc.entrySet().stream().
+                map(entry -> entry.getValue() + ": " + entry.getKey()).collect(Collectors.joining("\n"));
+        // Average score; falls back to 0.0 when no file was validated (getAsDouble() would throw NoSuchElementException on an empty stream)
+        double finalScore = scorePerDoc.values().stream().mapToDouble(Double::doubleValue).average().orElse(0.0);
+
+        System.out.println(printout);
+        System.out.println("\nValidation Score: "+finalScore);
+
+    }
+
+}
diff --git a/src/test/resources/openaireguidelinesV4/v4_literature_all_invalid_guidelines_record.xml b/src/test/resources/openaireguidelinesV4/v4_literature_all_invalid_guidelines_record.xml
index 95ae6a8..dec088e 100644
--- a/src/test/resources/openaireguidelinesV4/v4_literature_all_invalid_guidelines_record.xml
+++ b/src/test/resources/openaireguidelinesV4/v4_literature_all_invalid_guidelines_record.xml
@@ -111,7 +111,7 @@ application/pdf application/invalid - + http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-160648