From 83a7e393192dbad86a507010532e1a7a7b990b9d Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 6 Sep 2023 14:23:22 +0300 Subject: [PATCH] - Use Java's "DocumentBuilder" library instead of the outdated "spock-core" wrapper, which includes vulnerable transitive dependencies. - Code optimization and polishing. - Update Guava. --- pom.xml | 9 +- .../guideline/GuidelineEvaluation.java | 6 +- ...rofile_with_prefixes_in_element_names.java | 511 ------------------ .../eu/dnetlib/validator2/engine/Example.java | 3 +- .../eu/dnetlib/validator2/engine/Test.java | 9 +- .../dnetlib/validator2/engine/TestUtils.java | 27 + .../dnetlib/validator2/engine/Test_FAIR.java | 9 +- .../validator2/engine/Test_FAIR_LIT.java | 9 +- .../eu/dnetlib/validator2/engine/Test_v4.java | 12 +- 9 files changed, 57 insertions(+), 538 deletions(-) delete mode 100644 src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java create mode 100644 src/test/java/eu/dnetlib/validator2/engine/TestUtils.java diff --git a/pom.xml b/pom.xml index 556b9c9..6cf9363 100644 --- a/pom.xml +++ b/pom.xml @@ -50,17 +50,10 @@ com.google.guava guava - 32.1.1-jre + 32.1.2-jre compile - - org.spockframework - spock-core - 1.3-groovy-2.5 - test - - diff --git a/src/main/java/eu/dnetlib/validator2/validation/guideline/GuidelineEvaluation.java b/src/main/java/eu/dnetlib/validator2/validation/guideline/GuidelineEvaluation.java index 020154e..6419c46 100644 --- a/src/main/java/eu/dnetlib/validator2/validation/guideline/GuidelineEvaluation.java +++ b/src/main/java/eu/dnetlib/validator2/validation/guideline/GuidelineEvaluation.java @@ -49,8 +49,6 @@ class GuidelineEvaluation { for ( SyntheticRule rule: rules ) { - String id = rule.getContext().getIdProperty().getValue(); - RuleEngine.applyAndReport(rule, doc, reporter); Status status = diagnostics.getLastReportedStatus(); @@ -59,6 +57,8 @@ class GuidelineEvaluation { return StandardResult.forError(diagnostics.getLastReportedError().getMessage()); } + String id = rule.getContext().getIdProperty().getValue(); + if ( status == Status.SUCCESS && getRequirementLevelOf(id) == RequirementLevel.NOT_APPLICABLE ) { // Report the non-applicability of a rule as a warning // The check for both status and non-applicable requirement level is redundant @@ -71,7 +71,7 @@ class GuidelineEvaluation { // A mandatory rule has failed, yet we don't know whether we should report is as such. // Let's check the parent of the rule - if (rule.parentRule() == null) { + if ( rule.parentRule() == null ) { // This is the root rule failing! // Fail fast here, too (don't waste resources to evaluate other rules). // We will "enable" it, if it is requested. diff --git a/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java b/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java deleted file mode 100644 index 2828e2a..0000000 --- a/src/main/java/eu/dnetlib/validator2/validation/guideline/openaire/LiteratureGuidelinesV4Profile_with_prefixes_in_element_names.java +++ /dev/null @@ -1,511 +0,0 @@ -package eu.dnetlib.validator2.validation.guideline.openaire; - -import eu.dnetlib.validator2.engine.Rule; -import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule; -import eu.dnetlib.validator2.engine.builtins.XMLVocabularyRule; -import eu.dnetlib.validator2.validation.guideline.Builders; -import eu.dnetlib.validator2.validation.guideline.ElementSpec; -import eu.dnetlib.validator2.validation.guideline.Guideline; -import eu.dnetlib.validator2.validation.guideline.SyntheticGuideline; -import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate; -import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate; -import eu.dnetlib.validator2.validation.utils.RegexValuePredicate; -import org.w3c.dom.Document; - -import java.util.*; -import java.util.stream.Collectors; - -import static eu.dnetlib.validator2.validation.guideline.Cardinality.*; -import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*; - -public final class LiteratureGuidelinesV4Profile_with_prefixes_in_element_names extends AbstractOpenAireProfile { - - private static final String[] TITLE_TYPES = { - "AlternativeTitle", "Subtitle", "TranslatedTitle", "Other" - }; - - private static final String[] NAME_TYPES = { - "Organizational", "Personal" - }; - - private static final String[] CONTRIBUTOR_TYPES = { - "ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", - "Editor", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", - "RegistrationAgency", "RegistrationAuthority", "RelatedPerson", "Researcher", "ResearchGroup", - "RightsHolder", "Sponsor", "Supervisor", "WorkPackageLeader", "Other" - }; - - private static final String[] FUNDER_IDENTIFIER_TYPES = { - "ISNI", "GRID", "Crossref Funder" - }; - - private static final String[] IDENTIFIER_TYPES = { - "ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN", - "ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN", "WOS", - }; - - private static final String[] RELATION_TYPES = { - "IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy", - "Continues", "IsDescribedBy", "Describes", "HasMetadata", "IsMetadataFor", "HasVersion", - "IsVersionOf", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart", "IsReferencedBy", - "References", "IsDocumentedBy", "Documents", "IsCompiledBy", "Compiles", "IsVariantFormOf", - "IsOriginalFormOf", "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf", - "IsRequiredBy", "Requires" - }; - - private static final String[] RELATED_RESOURCE_GENERAL_TYPES = { - "Audiovisual", "Collection", "DataPaper", "Dataset", "Event", "Image", "InteractiveResource", - "Model", "PhysicalObject", "Service", "Software", "Sound", "Text", "Workflow", "Other" - }; - - private static final String[] EMBARGO_DATE_TYPES = { - "Accepted", "Available" - }; - - private static final String[] PUBLICATION_DATE_TYPE = { - "Issued" - }; - - private static final String[] RESOURCE_GENERAL_TYPES = { - "literature", "dataset", "software", "other research product" - }; - - private static final String[] RESOURCE_CONCEPT_URIS = { - "http://purl.org/coar/resource_type/c_1162", "http://purl.org/coar/resource_type/c_6501", - "http://purl.org/coar/resource_type/c_545b", "http://purl.org/coar/resource_type/c_b239", - "http://purl.org/coar/resource_type/c_2df8fbb1", "http://purl.org/coar/resource_type/c_dcae04bc", - "http://purl.org/coar/resource_type/c_beb9", "http://purl.org/coar/resource_type/c_3e5a", - "http://purl.org/coar/resource_type/c_ba08", "http://purl.org/coar/resource_type/c_3248", - "http://purl.org/coar/resource_type/c_2f33", "http://purl.org/coar/resource_type/c_86bc", - "http://purl.org/coar/resource_type/c_816b", "http://purl.org/coar/resource_type/c_8042", - "http://purl.org/coar/resource_type/c_71bd", "http://purl.org/coar/resource_type/c_18gh", - "http://purl.org/coar/resource_type/c_18ws", "http://purl.org/coar/resource_type/c_18hj", - "http://purl.org/coar/resource_type/c_18op", "http://purl.org/coar/resource_type/c_186u", - "http://purl.org/coar/resource_type/c_18wq", "http://purl.org/coar/resource_type/c_18wz", - "http://purl.org/coar/resource_type/c_18ww", "http://purl.org/coar/resource_type/c_efa0", - "http://purl.org/coar/resource_type/c_baaf", "http://purl.org/coar/resource_type/c_ba1f", - "http://purl.org/coar/resource_type/c_93fc", "http://purl.org/coar/resource_type/c_15cd", - "http://purl.org/coar/resource_type/c_18co", "http://purl.org/coar/resource_type/c_18cp", - "http://purl.org/coar/resource_type/c_6670", "http://purl.org/coar/resource_type/c_5794", - "http://purl.org/coar/resource_type/c_c94f", "http://purl.org/coar/resource_type/c_f744", - "http://purl.org/coar/resource_type/c_7a1f", "http://purl.org/coar/resource_type/c_bdcc", - "http://purl.org/coar/resource_type/c_db06", "http://purl.org/coar/resource_type/c_46ec", - "http://purl.org/coar/resource_type/c_0857", "http://purl.org/coar/resource_type/c_8544", - "http://purl.org/coar/resource_type/c_18cf", "http://purl.org/coar/resource_type/c_18cw", - "http://purl.org/coar/resource_type/c_18cd", "http://purl.org/coar/resource_type/c_18cc", - "http://purl.org/coar/resource_type/c_12ce", "http://purl.org/coar/resource_type/c_8a7e", - "http://purl.org/coar/resource_type/c_ecc8", "http://purl.org/coar/resource_type/c_c513", - "http://purl.org/coar/resource_type/c_12cd", "http://purl.org/coar/resource_type/c_12cc", - "http://purl.org/coar/resource_type/c_5ce6", "http://purl.org/coar/resource_type/c_ddb1", - "http://purl.org/coar/resource_type/c_e9a0", "http://purl.org/coar/resource_type/c_7ad9", - "http://purl.org/coar/resource_type/c_393c", "http://purl.org/coar/resource_type/c_1843" - }; - - private static final String[] RESOURCE_IDENTIFIER_TYPES = { - "ARK", "DOI", "Handle", "PURL", "URL", "URN" - }; - - private static final String[] ACCESS_RIGHTS_URIS = { - "http://purl.org/coar/access_right/c_abf2", "http://purl.org/coar/access_right/c_f1cf", - "http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_14cb" - }; - - private static final String[] RESOURCE_VERSION_URIS = { - "http://purl.org/coar/version/c_b1a7d7d4d402bcce", "http://purl.org/coar/version/c_71e4c1898caa6e32", - "http://purl.org/coar/version/c_ab4af688f83e57aa", "http://purl.org/coar/version/c_fa2ee174bc00049f", - "http://purl.org/coar/version/c_970fb48d4fbd8a85", "http://purl.org/coar/version/c_e19f295774971610", - "http://purl.org/coar/version/c_dc82b40f9837b551", "http://purl.org/coar/version/c_be7fb7dd8ff6fe43" - }; - - private static final String[] RESOURCE_VERSION_LABELS = { - "AO", "SMUR", "AM", "P", "VoR", "CVoR", "EVoR", "NA" - }; - - private static final String[] FILE_OBJECT_TYPES = { - "fulltext", "dataset", "software", "other" - }; - - private static final String[] AUDIENCE_VOCABULARY = { - "Administrators", "Community Groups", "Counsellors", "Federal Funds Recipients and Applicants", - "Librarians", "News Media", "Other", "Parents and Families", "Policymakers", "Researchers", - "School Support Staff", "Student Financial Aid Providers", "Students", "Teachers" - }; - - private static final ElementSpec TITLE_SPEC = Builders. - forMandatoryElement("datacite:title", ONE_TO_N). - withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). - withOptionalAttribute("titleType", TITLE_TYPES). - build(); - - private static final ElementSpec CREATOR_SPEC = Builders. - forMandatoryElement("datacite:creator", ONE_TO_N). - withSubElement(Builders. - forMandatoryElement("datacite:creatorName", ONE). - withRecommendedAttribute("nameType", NAME_TYPES)). - withSubElement(Builders. - forRecommendedElement("datacite:givenName")). - withSubElement(Builders. - forRecommendedElement("datacite:familyName")). - withSubElement(Builders. - forRecommendedRepeatableElement("datacite:nameIdentifier"). - withMandatoryAttribute("nameIdentifierScheme"). - withRecommendedAttribute("schemeURI")). - withSubElement(Builders. - forRecommendedRepeatableElement("datacite:affiliation")). - build(); - - private static final ElementSpec CONTRIBUTOR_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:contributor", ONE_TO_N, elementIsPresent("datacite:contributor")). - withMandatoryAttribute("contributorType", CONTRIBUTOR_TYPES). - withSubElement(Builders. - forMandatoryElement("datacite:contributorName", ONE). - withRecommendedAttribute("nameType", NAME_TYPES)). - withSubElement(Builders. - forOptionalElement("datacite:familyName")). - withSubElement(Builders. - forOptionalElement("datacite:givenName")). - withSubElement(Builders. - forRecommendedRepeatableElement("datacite:nameIdentifier"). - withMandatoryAttribute("nameIdentifierScheme"). - withRecommendedAttribute("schemeURI")). - withSubElement(Builders. - forRecommendedRepeatableElement("datacite:affiliation")). - build(); - - //This property has some issues/annotations in documentation - private static final ElementSpec FUNDING_REFERENCE_SPEC = Builders. - forMandatoryIfApplicableElement("oaire:fundingReference", ONE_TO_N, elementIsPresent("oaire:fundingReference")). - withSubElement(Builders. - forMandatoryElement("oaire:funderName", ONE)). - withSubElement(Builders. - forRecommendedElement("oaire:funderIdentifier"). - withRecommendedAttribute("funderIdentifierType", FUNDER_IDENTIFIER_TYPES)). - withSubElement(Builders. - forOptionalElement("oaire:fundingStream")). - withSubElement(Builders. - forMandatoryIfApplicableElement("oaire:awardNumber", ONE, elementIsPresent("oaire:awardNumber")). - withRecommendedAttribute("awardURI")). - withSubElement(Builders. - forRecommendedElement("oaire:awardTitle")). - build(); - - //TODO: Allowed values are referred as "suggested" in the documentation, but then a controlled list is given. - // Relevant issues: - // https://bitbucket.org/saikos/openaire-validator/issues/40 - // https://bitbucket.org/saikos/openaire-validator/issues/32/ - private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders. - forRecommendedRepeatableElement("datacite:alternateIdentifier"). - withMandatoryAttribute("alternateIdentifierType", IDENTIFIER_TYPES). - build(); - - private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders. - forRecommendedRepeatableElement("datacite:relatedIdentifier"). - withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES). - withMandatoryAttribute("relationType", RELATION_TYPES). - //TODO: For following 3 attributes. Need a way to target relationType attribute of current element - // - Should be used only with relation type (HasMetadata/IsMetadataFor). - withOptionalAttribute("relatedMetadataScheme"). - withOptionalAttribute("schemeURI"). - withOptionalAttribute("schemeType"). - withOptionalAttribute("resourceTypeGeneral", RELATED_RESOURCE_GENERAL_TYPES). - build(); - - /* - Applicable when Access Rights is set to: - embargoed access - Date encoding "YYYY-MM-DD" is referred as best practice. Should introduce in allowed values? - */ - //TODO: Implement proper applicability rule - private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:date", TWO, applicabilityRuleForEmbargoPeriodDate()). - withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES). - build(); - - /* - There are no "strict" allowed values. Recommendations are IETF BCP 47 and ISO 639-x - */ - private static final ElementSpec LANGUAGE_SPEC = Builders. - forMandatoryIfApplicableElement("dc:language", ONE_TO_N, elementIsPresent("dc:language")). - allowedValues(new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX).or(new ISO639ValuePredicate())). - build(); - - private static final ElementSpec PUBLISHER_SPEC = Builders. - forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher")). - build(); - - /* - TODO: Same name as EMBARGO_PERIOD_DATE_SPEC above, with different attribute allowed value. - Should probably revisit, take that into consideration, when making relevant rules. - "Recommended" best practice for encoding the date value is ISO 8601 [W3CDTF] (YYYY-MM-DD) (YYYY mandatory) - */ - private static final ElementSpec PUBLICATION_DATE_SPEC = Builders. - forMandatoryElement("datacite:date", ONE). - withMandatoryAttribute("dateType", PUBLICATION_DATE_TYPE). - build(); - - private static final ElementSpec RESOURCE_TYPE_SPEC = Builders. - forMandatoryElement("oaire:resourceType", ONE). - withMandatoryAttribute("resourceTypeGeneral", RESOURCE_GENERAL_TYPES). - withMandatoryAttribute("uri", RESOURCE_CONCEPT_URIS). - build(); - - private static final ElementSpec DESCRIPTION_SPEC = Builders. - forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description")). - withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)). - build(); - - private static final ElementSpec FORMAT_SPEC = Builders. - forRecommendedRepeatableElement("dc:format"). - allowedValues(new MediaTypesValuePredicate()). - build(); - - private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders. - forMandatoryElement("datacite:identifier", ONE). - withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES). - build(); - - private static final ElementSpec ACCESS_RIGHTS_SPEC = Builders. - forMandatoryElement("datacite:rights", ONE). - withMandatoryAttribute("uri", ACCESS_RIGHTS_URIS). - build(); - - private static final ElementSpec SOURCE_SPEC = Builders. - forRecommendedRepeatableElement("dc:source"). - build(); - - //TODO: Should we check URI attribute values are valid? - private static final ElementSpec SUBJECT_SPEC = Builders. - forMandatoryIfApplicableElement("datacite:subject", ONE_TO_N, elementIsPresent("datacite:subject")). - withOptionalAttribute("subjectScheme"). - withOptionalAttribute("schemeURI"). - withOptionalAttribute("valueURI"). - build(); - - private static final ElementSpec LICENSE_CONDITION_SPEC = Builders. - forRecommendedElement("oaire:licenseCondition"). - withMandatoryIfApplicableAttribute("uri", elementIsPresent("oaire:licenseCondition")). - withMandatoryIfApplicableAttribute("startDate", elementIsPresent("oaire:licenseCondition")). - build(); - - private static final ElementSpec COVERAGE_SPEC = Builders. - forRecommendedRepeatableElement("dc:coverage"). - build(); - - private static final ElementSpec SIZE_SPEC = Builders. - forOptionalRepeatableElement("datacite:size"). - build(); - - private static final ElementSpec GEO_LOCATION_SPEC = Builders. - forOptionalRepeatableElement("datacite:geoLocation"). - withSubElement(Builders. - forOptionalElement("datacite:geoLocationPoint"). - withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE))). - withSubElement(Builders. - forOptionalElement("datacite:geoLocationBox"). - withSubElement(Builders. - forMandatoryElement("datacite:westBoundLongitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:eastBoundLongitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:southBoundLatitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:northBoundLatitude", ONE))). - withSubElement(Builders. - forOptionalElement("datacite:geoLocationPlace")). - withSubElement(Builders. - forOptionalRepeatableElement("datacite:geoLocationPolygon"). - withSubElement(Builders. - forMandatoryElement("datacite:polygonPoint", FOUR_TO_N). - withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE))). - withSubElement(Builders. - forOptionalElement("datacite:inPolygonPoint"). - withSubElement(Builders. - forMandatoryElement("datacite:pointLongitude", ONE)). - withSubElement(Builders. - forMandatoryElement("datacite:pointLatitude", ONE)))). - build(); - - /* - TODO: RequirementLevel.RECOMMENDED, Cardinality.ONE? - If uri attribute is present, element also has relevant controlled allowed values mapped to it's value. - uri attribute applicable when element value is one of controlled values. - Must be relevant value of [AO, SMUR, AM, P, VoR, CVoR, EVoR, NA] - Otherwise can be a number - TODO: Should we cross-check attribute and element value are relevant? - */ - private static final ElementSpec RESOURCE_VERSION_SPEC = Builders. - forRecommendedElement("oaire:version"). - withMandatoryIfApplicableAttribute("uri", applicabilityRuleForURIAttributeOfResourceVersion(), RESOURCE_VERSION_URIS). - build(); - - //TODO: Has annotation/issue: accessRightsURI attribute values also appears on ACCESS_RIGHTS_SPEC. Should check it's the same? - private static final ElementSpec FILE_LOCATION_SPEC = Builders. - forMandatoryIfApplicableElement("oaire:file", ONE_TO_N, elementIsPresent("oaire:file")). - withRecommendedAttribute("accessRightsURI", ACCESS_RIGHTS_URIS). - withRecommendedAttribute("mimeType", new MediaTypesValuePredicate()). - withRecommendedAttribute("objectType", FILE_OBJECT_TYPES). - build(); - - private static final ElementSpec CITATION_TITLE_SPEC = Builders. - forRecommendedElement("oaire:citationTitle"). - build(); - - private static final ElementSpec CITATION_VOLUME_SPEC = Builders. - forRecommendedElement("oaire:citationVolume"). - build(); - - private static final ElementSpec CITATION_ISSUE_SPEC = Builders. - forRecommendedElement("oaire:citationIssue"). - build(); - - private static final ElementSpec CITATION_START_PAGE_SPEC = Builders. - forRecommendedElement("oaire:citationStartPage"). - build(); - - private static final ElementSpec CITATION_END_PAGE_SPEC = Builders. - forRecommendedElement("oaire:citationEndPage"). - build(); - - private static final ElementSpec CITATION_EDITION_SPEC = Builders. - forRecommendedElement("oaire:citationEdition"). - build(); - - private static final ElementSpec CITATION_CONFERENCE_PLACE_SPEC = Builders. - forRecommendedElement("oaire:citationConferencePlace"). - build(); - - //TODO: Implement regex/allowedValuesPredicate - // Date has recommended best practice ISO 8601 [W3CDTF], and two [single date] [start date - end date] formats - private static final ElementSpec CITATION_CONFERENCE_DATE_SPEC = Builders. - forRecommendedElement("oaire:citationConferenceDate"). - allowedValues(new RegexValuePredicate(COMPILED_YYYY_MM_DD_REGEX).or(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX))). - build(); - - //TODO: A non-exhaustive list is provided for values, derived from the Common Education Data Standards vocabulary - // Should we add it? - private static final ElementSpec AUDIENCE_SPEC = Builders. - forOptionalRepeatableElement("dcterms:audience"). - allowedValues(AUDIENCE_VOCABULARY). - build(); - - private static Rule applicabilityRuleForEmbargoPeriodDate() { - return XMLCardinalityRule.builder(). - setId(ElementSpec.APPLICABILITY_RULE_ID). - // first predicate count(...) makes sure there is only one "Access Rights" element, and the second predicate verifies its value. - setXPathExpression("//*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access']"). - setRange(1,1). - setIsInclusive(true). - build(); - } - - private static Rule applicabilityRuleForURIAttributeOfResourceVersion() { - return XMLVocabularyRule.builder(). - setId(ElementSpec.APPLICABILITY_RULE_ID). - setXPathExpression("//*[name()='oaire:version']/text()"). - setNodeListAction("1"). - setVocabularyTermsAndTermsType(String.join(", ", RESOURCE_VERSION_LABELS), "whitelist"). - build(); - } - - //TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 1. - public static SyntheticGuideline TITLE = SyntheticGuideline.of("Title", 1, TITLE_SPEC); - public static SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", 1, CREATOR_SPEC); - public static SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", 1, CONTRIBUTOR_SPEC); - public static SyntheticGuideline FUNDING_REFERENCE = SyntheticGuideline.of("Funding Reference", 1, FUNDING_REFERENCE_SPEC); - public static SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of("Alternate Identifier", 1, ALTERNATE_IDENTIFIER_SPEC); - public static SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of("Related Identifier", 1, RELATED_IDENTIFIER_SPEC); - public static SyntheticGuideline EMBARGO_PERIOD_DATE = SyntheticGuideline.of("Embargo Period Date", 1, EMBARGO_PERIOD_DATE_SPEC); - public static SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", 1, LANGUAGE_SPEC); - public static SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", 1, PUBLISHER_SPEC); - public static SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", 1, PUBLICATION_DATE_SPEC); - public static SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of("Resource Type", 1, RESOURCE_TYPE_SPEC); - public static SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", 1, DESCRIPTION_SPEC); - public static SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", 1, FORMAT_SPEC); - public static SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline.of("Resource Identifier", 1, RESOURCE_IDENTIFIER_SPEC); - public static SyntheticGuideline ACCESS_RIGHTS = SyntheticGuideline.of("Access Rights", 1, ACCESS_RIGHTS_SPEC); - public static SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", 1, SOURCE_SPEC); - public static SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", 1, SUBJECT_SPEC); - public static SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", 1, LICENSE_CONDITION_SPEC); - public static SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", 1, COVERAGE_SPEC); - public static SyntheticGuideline SIZE = SyntheticGuideline.of("Size", 1, SIZE_SPEC); - public static SyntheticGuideline GEO_LOCATION = SyntheticGuideline.of("Geo Location", 1, GEO_LOCATION_SPEC); - public static SyntheticGuideline RESOURCE_VERSION = SyntheticGuideline.of("Resource Version", 1, RESOURCE_VERSION_SPEC); - public static SyntheticGuideline FILE_LOCATION = SyntheticGuideline.of("File Location", 1, FILE_LOCATION_SPEC); - public static SyntheticGuideline CITATION_TITLE = SyntheticGuideline.of("Citation Title", 1, CITATION_TITLE_SPEC); - public static SyntheticGuideline CITATION_VOLUME = SyntheticGuideline.of("Citation Volume", 1, CITATION_VOLUME_SPEC); - public static SyntheticGuideline CITATION_ISSUE = SyntheticGuideline.of("Citation Issue", 1, CITATION_ISSUE_SPEC); - public static SyntheticGuideline CITATION_START_PAGE = SyntheticGuideline.of("Citation Start Page", 1, CITATION_START_PAGE_SPEC); - public static SyntheticGuideline CITATION_END_PAGE = SyntheticGuideline.of("Citation End Page", 1, CITATION_END_PAGE_SPEC); - public static SyntheticGuideline CITATION_EDITION = SyntheticGuideline.of("Citation Edition", 1, CITATION_EDITION_SPEC); - public static SyntheticGuideline CITATION_CONFERENCE_PLACE = SyntheticGuideline.of("Citation Conference Place", 1, CITATION_CONFERENCE_PLACE_SPEC); - public static SyntheticGuideline CITATION_CONFERENCE_DATE = SyntheticGuideline.of("Citation Conference Date", 1, CITATION_CONFERENCE_DATE_SPEC); - public static SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", 1, AUDIENCE_SPEC); - - private static final List GUIDELINES = Collections.unmodifiableList( - Arrays.asList( - TITLE, - CREATOR, - CONTRIBUTOR, - FUNDING_REFERENCE, - ALTERNATE_IDENTIFIER, - RELATED_IDENTIFIER, - EMBARGO_PERIOD_DATE, - LANGUAGE, - PUBLISHER, - PUBLICATION_DATE, - RESOURCE_TYPE, - DESCRIPTION, - FORMAT, - RESOURCE_IDENTIFIER, - ACCESS_RIGHTS, - SOURCE, - SUBJECT, - LICENSE_CONDITION, - COVERAGE, - SIZE, - GEO_LOCATION, - RESOURCE_VERSION, - FILE_LOCATION, - CITATION_TITLE, - CITATION_VOLUME, - CITATION_ISSUE, - CITATION_START_PAGE, - CITATION_END_PAGE, - CITATION_EDITION, - CITATION_CONFERENCE_PLACE, - CITATION_CONFERENCE_DATE, - AUDIENCE - ) - ); - - private static final Map GUIDELINE_MAP = GUIDELINES. - stream(). - collect(Collectors.toMap(SyntheticGuideline::getName, (guideline) -> guideline)); - - private static final int MAX_SCORE = GUIDELINES.stream().map(SyntheticGuideline::getWeight).reduce(0, Integer::sum); - - public LiteratureGuidelinesV4Profile_with_prefixes_in_element_names() { - super("OpenAIRE Guidelines for Literature Repositories Profile v4"); - } - - @Override - public Collection> guidelines() { - return GUIDELINES; - } - - @Override - public SyntheticGuideline guideline(String guidelineName) { - return GUIDELINE_MAP.get(guidelineName); - } - - @Override - public int maxScore() { - return MAX_SCORE; - } -} diff --git a/src/test/java/eu/dnetlib/validator2/engine/Example.java b/src/test/java/eu/dnetlib/validator2/engine/Example.java index 71dead6..2f6599c 100644 --- a/src/test/java/eu/dnetlib/validator2/engine/Example.java +++ b/src/test/java/eu/dnetlib/validator2/engine/Example.java @@ -27,8 +27,7 @@ public class Example { } public static void validateEstablishedGuideline(Document xmlDoc) { - LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile(); - Guideline.Result result = profile.TITLE.validate(xmlDoc); + Guideline.Result result = LiteratureGuidelinesV4Profile.TITLE.validate(xmlDoc); XMLApplicationProfile profile2 = new LiteratureGuidelinesV3Profile(); result = profile2.guideline("Title").validate(xmlDoc); diff --git a/src/test/java/eu/dnetlib/validator2/engine/Test.java b/src/test/java/eu/dnetlib/validator2/engine/Test.java index 64b8181..acb5b1f 100644 --- a/src/test/java/eu/dnetlib/validator2/engine/Test.java +++ b/src/test/java/eu/dnetlib/validator2/engine/Test.java @@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine; import eu.dnetlib.validator2.validation.XMLApplicationProfile; import eu.dnetlib.validator2.validation.guideline.Guideline; import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV3Profile; -import groovy.xml.DOMBuilder; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; -import java.io.FileReader; +import javax.xml.parsers.DocumentBuilder; +import java.io.File; import java.util.LinkedHashMap; import java.util.Map; import java.util.stream.Collectors; @@ -30,10 +30,13 @@ public class Test { LiteratureGuidelinesV3Profile profile = new LiteratureGuidelinesV3Profile(); logger.info("Max score: " + profile.maxScore()); Map scorePerDoc = new LinkedHashMap<>(); + DocumentBuilder builder = TestUtils.getDocumentBuilder(); + if ( builder == null ) + return; for ( String fileName : FILES ) { try { logger.info("Processing \"" + fileName + "\""); - Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true); + Document doc = builder.parse(new File(fileName)); XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc); scorePerDoc.put(fileName, result.score()); Map results = result.results(); diff --git a/src/test/java/eu/dnetlib/validator2/engine/TestUtils.java b/src/test/java/eu/dnetlib/validator2/engine/TestUtils.java new file mode 100644 index 0000000..c87f0db --- /dev/null +++ b/src/test/java/eu/dnetlib/validator2/engine/TestUtils.java @@ -0,0 +1,27 @@ +package eu.dnetlib.validator2.engine; + +import org.slf4j.LoggerFactory; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + +public class TestUtils { + + private static final org.slf4j.Logger logger = LoggerFactory.getLogger(TestUtils.class); + + + public static DocumentBuilder getDocumentBuilder() + { + try { + DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setValidating(false); + documentBuilderFactory.setNamespaceAware(true); + documentBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false); + return documentBuilderFactory.newDocumentBuilder(); + } catch (Exception e) { + logger.error("", e); + return null; + } + } + +} diff --git a/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR.java b/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR.java index ae8536e..9c9302a 100644 --- a/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR.java +++ b/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR.java @@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine; import eu.dnetlib.validator2.validation.XMLApplicationProfile; import eu.dnetlib.validator2.validation.guideline.Guideline; import eu.dnetlib.validator2.validation.guideline.openaire.FAIR_Data_GuidelinesProfile; -import groovy.xml.DOMBuilder; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; -import java.io.FileReader; +import javax.xml.parsers.DocumentBuilder; +import java.io.File; import java.util.LinkedHashMap; import java.util.Map; import java.util.OptionalDouble; @@ -33,10 +33,13 @@ public class Test_FAIR { FAIR_Data_GuidelinesProfile profile = new FAIR_Data_GuidelinesProfile(); logger.info("Max score: " + profile.maxScore()); Map scorePerDoc = new LinkedHashMap<>(); + DocumentBuilder builder = TestUtils.getDocumentBuilder(); + if ( builder == null ) + return; for ( String fileName : FILES ) { try { logger.info("Processing \"" + fileName + "\""); - Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true); + Document doc = builder.parse(new File(fileName)); XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc); scorePerDoc.put(fileName, result.score()); Map results = result.results(); diff --git a/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR_LIT.java b/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR_LIT.java index bc8e3e7..e4cfbff 100644 --- a/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR_LIT.java +++ b/src/test/java/eu/dnetlib/validator2/engine/Test_FAIR_LIT.java @@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine; import eu.dnetlib.validator2.validation.XMLApplicationProfile; import eu.dnetlib.validator2.validation.guideline.Guideline; import eu.dnetlib.validator2.validation.guideline.openaire.FAIR_Literature_GuidelinesProfile; -import groovy.xml.DOMBuilder; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; -import java.io.FileReader; +import javax.xml.parsers.DocumentBuilder; +import java.io.File; import java.util.LinkedHashMap; import java.util.Map; import java.util.OptionalDouble; @@ -33,10 +33,13 @@ public class Test_FAIR_LIT { FAIR_Literature_GuidelinesProfile profile = new FAIR_Literature_GuidelinesProfile(); logger.info("Max score: " + profile.maxScore()); Map scorePerDoc = new LinkedHashMap<>(); + DocumentBuilder builder = TestUtils.getDocumentBuilder(); + if ( builder == null ) + return; for ( String fileName : FILES ) { try { logger.info("Processing \"" + fileName + "\""); - Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true); + Document doc = builder.parse(new File(fileName)); XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc); scorePerDoc.put(fileName, result.score()); Map results = result.results(); diff --git a/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java b/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java index 0328f36..f764d98 100644 --- a/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java +++ b/src/test/java/eu/dnetlib/validator2/engine/Test_v4.java @@ -4,11 +4,11 @@ import eu.dnetlib.validator2.validation.XMLApplicationProfile; import eu.dnetlib.validator2.validation.guideline.Guideline; import eu.dnetlib.validator2.validation.guideline.openaire.AbstractOpenAireProfile; import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile; -import groovy.xml.DOMBuilder; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; -import java.io.FileReader; +import javax.xml.parsers.DocumentBuilder; +import java.io.File; import java.util.LinkedHashMap; import java.util.Map; import java.util.OptionalDouble; @@ -31,10 +31,13 @@ public class Test_v4 { AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile(); logger.info("Max score: " + profile.maxScore()); Map scorePerDoc = new LinkedHashMap<>(); + DocumentBuilder builder = TestUtils.getDocumentBuilder(); + if ( builder == null ) + return; for ( String fileName : FILES ) { try { logger.info("Processing \"" + fileName + "\""); - Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true); + Document doc = builder.parse(new File(fileName)); XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc); scorePerDoc.put(fileName, result.score()); Map results = result.results(); @@ -49,8 +52,7 @@ public class Test_v4 { } } } catch (Exception e) { - logger.error(e.getMessage()); - e.printStackTrace(); + logger.error("", e); } }