- Use Java's "DocumentBuilder" library instead of the outdated "spock-core" wrapper, which includes vulnerable transitive dependencies.

- Code optimization and polishing.
- Update Guava.
This commit is contained in:
Lampros Smyrnaios 2023-09-06 14:23:22 +03:00
parent f3129ecb79
commit 83a7e39319
9 changed files with 57 additions and 538 deletions

View File

@ -50,17 +50,10 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>32.1.1-jre</version>
<version>32.1.2-jre</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.spockframework</groupId>
<artifactId>spock-core</artifactId>
<version>1.3-groovy-2.5</version>
<scope>test</scope>
</dependency>
<!-- logback versions 1.4.X require Java-11 -->
<!-- logback versions 1.3.X require Java-8, but if this project is added as a dependency in Spring Boot, then Spring Boot throws an error, since it does not yet support logback 1.3.x -->

View File

@ -49,8 +49,6 @@ class GuidelineEvaluation {
for ( SyntheticRule<Document> rule: rules ) {
String id = rule.getContext().getIdProperty().getValue();
RuleEngine.applyAndReport(rule, doc, reporter);
Status status = diagnostics.getLastReportedStatus();
@ -59,6 +57,8 @@ class GuidelineEvaluation {
return StandardResult.forError(diagnostics.getLastReportedError().getMessage());
}
String id = rule.getContext().getIdProperty().getValue();
if ( status == Status.SUCCESS && getRequirementLevelOf(id) == RequirementLevel.NOT_APPLICABLE ) {
// Report the non-applicability of a rule as a warning
// The check for both status and non-applicable requirement level is redundant

View File

@ -1,511 +0,0 @@
package eu.dnetlib.validator2.validation.guideline.openaire;
import eu.dnetlib.validator2.engine.Rule;
import eu.dnetlib.validator2.engine.builtins.XMLCardinalityRule;
import eu.dnetlib.validator2.engine.builtins.XMLVocabularyRule;
import eu.dnetlib.validator2.validation.guideline.Builders;
import eu.dnetlib.validator2.validation.guideline.ElementSpec;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.SyntheticGuideline;
import eu.dnetlib.validator2.validation.utils.ISO639ValuePredicate;
import eu.dnetlib.validator2.validation.utils.MediaTypesValuePredicate;
import eu.dnetlib.validator2.validation.utils.RegexValuePredicate;
import org.w3c.dom.Document;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.validator2.validation.guideline.Cardinality.*;
import static eu.dnetlib.validator2.validation.utils.SupportedRegExs.*;
public final class LiteratureGuidelinesV4Profile_with_prefixes_in_element_names extends AbstractOpenAireProfile {
// Controlled vocabularies used by the element specifications declared further down in this class.

// Allowed values of the "titleType" attribute of datacite:title (used by TITLE_SPEC).
private static final String[] TITLE_TYPES = {
"AlternativeTitle", "Subtitle", "TranslatedTitle", "Other"
};
// Allowed values of the "nameType" attribute of datacite:creatorName and datacite:contributorName.
private static final String[] NAME_TYPES = {
"Organizational", "Personal"
};
// Allowed values of the "contributorType" attribute of datacite:contributor (used by CONTRIBUTOR_SPEC).
private static final String[] CONTRIBUTOR_TYPES = {
"ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor",
"Editor", "HostingInstitution", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember",
"RegistrationAgency", "RegistrationAuthority", "RelatedPerson", "Researcher", "ResearchGroup",
"RightsHolder", "Sponsor", "Supervisor", "WorkPackageLeader", "Other"
};
// Allowed values of the "funderIdentifierType" attribute of oaire:funderIdentifier (used by FUNDING_REFERENCE_SPEC).
private static final String[] FUNDER_IDENTIFIER_TYPES = {
"ISNI", "GRID", "Crossref Funder"
};
// Allowed values of the "alternateIdentifierType" and "relatedIdentifierType" attributes
// (used by ALTERNATE_IDENTIFIER_SPEC and RELATED_IDENTIFIER_SPEC).
private static final String[] IDENTIFIER_TYPES = {
"ARK", "arXiv", "bibcode", "DOI", "EAN13", "EISSN", "Handle", "IGSN", "ISBN",
"ISSN", "ISTC", "LISSN", "LSID", "PISSN", "PMID", "PURL", "UPC", "URL", "URN", "WOS",
};
// Allowed values of the "relationType" attribute of datacite:relatedIdentifier (used by RELATED_IDENTIFIER_SPEC).
private static final String[] RELATION_TYPES = {
"IsCitedBy", "Cites", "IsSupplementTo", "IsSupplementedBy", "IsContinuedBy",
"Continues", "IsDescribedBy", "Describes", "HasMetadata", "IsMetadataFor", "HasVersion",
"IsVersionOf", "IsNewVersionOf", "IsPreviousVersionOf", "IsPartOf", "HasPart", "IsReferencedBy",
"References", "IsDocumentedBy", "Documents", "IsCompiledBy", "Compiles", "IsVariantFormOf",
"IsOriginalFormOf", "IsIdenticalTo", "IsReviewedBy", "Reviews", "IsDerivedFrom", "IsSourceOf",
"IsRequiredBy", "Requires"
};
// Allowed values of the optional "resourceTypeGeneral" attribute of datacite:relatedIdentifier.
private static final String[] RELATED_RESOURCE_GENERAL_TYPES = {
"Audiovisual", "Collection", "DataPaper", "Dataset", "Event", "Image", "InteractiveResource",
"Model", "PhysicalObject", "Service", "Software", "Sound", "Text", "Workflow", "Other"
};
// Allowed values of the "dateType" attribute of datacite:date in EMBARGO_PERIOD_DATE_SPEC.
private static final String[] EMBARGO_DATE_TYPES = {
"Accepted", "Available"
};
// Allowed value of the "dateType" attribute of datacite:date in PUBLICATION_DATE_SPEC.
private static final String[] PUBLICATION_DATE_TYPE = {
"Issued"
};
// Allowed values of the "resourceTypeGeneral" attribute of oaire:resourceType (used by RESOURCE_TYPE_SPEC).
private static final String[] RESOURCE_GENERAL_TYPES = {
"literature", "dataset", "software", "other research product"
};
// Allowed values of the "uri" attribute of oaire:resourceType (used by RESOURCE_TYPE_SPEC).
// Every entry is a URI under http://purl.org/coar/resource_type/.
private static final String[] RESOURCE_CONCEPT_URIS = {
"http://purl.org/coar/resource_type/c_1162", "http://purl.org/coar/resource_type/c_6501",
"http://purl.org/coar/resource_type/c_545b", "http://purl.org/coar/resource_type/c_b239",
"http://purl.org/coar/resource_type/c_2df8fbb1", "http://purl.org/coar/resource_type/c_dcae04bc",
"http://purl.org/coar/resource_type/c_beb9", "http://purl.org/coar/resource_type/c_3e5a",
"http://purl.org/coar/resource_type/c_ba08", "http://purl.org/coar/resource_type/c_3248",
"http://purl.org/coar/resource_type/c_2f33", "http://purl.org/coar/resource_type/c_86bc",
"http://purl.org/coar/resource_type/c_816b", "http://purl.org/coar/resource_type/c_8042",
"http://purl.org/coar/resource_type/c_71bd", "http://purl.org/coar/resource_type/c_18gh",
"http://purl.org/coar/resource_type/c_18ws", "http://purl.org/coar/resource_type/c_18hj",
"http://purl.org/coar/resource_type/c_18op", "http://purl.org/coar/resource_type/c_186u",
"http://purl.org/coar/resource_type/c_18wq", "http://purl.org/coar/resource_type/c_18wz",
"http://purl.org/coar/resource_type/c_18ww", "http://purl.org/coar/resource_type/c_efa0",
"http://purl.org/coar/resource_type/c_baaf", "http://purl.org/coar/resource_type/c_ba1f",
"http://purl.org/coar/resource_type/c_93fc", "http://purl.org/coar/resource_type/c_15cd",
"http://purl.org/coar/resource_type/c_18co", "http://purl.org/coar/resource_type/c_18cp",
"http://purl.org/coar/resource_type/c_6670", "http://purl.org/coar/resource_type/c_5794",
"http://purl.org/coar/resource_type/c_c94f", "http://purl.org/coar/resource_type/c_f744",
"http://purl.org/coar/resource_type/c_7a1f", "http://purl.org/coar/resource_type/c_bdcc",
"http://purl.org/coar/resource_type/c_db06", "http://purl.org/coar/resource_type/c_46ec",
"http://purl.org/coar/resource_type/c_0857", "http://purl.org/coar/resource_type/c_8544",
"http://purl.org/coar/resource_type/c_18cf", "http://purl.org/coar/resource_type/c_18cw",
"http://purl.org/coar/resource_type/c_18cd", "http://purl.org/coar/resource_type/c_18cc",
"http://purl.org/coar/resource_type/c_12ce", "http://purl.org/coar/resource_type/c_8a7e",
"http://purl.org/coar/resource_type/c_ecc8", "http://purl.org/coar/resource_type/c_c513",
"http://purl.org/coar/resource_type/c_12cd", "http://purl.org/coar/resource_type/c_12cc",
"http://purl.org/coar/resource_type/c_5ce6", "http://purl.org/coar/resource_type/c_ddb1",
"http://purl.org/coar/resource_type/c_e9a0", "http://purl.org/coar/resource_type/c_7ad9",
"http://purl.org/coar/resource_type/c_393c", "http://purl.org/coar/resource_type/c_1843"
};
// Allowed values of the "identifierType" attribute of datacite:identifier (used by RESOURCE_IDENTIFIER_SPEC).
private static final String[] RESOURCE_IDENTIFIER_TYPES = {
"ARK", "DOI", "Handle", "PURL", "URL", "URN"
};
// Allowed values of the "uri" attribute of datacite:rights (ACCESS_RIGHTS_SPEC)
// and of the "accessRightsURI" attribute of oaire:file (FILE_LOCATION_SPEC).
private static final String[] ACCESS_RIGHTS_URIS = {
"http://purl.org/coar/access_right/c_abf2", "http://purl.org/coar/access_right/c_f1cf",
"http://purl.org/coar/access_right/c_16ec", "http://purl.org/coar/access_right/c_14cb"
};
// Allowed values of the "uri" attribute of oaire:version (used by RESOURCE_VERSION_SPEC).
private static final String[] RESOURCE_VERSION_URIS = {
"http://purl.org/coar/version/c_b1a7d7d4d402bcce", "http://purl.org/coar/version/c_71e4c1898caa6e32",
"http://purl.org/coar/version/c_ab4af688f83e57aa", "http://purl.org/coar/version/c_fa2ee174bc00049f",
"http://purl.org/coar/version/c_970fb48d4fbd8a85", "http://purl.org/coar/version/c_e19f295774971610",
"http://purl.org/coar/version/c_dc82b40f9837b551", "http://purl.org/coar/version/c_be7fb7dd8ff6fe43"
};
// Vocabulary terms passed (comma-joined, as a "whitelist") to the rule built by
// applicabilityRuleForURIAttributeOfResourceVersion(), which targets the text of oaire:version.
private static final String[] RESOURCE_VERSION_LABELS = {
"AO", "SMUR", "AM", "P", "VoR", "CVoR", "EVoR", "NA"
};
// Allowed values of the "objectType" attribute of oaire:file (used by FILE_LOCATION_SPEC).
private static final String[] FILE_OBJECT_TYPES = {
"fulltext", "dataset", "software", "other"
};
// Allowed values of the dcterms:audience element (used by AUDIENCE_SPEC).
private static final String[] AUDIENCE_VOCABULARY = {
"Administrators", "Community Groups", "Counsellors", "Federal Funds Recipients and Applicants",
"Librarians", "News Media", "Other", "Parents and Families", "Policymakers", "Researchers",
"School Support Staff", "Student Financial Aid Providers", "Students", "Teachers"
};
// "Title": mandatory datacite:title element with cardinality ONE_TO_N.
// Optional attributes: xml:lang (value predicate built from COMPILED_BCP47_LANG_TAGS_REG_EX)
// and titleType (restricted to TITLE_TYPES).
private static final ElementSpec TITLE_SPEC = Builders.
forMandatoryElement("datacite:title", ONE_TO_N).
withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)).
withOptionalAttribute("titleType", TITLE_TYPES).
build();
// "Creator": mandatory datacite:creator element with cardinality ONE_TO_N.
// Sub-elements: a mandatory creatorName (recommended nameType attribute), recommended givenName and familyName,
// and recommended repeatable nameIdentifier (mandatory nameIdentifierScheme, recommended schemeURI) and affiliation.
private static final ElementSpec CREATOR_SPEC = Builders.
forMandatoryElement("datacite:creator", ONE_TO_N).
withSubElement(Builders.
forMandatoryElement("datacite:creatorName", ONE).
withRecommendedAttribute("nameType", NAME_TYPES)).
withSubElement(Builders.
forRecommendedElement("datacite:givenName")).
withSubElement(Builders.
forRecommendedElement("datacite:familyName")).
withSubElement(Builders.
forRecommendedRepeatableElement("datacite:nameIdentifier").
withMandatoryAttribute("nameIdentifierScheme").
withRecommendedAttribute("schemeURI")).
withSubElement(Builders.
forRecommendedRepeatableElement("datacite:affiliation")).
build();
// "Contributor": datacite:contributor is mandatory-if-applicable (ONE_TO_N); the applicability rule is
// elementIsPresent("datacite:contributor") (helper not defined in this class, presumably inherited - confirm).
// Unlike the creator, familyName and givenName are optional here, and contributorType is a mandatory attribute.
private static final ElementSpec CONTRIBUTOR_SPEC = Builders.
forMandatoryIfApplicableElement("datacite:contributor", ONE_TO_N, elementIsPresent("datacite:contributor")).
withMandatoryAttribute("contributorType", CONTRIBUTOR_TYPES).
withSubElement(Builders.
forMandatoryElement("datacite:contributorName", ONE).
withRecommendedAttribute("nameType", NAME_TYPES)).
withSubElement(Builders.
forOptionalElement("datacite:familyName")).
withSubElement(Builders.
forOptionalElement("datacite:givenName")).
withSubElement(Builders.
forRecommendedRepeatableElement("datacite:nameIdentifier").
withMandatoryAttribute("nameIdentifierScheme").
withRecommendedAttribute("schemeURI")).
withSubElement(Builders.
forRecommendedRepeatableElement("datacite:affiliation")).
build();
// "Funding Reference": oaire:fundingReference is mandatory-if-applicable (ONE_TO_N), applicable when the element is present.
// Sub-elements: mandatory funderName, recommended funderIdentifier and awardTitle, optional fundingStream,
// and a mandatory-if-applicable awardNumber with a recommended awardURI attribute.
//This property has some issues/annotations in documentation
private static final ElementSpec FUNDING_REFERENCE_SPEC = Builders.
forMandatoryIfApplicableElement("oaire:fundingReference", ONE_TO_N, elementIsPresent("oaire:fundingReference")).
withSubElement(Builders.
forMandatoryElement("oaire:funderName", ONE)).
withSubElement(Builders.
forRecommendedElement("oaire:funderIdentifier").
withRecommendedAttribute("funderIdentifierType", FUNDER_IDENTIFIER_TYPES)).
withSubElement(Builders.
forOptionalElement("oaire:fundingStream")).
withSubElement(Builders.
forMandatoryIfApplicableElement("oaire:awardNumber", ONE, elementIsPresent("oaire:awardNumber")).
withRecommendedAttribute("awardURI")).
withSubElement(Builders.
forRecommendedElement("oaire:awardTitle")).
build();
// "Alternate Identifier": recommended, repeatable datacite:alternateIdentifier
// with a mandatory alternateIdentifierType attribute restricted to IDENTIFIER_TYPES.
//TODO: Allowed values are referred as "suggested" in the documentation, but then a controlled list is given.
// Relevant issues:
// https://bitbucket.org/saikos/openaire-validator/issues/40
// https://bitbucket.org/saikos/openaire-validator/issues/32/
private static final ElementSpec ALTERNATE_IDENTIFIER_SPEC = Builders.
forRecommendedRepeatableElement("datacite:alternateIdentifier").
withMandatoryAttribute("alternateIdentifierType", IDENTIFIER_TYPES).
build();
// "Related Identifier": recommended, repeatable datacite:relatedIdentifier with mandatory
// relatedIdentifierType and relationType attributes, plus four optional attributes.
private static final ElementSpec RELATED_IDENTIFIER_SPEC = Builders.
forRecommendedRepeatableElement("datacite:relatedIdentifier").
withMandatoryAttribute("relatedIdentifierType", IDENTIFIER_TYPES).
withMandatoryAttribute("relationType", RELATION_TYPES).
//TODO: For following 3 attributes. Need a way to target relationType attribute of current element
// - Should be used only with relation type (HasMetadata/IsMetadataFor).
withOptionalAttribute("relatedMetadataScheme").
withOptionalAttribute("schemeURI").
withOptionalAttribute("schemeType").
withOptionalAttribute("resourceTypeGeneral", RELATED_RESOURCE_GENERAL_TYPES).
build();
/*
"Embargo Period Date": datacite:date with cardinality TWO and a mandatory dateType attribute (EMBARGO_DATE_TYPES).
Applicable when Access Rights is set to:
<datacite:rights uri="http://purl.org/coar/access_right/c_f1cf">embargoed access</datacite:rights>
Date encoding "YYYY-MM-DD" is referred as best practice. Should introduce in allowed values?
*/
//TODO: Implement proper applicability rule
// NOTE(review): the spec already uses applicabilityRuleForEmbargoPeriodDate(); confirm whether the TODO above is still open.
private static final ElementSpec EMBARGO_PERIOD_DATE_SPEC = Builders.
forMandatoryIfApplicableElement("datacite:date", TWO, applicabilityRuleForEmbargoPeriodDate()).
withMandatoryAttribute("dateType", EMBARGO_DATE_TYPES).
build();
/*
"Language": dc:language, mandatory-if-applicable (ONE_TO_N), applicable when the element is present.
There are no "strict" allowed values. Recommendations are IETF BCP 47 and ISO 639-x
A value is accepted when either the BCP 47 regex predicate or the ISO 639 predicate accepts it.
*/
private static final ElementSpec LANGUAGE_SPEC = Builders.
forMandatoryIfApplicableElement("dc:language", ONE_TO_N, elementIsPresent("dc:language")).
allowedValues(new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX).or(new ISO639ValuePredicate())).
build();
// "Publisher": dc:publisher, mandatory-if-applicable (ONE_TO_N), applicable when the element is present.
private static final ElementSpec PUBLISHER_SPEC = Builders.
forMandatoryIfApplicableElement("dc:publisher", ONE_TO_N, elementIsPresent("dc:publisher")).
build();
/*
"Publication Date": mandatory datacite:date (cardinality ONE) with a mandatory dateType attribute (PUBLICATION_DATE_TYPE).
TODO: Same name as EMBARGO_PERIOD_DATE_SPEC above, with different attribute allowed value.
Should probably revisit, take that into consideration, when making relevant rules.
"Recommended" best practice for encoding the date value is ISO 8601 [W3CDTF] (YYYY-MM-DD) (YYYY mandatory)
*/
private static final ElementSpec PUBLICATION_DATE_SPEC = Builders.
forMandatoryElement("datacite:date", ONE).
withMandatoryAttribute("dateType", PUBLICATION_DATE_TYPE).
build();
// "Resource Type": mandatory oaire:resourceType (cardinality ONE) with mandatory
// resourceTypeGeneral (RESOURCE_GENERAL_TYPES) and uri (RESOURCE_CONCEPT_URIS) attributes.
private static final ElementSpec RESOURCE_TYPE_SPEC = Builders.
forMandatoryElement("oaire:resourceType", ONE).
withMandatoryAttribute("resourceTypeGeneral", RESOURCE_GENERAL_TYPES).
withMandatoryAttribute("uri", RESOURCE_CONCEPT_URIS).
build();
// "Description": dc:description, mandatory-if-applicable (ONE_TO_N), applicable when the element is present;
// optional xml:lang attribute checked by the BCP 47 regex predicate.
private static final ElementSpec DESCRIPTION_SPEC = Builders.
forMandatoryIfApplicableElement("dc:description", ONE_TO_N, elementIsPresent("dc:description")).
withOptionalAttribute("xml:lang", new RegexValuePredicate(COMPILED_BCP47_LANG_TAGS_REG_EX)).
build();
// "Format": recommended, repeatable dc:format whose value must be accepted by MediaTypesValuePredicate.
private static final ElementSpec FORMAT_SPEC = Builders.
forRecommendedRepeatableElement("dc:format").
allowedValues(new MediaTypesValuePredicate()).
build();
// "Resource Identifier": mandatory datacite:identifier (cardinality ONE) with a mandatory
// identifierType attribute restricted to RESOURCE_IDENTIFIER_TYPES.
private static final ElementSpec RESOURCE_IDENTIFIER_SPEC = Builders.
forMandatoryElement("datacite:identifier", ONE).
withMandatoryAttribute("identifierType", RESOURCE_IDENTIFIER_TYPES).
build();
// "Access Rights": mandatory datacite:rights (cardinality ONE) with a mandatory uri attribute
// restricted to ACCESS_RIGHTS_URIS.
private static final ElementSpec ACCESS_RIGHTS_SPEC = Builders.
forMandatoryElement("datacite:rights", ONE).
withMandatoryAttribute("uri", ACCESS_RIGHTS_URIS).
build();
// "Source": recommended, repeatable dc:source with no further constraints.
private static final ElementSpec SOURCE_SPEC = Builders.
forRecommendedRepeatableElement("dc:source").
build();
// "Subject": datacite:subject, mandatory-if-applicable (ONE_TO_N), applicable when the element is present;
// subjectScheme, schemeURI and valueURI are optional attributes without value restrictions.
//TODO: Should we check URI attribute values are valid?
private static final ElementSpec SUBJECT_SPEC = Builders.
forMandatoryIfApplicableElement("datacite:subject", ONE_TO_N, elementIsPresent("datacite:subject")).
withOptionalAttribute("subjectScheme").
withOptionalAttribute("schemeURI").
withOptionalAttribute("valueURI").
build();
// "License Condition": recommended oaire:licenseCondition; its uri and startDate attributes
// become mandatory when the element itself is present.
private static final ElementSpec LICENSE_CONDITION_SPEC = Builders.
forRecommendedElement("oaire:licenseCondition").
withMandatoryIfApplicableAttribute("uri", elementIsPresent("oaire:licenseCondition")).
withMandatoryIfApplicableAttribute("startDate", elementIsPresent("oaire:licenseCondition")).
build();
// "Coverage": recommended, repeatable dc:coverage with no further constraints.
private static final ElementSpec COVERAGE_SPEC = Builders.
forRecommendedRepeatableElement("dc:coverage").
build();
// "Size": optional, repeatable datacite:size with no further constraints.
private static final ElementSpec SIZE_SPEC = Builders.
forOptionalRepeatableElement("datacite:size").
build();
// "Geo Location": optional, repeatable datacite:geoLocation with four optional sub-elements:
//  - geoLocationPoint: mandatory pointLongitude and pointLatitude
//  - geoLocationBox: mandatory west/east bound longitude and south/north bound latitude
//  - geoLocationPlace: no further constraints
//  - geoLocationPolygon (repeatable): mandatory polygonPoint with cardinality FOUR_TO_N and an optional
//    inPolygonPoint, each carrying a mandatory pointLongitude and pointLatitude
private static final ElementSpec GEO_LOCATION_SPEC = Builders.
forOptionalRepeatableElement("datacite:geoLocation").
withSubElement(Builders.
forOptionalElement("datacite:geoLocationPoint").
withSubElement(Builders.
forMandatoryElement("datacite:pointLongitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:pointLatitude", ONE))).
withSubElement(Builders.
forOptionalElement("datacite:geoLocationBox").
withSubElement(Builders.
forMandatoryElement("datacite:westBoundLongitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:eastBoundLongitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:southBoundLatitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:northBoundLatitude", ONE))).
withSubElement(Builders.
forOptionalElement("datacite:geoLocationPlace")).
withSubElement(Builders.
forOptionalRepeatableElement("datacite:geoLocationPolygon").
withSubElement(Builders.
forMandatoryElement("datacite:polygonPoint", FOUR_TO_N).
withSubElement(Builders.
forMandatoryElement("datacite:pointLongitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:pointLatitude", ONE))).
withSubElement(Builders.
forOptionalElement("datacite:inPolygonPoint").
withSubElement(Builders.
forMandatoryElement("datacite:pointLongitude", ONE)).
withSubElement(Builders.
forMandatoryElement("datacite:pointLatitude", ONE)))).
build();
/*
"Resource Version": recommended oaire:version; its uri attribute (RESOURCE_VERSION_URIS) is mandatory
when applicabilityRuleForURIAttributeOfResourceVersion() applies.
TODO: RequirementLevel.RECOMMENDED, Cardinality.ONE?
If the uri attribute is present, the element also has relevant controlled allowed values mapped to its value.
The uri attribute is applicable when the element value is one of the controlled values.
Must be relevant value of [AO, SMUR, AM, P, VoR, CVoR, EVoR, NA]
Otherwise can be a number
TODO: Should we cross-check attribute and element value are relevant?
*/
private static final ElementSpec RESOURCE_VERSION_SPEC = Builders.
forRecommendedElement("oaire:version").
withMandatoryIfApplicableAttribute("uri", applicabilityRuleForURIAttributeOfResourceVersion(), RESOURCE_VERSION_URIS).
build();
// "File Location": oaire:file, mandatory-if-applicable (ONE_TO_N), applicable when the element is present;
// accessRightsURI, mimeType and objectType are recommended attributes with restricted values.
//TODO: Has annotation/issue: accessRightsURI attribute values also appears on ACCESS_RIGHTS_SPEC. Should check it's the same?
private static final ElementSpec FILE_LOCATION_SPEC = Builders.
forMandatoryIfApplicableElement("oaire:file", ONE_TO_N, elementIsPresent("oaire:file")).
withRecommendedAttribute("accessRightsURI", ACCESS_RIGHTS_URIS).
withRecommendedAttribute("mimeType", new MediaTypesValuePredicate()).
withRecommendedAttribute("objectType", FILE_OBJECT_TYPES).
build();
// The citation* specs below are all plain recommended elements; only the conference date restricts its value.
private static final ElementSpec CITATION_TITLE_SPEC = Builders.
forRecommendedElement("oaire:citationTitle").
build();
private static final ElementSpec CITATION_VOLUME_SPEC = Builders.
forRecommendedElement("oaire:citationVolume").
build();
private static final ElementSpec CITATION_ISSUE_SPEC = Builders.
forRecommendedElement("oaire:citationIssue").
build();
private static final ElementSpec CITATION_START_PAGE_SPEC = Builders.
forRecommendedElement("oaire:citationStartPage").
build();
private static final ElementSpec CITATION_END_PAGE_SPEC = Builders.
forRecommendedElement("oaire:citationEndPage").
build();
private static final ElementSpec CITATION_EDITION_SPEC = Builders.
forRecommendedElement("oaire:citationEdition").
build();
private static final ElementSpec CITATION_CONFERENCE_PLACE_SPEC = Builders.
forRecommendedElement("oaire:citationConferencePlace").
build();
//TODO: Implement regex/allowedValuesPredicate
// Date has recommended best practice ISO 8601 [W3CDTF], and two [single date] [start date - end date] formats
// NOTE(review): the allowedValues(...) call below already combines a single-date and a date-range regex predicate;
// the TODO above looks stale - confirm and remove.
private static final ElementSpec CITATION_CONFERENCE_DATE_SPEC = Builders.
forRecommendedElement("oaire:citationConferenceDate").
allowedValues(new RegexValuePredicate(COMPILED_YYYY_MM_DD_REGEX).or(new RegexValuePredicate(COMPILED_YYYY_MM_DD_RANGE_REGEX))).
build();
//TODO: A non-exhaustive list is provided for values, derived from the Common Education Data Standards vocabulary
// Should we add it?
// NOTE(review): AUDIENCE_VOCABULARY is already enforced as the allowed values below - confirm whether this TODO is still open.
private static final ElementSpec AUDIENCE_SPEC = Builders.
forOptionalRepeatableElement("dcterms:audience").
allowedValues(AUDIENCE_VOCABULARY).
build();
/**
 * Builds the applicability rule used by the "Embargo Period Date" element specification.
 * <p>
 * The rule is an {@link XMLCardinalityRule} registered under {@link ElementSpec#APPLICABILITY_RULE_ID},
 * configured with the range 1..1 (inclusive) over the nodes selected by the XPath expression below.
 *
 * @return the applicability rule for the embargo period date
 */
private static Rule<Document> applicabilityRuleForEmbargoPeriodDate() {
    // The first predicate, count(...)=1, makes sure the document holds exactly one "datacite:rights" element;
    // the second predicate verifies that element's "uri" attribute and its (whitespace-normalized) text.
    final String embargoedAccessRightsXPath = "//*[count(//*[name()='datacite:rights'])=1][name()='datacite:rights' and @uri='http://purl.org/coar/access_right/c_f1cf' and normalize-space(text())='embargoed access']";

    return XMLCardinalityRule.builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression(embargoedAccessRightsXPath)
            .setRange(1, 1)
            .setIsInclusive(true)
            .build();
}
/**
 * Builds the applicability rule deciding when the "uri" attribute of "oaire:version" is mandatory.
 * <p>
 * The rule is an {@link XMLVocabularyRule} registered under {@link ElementSpec#APPLICABILITY_RULE_ID};
 * it targets the text of the "oaire:version" element and uses the {@code RESOURCE_VERSION_LABELS},
 * joined with ", ", as a "whitelist" vocabulary.
 *
 * @return the applicability rule for the uri attribute of the resource version
 */
private static Rule<Document> applicabilityRuleForURIAttributeOfResourceVersion() {
    final String whitelistedVersionLabels = String.join(", ", RESOURCE_VERSION_LABELS);

    return XMLVocabularyRule.builder()
            .setId(ElementSpec.APPLICABILITY_RULE_ID)
            .setXPathExpression("//*[name()='oaire:version']/text()")
            .setNodeListAction("1")
            .setVocabularyTermsAndTermsType(whitelistedVersionLabels, "whitelist")
            .build();
}
//TODO: weights for guidelines haven't been finalized. They've been given an arbitrary value of 1.
// Publicly accessible guidelines of this profile, one per element specification; each is created with weight 1.
// They are declared final, since GUIDELINES and GUIDELINE_MAP capture these references once, during class initialization:
// re-assigning a field afterwards would silently diverge from what guidelines() and guideline(String) return.
public static final SyntheticGuideline TITLE = SyntheticGuideline.of("Title", 1, TITLE_SPEC);
public static final SyntheticGuideline CREATOR = SyntheticGuideline.of("Creator", 1, CREATOR_SPEC);
public static final SyntheticGuideline CONTRIBUTOR = SyntheticGuideline.of("Contributor", 1, CONTRIBUTOR_SPEC);
public static final SyntheticGuideline FUNDING_REFERENCE = SyntheticGuideline.of("Funding Reference", 1, FUNDING_REFERENCE_SPEC);
public static final SyntheticGuideline ALTERNATE_IDENTIFIER = SyntheticGuideline.of("Alternate Identifier", 1, ALTERNATE_IDENTIFIER_SPEC);
public static final SyntheticGuideline RELATED_IDENTIFIER = SyntheticGuideline.of("Related Identifier", 1, RELATED_IDENTIFIER_SPEC);
public static final SyntheticGuideline EMBARGO_PERIOD_DATE = SyntheticGuideline.of("Embargo Period Date", 1, EMBARGO_PERIOD_DATE_SPEC);
public static final SyntheticGuideline LANGUAGE = SyntheticGuideline.of("Language", 1, LANGUAGE_SPEC);
public static final SyntheticGuideline PUBLISHER = SyntheticGuideline.of("Publisher", 1, PUBLISHER_SPEC);
public static final SyntheticGuideline PUBLICATION_DATE = SyntheticGuideline.of("Publication Date", 1, PUBLICATION_DATE_SPEC);
public static final SyntheticGuideline RESOURCE_TYPE = SyntheticGuideline.of("Resource Type", 1, RESOURCE_TYPE_SPEC);
public static final SyntheticGuideline DESCRIPTION = SyntheticGuideline.of("Description", 1, DESCRIPTION_SPEC);
public static final SyntheticGuideline FORMAT = SyntheticGuideline.of("Format", 1, FORMAT_SPEC);
public static final SyntheticGuideline RESOURCE_IDENTIFIER = SyntheticGuideline.of("Resource Identifier", 1, RESOURCE_IDENTIFIER_SPEC);
public static final SyntheticGuideline ACCESS_RIGHTS = SyntheticGuideline.of("Access Rights", 1, ACCESS_RIGHTS_SPEC);
public static final SyntheticGuideline SOURCE = SyntheticGuideline.of("Source", 1, SOURCE_SPEC);
public static final SyntheticGuideline SUBJECT = SyntheticGuideline.of("Subject", 1, SUBJECT_SPEC);
public static final SyntheticGuideline LICENSE_CONDITION = SyntheticGuideline.of("License Condition", 1, LICENSE_CONDITION_SPEC);
public static final SyntheticGuideline COVERAGE = SyntheticGuideline.of("Coverage", 1, COVERAGE_SPEC);
public static final SyntheticGuideline SIZE = SyntheticGuideline.of("Size", 1, SIZE_SPEC);
public static final SyntheticGuideline GEO_LOCATION = SyntheticGuideline.of("Geo Location", 1, GEO_LOCATION_SPEC);
public static final SyntheticGuideline RESOURCE_VERSION = SyntheticGuideline.of("Resource Version", 1, RESOURCE_VERSION_SPEC);
public static final SyntheticGuideline FILE_LOCATION = SyntheticGuideline.of("File Location", 1, FILE_LOCATION_SPEC);
public static final SyntheticGuideline CITATION_TITLE = SyntheticGuideline.of("Citation Title", 1, CITATION_TITLE_SPEC);
public static final SyntheticGuideline CITATION_VOLUME = SyntheticGuideline.of("Citation Volume", 1, CITATION_VOLUME_SPEC);
public static final SyntheticGuideline CITATION_ISSUE = SyntheticGuideline.of("Citation Issue", 1, CITATION_ISSUE_SPEC);
public static final SyntheticGuideline CITATION_START_PAGE = SyntheticGuideline.of("Citation Start Page", 1, CITATION_START_PAGE_SPEC);
public static final SyntheticGuideline CITATION_END_PAGE = SyntheticGuideline.of("Citation End Page", 1, CITATION_END_PAGE_SPEC);
public static final SyntheticGuideline CITATION_EDITION = SyntheticGuideline.of("Citation Edition", 1, CITATION_EDITION_SPEC);
public static final SyntheticGuideline CITATION_CONFERENCE_PLACE = SyntheticGuideline.of("Citation Conference Place", 1, CITATION_CONFERENCE_PLACE_SPEC);
public static final SyntheticGuideline CITATION_CONFERENCE_DATE = SyntheticGuideline.of("Citation Conference Date", 1, CITATION_CONFERENCE_DATE_SPEC);
public static final SyntheticGuideline AUDIENCE = SyntheticGuideline.of("Audience", 1, AUDIENCE_SPEC);
// Every guideline of this profile, in a fixed order, wrapped in an unmodifiable list (returned by guidelines()).
private static final List<SyntheticGuideline> GUIDELINES = Collections.unmodifiableList(Arrays.asList(
        TITLE, CREATOR, CONTRIBUTOR, FUNDING_REFERENCE,
        ALTERNATE_IDENTIFIER, RELATED_IDENTIFIER, EMBARGO_PERIOD_DATE, LANGUAGE,
        PUBLISHER, PUBLICATION_DATE, RESOURCE_TYPE, DESCRIPTION,
        FORMAT, RESOURCE_IDENTIFIER, ACCESS_RIGHTS, SOURCE,
        SUBJECT, LICENSE_CONDITION, COVERAGE, SIZE,
        GEO_LOCATION, RESOURCE_VERSION, FILE_LOCATION, CITATION_TITLE,
        CITATION_VOLUME, CITATION_ISSUE, CITATION_START_PAGE, CITATION_END_PAGE,
        CITATION_EDITION, CITATION_CONFERENCE_PLACE, CITATION_CONFERENCE_DATE, AUDIENCE
));

// Index of the guidelines by their name, backing guideline(String).
private static final Map<String, SyntheticGuideline> GUIDELINE_MAP = GUIDELINES.stream()
        .collect(Collectors.toMap(SyntheticGuideline::getName, each -> each));

// The sum of the weights of all guidelines, returned by maxScore().
private static final int MAX_SCORE = GUIDELINES.stream()
        .mapToInt(SyntheticGuideline::getWeight)
        .sum();
/**
 * Creates the profile, passing the label "OpenAIRE Guidelines for Literature Repositories Profile v4"
 * to the superclass constructor.
 */
public LiteratureGuidelinesV4Profile_with_prefixes_in_element_names() {
super("OpenAIRE Guidelines for Literature Repositories Profile v4");
}
/**
 * @return the unmodifiable list holding all guidelines of this profile
 */
@Override
public Collection<? extends Guideline<Document>> guidelines() {
return GUIDELINES;
}
/**
 * Looks up a guideline by its name.
 *
 * @param guidelineName the name of the guideline, e.g. "Title"
 * @return the matching guideline, or {@code null} when no guideline is registered under that name
 */
@Override
public SyntheticGuideline guideline(String guidelineName) {
return GUIDELINE_MAP.get(guidelineName);
}
/**
 * @return the sum of the weights of all guidelines of this profile
 */
@Override
public int maxScore() {
return MAX_SCORE;
}
}

View File

@ -27,8 +27,7 @@ public class Example {
}
public static void validateEstablishedGuideline(Document xmlDoc) {
LiteratureGuidelinesV4Profile profile = new LiteratureGuidelinesV4Profile();
Guideline.Result result = profile.TITLE.validate(xmlDoc);
Guideline.Result result = LiteratureGuidelinesV4Profile.TITLE.validate(xmlDoc);
XMLApplicationProfile profile2 = new LiteratureGuidelinesV3Profile();
result = profile2.guideline("Title").validate(xmlDoc);

View File

@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV3Profile;
import groovy.xml.DOMBuilder;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import java.io.FileReader;
import javax.xml.parsers.DocumentBuilder;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;
@ -30,10 +30,13 @@ public class Test {
LiteratureGuidelinesV3Profile profile = new LiteratureGuidelinesV3Profile();
logger.info("Max score: " + profile.maxScore());
Map<String, Double> scorePerDoc = new LinkedHashMap<>();
DocumentBuilder builder = TestUtils.getDocumentBuilder();
if ( builder == null )
return;
for ( String fileName : FILES ) {
try {
logger.info("Processing \"" + fileName + "\"");
Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true);
Document doc = builder.parse(new File(fileName));
XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc);
scorePerDoc.put(fileName, result.score());
Map<String, Guideline.Result> results = result.results();

View File

@ -0,0 +1,27 @@
package eu.dnetlib.validator2.engine;
import org.slf4j.LoggerFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
/**
 * Helper for the test classes: creates the JAXP {@link DocumentBuilder} used to parse the XML files under test.
 */
public class TestUtils {

    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(TestUtils.class);

    /**
     * Creates a non-validating, namespace-aware {@link DocumentBuilder}.
     * <p>
     * DOCTYPE declarations are accepted, but external entities and external DTDs are never resolved,
     * so parsing a file cannot trigger reads of other local files or network requests (XXE).
     *
     * @return the configured builder, or {@code null} if the parser could not be configured
     *         (the error is logged); callers are expected to check for {@code null}
     */
    public static DocumentBuilder getDocumentBuilder()
    {
        try {
            DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
            documentBuilderFactory.setValidating(false);
            documentBuilderFactory.setNamespaceAware(true);
            // Keep accepting documents which carry a DOCTYPE declaration...
            documentBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
            // ...but do not resolve external entities, nor fetch external DTDs.
            documentBuilderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            documentBuilderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            documentBuilderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            return documentBuilderFactory.newDocumentBuilder();
        } catch (Exception e) {
            // An explicit message makes the failure identifiable in the log, instead of a bare stack trace.
            logger.error("Could not create the DocumentBuilder for parsing the XML files!", e);
            return null;
        }
    }
}

View File

@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.openaire.FAIR_Data_GuidelinesProfile;
import groovy.xml.DOMBuilder;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import java.io.FileReader;
import javax.xml.parsers.DocumentBuilder;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.OptionalDouble;
@ -33,10 +33,13 @@ public class Test_FAIR {
FAIR_Data_GuidelinesProfile profile = new FAIR_Data_GuidelinesProfile();
logger.info("Max score: " + profile.maxScore());
Map<String, Double> scorePerDoc = new LinkedHashMap<>();
DocumentBuilder builder = TestUtils.getDocumentBuilder();
if ( builder == null )
return;
for ( String fileName : FILES ) {
try {
logger.info("Processing \"" + fileName + "\"");
Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true);
Document doc = builder.parse(new File(fileName));
XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc);
scorePerDoc.put(fileName, result.score());
Map<String, Guideline.Result> results = result.results();

View File

@ -3,11 +3,11 @@ package eu.dnetlib.validator2.engine;
import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.openaire.FAIR_Literature_GuidelinesProfile;
import groovy.xml.DOMBuilder;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import java.io.FileReader;
import javax.xml.parsers.DocumentBuilder;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.OptionalDouble;
@ -33,10 +33,13 @@ public class Test_FAIR_LIT {
FAIR_Literature_GuidelinesProfile profile = new FAIR_Literature_GuidelinesProfile();
logger.info("Max score: " + profile.maxScore());
Map<String, Double> scorePerDoc = new LinkedHashMap<>();
DocumentBuilder builder = TestUtils.getDocumentBuilder();
if ( builder == null )
return;
for ( String fileName : FILES ) {
try {
logger.info("Processing \"" + fileName + "\"");
Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true);
Document doc = builder.parse(new File(fileName));
XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc);
scorePerDoc.put(fileName, result.score());
Map<String, Guideline.Result> results = result.results();

View File

@ -4,11 +4,11 @@ import eu.dnetlib.validator2.validation.XMLApplicationProfile;
import eu.dnetlib.validator2.validation.guideline.Guideline;
import eu.dnetlib.validator2.validation.guideline.openaire.AbstractOpenAireProfile;
import eu.dnetlib.validator2.validation.guideline.openaire.LiteratureGuidelinesV4Profile;
import groovy.xml.DOMBuilder;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import java.io.FileReader;
import javax.xml.parsers.DocumentBuilder;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.OptionalDouble;
@ -31,10 +31,13 @@ public class Test_v4 {
AbstractOpenAireProfile profile = new LiteratureGuidelinesV4Profile();
logger.info("Max score: " + profile.maxScore());
Map<String, Double> scorePerDoc = new LinkedHashMap<>();
DocumentBuilder builder = TestUtils.getDocumentBuilder();
if ( builder == null )
return;
for ( String fileName : FILES ) {
try {
logger.info("Processing \"" + fileName + "\"");
Document doc = DOMBuilder.parse(new FileReader(fileName), false, true, true);
Document doc = builder.parse(new File(fileName));
XMLApplicationProfile.ValidationResult result = profile.validate(fileName, doc);
scorePerDoc.put(fileName, result.score());
Map<String, Guideline.Result> results = result.results();
@ -49,8 +52,7 @@ public class Test_v4 {
}
}
} catch (Exception e) {
logger.error(e.getMessage());
e.printStackTrace();
logger.error("", e);
}
}