From 8f83b726fac6c8a71d09fd9d3b2c85d40ad86c71 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 8 Jul 2020 12:48:46 +0200 Subject: [PATCH 1/3] Dump json schema compliant to json schema Draft 7 --- .../dhp/oa/graph/dump/result_schema.json | 1012 +++++++++-------- 1 file changed, 511 insertions(+), 501 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json index df8ca1d5b..581754483 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json @@ -1,530 +1,540 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://www.openaire.eu/schema/json/1.0/openaire-schema.json", + "type": "object", + "title": "OpenAIRE Json dump schema v1.0", + "description": "This json schema describes the format of the dumps of research results provided by OpenAIRE", + "definitions": { + "AccessRight": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. 
Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + } + }, + "ControlledField": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" + }, + "value": { + "type": "string", + "description": "Description of value" + } + } + }, + "KeyValue": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "Description of key" + }, + "value": { + "type": "string", + "description": "Description of value" + } + } + } + }, "type": "object", "properties": { "author": { + "description": "List of authors of the research results", "type": "array", - "items": [ - { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "surname": { - "type": "string" - }, - "rank": { - "type": "integer" - }, - "pid": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "required": [ - "scheme", - "value" - ] - } - ] - }, - "affiliation": { - "type": "array", - "items": [ - { - "type": "string" - } - ] + "items": { + "type": "object", + "properties": { + "affiliation": { + "description": "Affiliations of the author", + "type": "array", + "items": { + "type": "string", + "description": "One of the affiliation of the author" } }, - "required": [ - "fullname" - ] + "fullname": { + "type": "string", + "description": "Fullname of the author" + }, + "name": { + "type": "string", + "description": "First name of the author" + }, + "pid": { + "description": "Persistent identifier of the author (e.g. ORCID)", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "PID scheme. 
For authors, OpenAIRE currently supports 'ORCID' " + }, + "value": { + "type": "string", + "description": "The PID of the author" + } + } + } + }, + "rank": { + "type": "integer", + "description": "Order in which the author appears in the authors list" + }, + "surname": { + "type": "string", + "description": "Surname of the author" + } + }, + "description": "One of the authors of the research result" + } + }, + "bestaccessright": { + "allOf": [ + { + "$ref": "#/definitions/AccessRight" + }, + { + "description": "The most open access right associated with the manifestations of this research result" } ] }, - "type": { - "type": "string" + "codeRepositoryUrl": { + "type": "string", + "description": "Only for results with type 'software': the URL to the repository with the source code" + }, + "collectedfrom": { + "description": "Information about the sources from which the record has been collected", + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/definitions/KeyValue" + }, + { + "description": "Key is the OpenAIRE identifier of the data source, value is its name" + } + ] + } + }, + "contactgroup": { + "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", + "type": "array", + "items": { + "type": "string" + } + }, + "contactperson": { + "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", + "type": "array", + "items": { + "type": "string" + } + }, + "container": { + "type": "object", + "properties": { + "conferencedate": { + "type": "string", + "description": "Date of the conference" + }, + "conferenceplace": { + "type": "string", + "description": "Place of the conference" + }, + "edition": { + "type": "string", + "description": "Edition of the journal or conference proceeding" + }, + "ep": { + "type": "string", + "description": "End page" + }, + "iss": { + "type": 
"string", + "description": "Journal issue" + }, + "issnLinking": { + "type": "string", + "description": "Journal linking issn" + }, + "issnOnline": { + "type": "string", + "description": "Journal online issn" + }, + "issnPrinted": { + "type": "string", + "description": "Journal printed issn" + }, + "name": { + "type": "string", + "description": "Name of the journal or conference" + }, + "sp": { + "type": "string", + "description": "Start page" + }, + "vol": { + "type": "string", + "description": "Volume" + } + }, + "description": "Container has information about the conference or journal where the result has been presented or published" + }, + "context": { + "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code identifying the RI/RC" + }, + "label": { + "type": "string", + "description": "Label of the RI/RC" + }, + "provenance": { + "description": "Why this result is relevant for the RI/RC.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "contributor": { + "description": "Contributors of this result", + "type": "array", + "items": { + "type": "string" + } + }, + "country": { + "description": "Country associated to this result", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "ISO 3166-1 alpha-2 country code" + }, + "label": { + "type": "string", + "description": "English label of the country" + }, + "provenance": { + "type": "string", + "description": "Why this result is associated to the country." 
+ } + } + } + }, + "coverage": { + "type": "array", + "items": { + "type": "string" + } + }, + "dateofcollection": { + "type": "string", + "description": "When OpenAIRE collected the record the last time" + }, + "description": { + "type": "array", + "items": { + "type": "string" + } + }, + "documentationUrl": { + "description": "Only for results with type 'software': URL to the software documentation", + "type": "array", + "items": { + "type": "string" + } + }, + "embargoenddate": { + "type": "string", + "description": "Date when the embargo ends and this result turns Open Access" + }, + "externalReference": { + "description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "provenance": { + "type": "string", + "description": "Why this result is linked to the external resource" + }, + "trust": { + "type": "string", + "description": "Level of confidence that the link is correct" + }, + "typology": { + "type": "string" + }, + "value": { + "type": "string" + } + } + } + }, + "format": { + "type": "array", + "items": { + "type": "string" + } + }, + "geolocation": { + "description": "Geolocation information", + "type": "array", + "items": { + "type": "object", + "properties": { + "box": { + "type": "string" + }, + "place": { + "type": "string" + }, + "point": { + "type": "string" + } + } + } + }, + "id": { + "type": "string", + "description": "OpenAIRE identifier" + }, + "instance": { + "description": "Manifestations (i.e. different versions) of the result. 
For example: the pre-print and the published versions are two manifestations of the same research result", + "type": "array", + "items": { + "type": "object", + "properties": { + "accessright": { + "allOf": [ + { + "$ref": "#/definitions/AccessRight" + }, + { + "description": "Access right of this instance" + } + ] + }, + "collectedfrom": { + "description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name", + "allOf": [ + { + "$ref": "#/definitions/KeyValue" + } + ] + }, + "hostedby": { + "description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name", + "allOf": [ + { + "$ref": "#/definitions/KeyValue" + } + ] + }, + "license": { + "type": "string", + "description": "License applied to the instance" + }, + "publicationdate": { + "type": "string", + "description": "Publication date of the instance" + }, + "refereed": { + "type": "string", + "description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)" + }, + "type": { + "type": "string", + "description": "Type of the instance. 
Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource" + }, + "url": { + "description": "Location where the instance is accessible", + "type": "array", + "items": { + "type": "string" + } + } + } + } }, "language": { "type": "object", "properties": { "code": { - "type": "string" + "type": "string", + "description": "alpha-3/ISO 639-2 code of the language" }, "label": { - "type": "string" + "type": "string", + "description": "English label" } - }, - "required": [ - "code", - "label" - ] + } }, - "country": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "provenance": { - "type": "string" - }, - "code": { - "type": "string" - }, - "label": { - "type": "string" - } - }, - "required": [ - "provenance", - "code", - "label" - ] - } - ] - }, - "subject": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "required": [ - "scheme", - "value" - ] - } - ] + "lastupdatetimestamp": { + "type": "integer", + "description": "Timestamp of last update of the record in OpenAIRE" }, "maintitle": { - "type": "string" + "type": "string", + "description": "Title" }, - "subtitle": { - "type": "string" - }, - "description": { + "originalId": { + "description": "Identifiers of the record at the original sources", "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "publicationdate": { - "type": "string" - }, - "publisher": { - "type": "string" - }, - "embargoenddate": { - "type": "string" - }, - "source": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "format": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "contributor": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "coverage": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "bestaccessright": { - "type": "object", - "properties": { - "scheme": { - 
"type": "string" - }, - "code": { - "type": "string" - }, - "label": { - "type": "string" - } - }, - "required": [ - "scheme", - "code", - "label" - ] - }, - "context": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "provenance": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "code": { - "type": "string" - }, - "label": { - "type": "string" - } - }, - "required": [ - "provenance", - "code", - "label" - ] - } - ] - }, - "externalReference": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "typology": { - "type": "string" - }, - "provenance": { - "type": "string" - }, - "trust": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "required": [ - "name", - "typology", - "provenance", - "trust", - "value" - ] - } - ] - }, - "instance": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "license": { - "type": "string" - }, - "accessright": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "code": { - "type": "string" - }, - "label": { - "type": "string" - } - }, - "required": [ - "scheme", - "code", - "label" - ] - }, - "type": { - "type": "string" - }, - "hostedby": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "required": [ - "key", - "value" - ] - }, - "url": { - "type": "array", - "items": [ - { - "type": "string" - }, - { - "type": "string" - } - ] - }, - "collectedfrom": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "required": [ - "key", - "value" - ] - }, - "publicationdate": { - "type": "string" - }, - "refereed": { - "type": "string" - } - }, - "required": [ - "license", - "accessright", - "type", - "hostedby", - "url", - "collectedfrom", - "publicationdate", - "refereed" - ] - } - ] - }, - "container": { - "type": "object", - 
"properties": { - "name": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnLinking": { - "type": "string" - }, - "ep": { - "type": "string" - }, - "iss": { - "type": "string" - }, - "sp": { - "type": "string" - }, - "vol": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "conferencedate": { - "type": "string" - } - }, - "required": [ - "name", - "issnPrinted", - "issnOnline", - "issnLinking", - "ep", - "iss", - "sp", - "vol", - "edition", - "conferenceplace", - "conferencedate" - ] - }, - "documentationUrl": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "codeRepositoryUrl": { - "type": "string" - }, - "programmingLanguage": { - "type": "string" - }, - "contactperson": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "contactgroup": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "tool": { - "type": "array", - "items": [ - { - "type": "string" - } - ] - }, - "size": { - "type": "string" - }, - "version": { - "type": "string" - }, - "geolocation": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "point": { - "type": "string" - }, - "box": { - "type": "string" - } - }, - "required": [ - "point", - "box" - ] - }, - { - "type": "object", - "properties": { - "point": { - "type": "string" - }, - "box": { - "type": "string" - } - }, - "required": [ - "point", - "box" - ] - } - ] + "items": { + "type": "string" + } }, "pid": { + "description": "Persistent identifiers of the result", "type": "array", - "items": [ - { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "PID scheme. 
List of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types " }, - "required": [ - "scheme", - "value" - ] + "value": { + "type": "string", + "description": "The PID of the result" + } } - ] + } + }, + "programmingLanguage": { + "type": "string", + "description": "Only for results with type 'software': the programming language" + }, + "projects": { + "description": "List of projects (i.e. grants) that (co-)funded the production of the research results", + "type": "array", + "items": { + "type": "object", + "properties": { + "acronym": { + "type": "string", + "description": "Project acronym" + }, + "code": { + "type": "string", + "description": "Grant code" + }, + "funder": { + "type": "object", + "properties": { + "fundingStream": { + "type": "string", + "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)" + }, + "jurisdiction": { + "type": "string", + "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" + }, + "name": { + "type": "string", + "description": "Name of the funder" + }, + "shortName": { + "type": "string", + "description": "Short name or acronym of the funder" + } + }, + "description": "Information about the funder funding the project" + }, + "id": { + "type": "string", + "description": "OpenAIRE identifier of the project" + }, + "title": { + "type": "string", + "description": "Title of the project" + } + } + } + }, + "publicationdate": { + "type": "string", + "description": "Date of publication" + }, + "publisher": { + "type": "string", + "description": "Publisher" + }, + "size": { + "type": "string", + "description": "Only for results with type 'dataset': the declared size of the dataset" + }, + "source": { + "description": "See definition of Dublin Core field dc:source", + "type": "array", + "items": { + "type": "string" + } + }, + "subject": { + "description": "Keywords associated to the result", + "type": "array", + "items": 
{ + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Subject classification scheme. List of available schemes are at https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). " + }, + "value": { + "type": "string", + "description": "The subject term" + } + } + } + }, + "subtitle": { + "type": "string", + "description": "Sub-title of the result" + }, + "tool": { + "description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product", + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" + }, + "version": { + "type": "string", + "description": "Version of the result" } - }, - "required": [ - "author", - "type", - "language", - "country", - "subject", - "maintitle", - "subtitle", - "description", - "publicationdate", - "publisher", - "embargoenddate", - "source", - "format", - "contributor", - "coverage", - "bestaccessright", - "context", - "externalReference", - "instance", - "container", - "documentationUrl", - "codeRepositoryUrl", - "programmingLanguage", - "contactperson", - "contactgroup", - "tool", - "size", - "version", - "geolocation", - "pid" - ] -} \ No newline at end of file + } +} From 636f9ce7d69113b41d035f7f07eff4759a7e2fb1 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 8 Jul 2020 12:50:57 +0200 Subject: [PATCH 2/3] json schema generator lib --- dhp-workflows/dhp-graph-mapper/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 638892a85..5dbcd00ed 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++
b/dhp-workflows/dhp-graph-mapper/pom.xml
@@ -76,6 +76,12 @@
 okhttp
 4.7.2
+
+ com.github.victools
+ jsonschema-generator
+ 4.13.0
+ test
+
From 9a898c0e4c80f0d59d6199b222b06210660d79e4 Mon Sep 17 00:00:00 2001
From: Alessia Bardi
Date: Wed, 8 Jul 2020 12:52:00 +0200
Subject: [PATCH 3/3] Json schema generator
---
 .../dhp/oa/graph/dump/GenerateJsonSchema.java | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java
new file mode 100644
index 000000000..ba4bac45b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java
@@ -0,0 +1,22 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.github.victools.jsonschema.generator.*;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+import org.junit.jupiter.api.Test;
+/** Developer utility, run as a JUnit test, that derives a Draft 7 JSON schema from the {@link Result} dump class. */
+public class GenerateJsonSchema {
+	/** Configures a schema generator, generates the schema for {@code Result} and prints it to standard output; makes no assertions. */
+	@Test
+	public void generateSchema(){
+		SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) // target JSON Schema Draft 7, starting from the PLAIN_JSON option preset
+			.with(Option.SCHEMA_VERSION_INDICATOR) // enable the schema version indicator option
+			.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); // disable the option covering non-public, non-static fields that have no getter
+		configBuilder.forFields().withDescriptionResolver(field -> "Description of "+field.getDeclaredName()); // placeholder description per field, built from its declared name
+		SchemaGeneratorConfig config = configBuilder.build();
+		SchemaGenerator generator = new SchemaGenerator(config);
+		JsonNode jsonSchema = generator.generateSchema(Result.class);
+		// The schema is only written to stdout; nothing is asserted and no file is produced here.
+		System.out.println(jsonSchema.toString());
+}
+}