update of the schema of the dump

This commit is contained in:
Miriam Baglioni 2022-12-29 14:46:51 +01:00
parent 8ec02787f2
commit ad1ba563cd
13 changed files with 863 additions and 276 deletions

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
/**
@ -12,6 +13,8 @@ import com.fasterxml.jackson.annotation.JsonSetter;
*/
public class Score implements Serializable {
private String score;
@JsonProperty("class")
private String clazz;
public String getScore() {
@ -28,7 +31,7 @@ public class Score implements Serializable {
}
@JsonSetter("class")
public void setClazz(String classe) {
this.clazz = classe;
public void setClazz(String clazz) {
this.clazz = clazz;
}
}

View File

@ -1,20 +1,6 @@
{
"$schema":"http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
}
},
"type":"object",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"accessrights": {
"type": "string",
@ -26,13 +12,14 @@
},
"citationguidelineurl": {
"type": "string",
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
"description": "The URL of the data source providing information on how to cite its items. As defined by re3data.org."
},
"contenttypes": {
"description": "Types of content in the data source, as defined by OpenDOAR",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Types of content in the data source, as defined by OpenDOAR"
}
},
"databaseaccessrestriction": {
@ -40,14 +27,18 @@
"description": "Access restrictions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
},
"datasourcetype": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value (i.e. pubsrepository::journal)"
},
{
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
"value": {
"type": "string",
"description": "The value expressed in the scheme (Journal)"
}
]
},
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
},
"datauploadrestriction": {
"type": "string",
@ -57,9 +48,7 @@
"type": "string",
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
},
"description": {
"type": "string"
},
"description": {"type": "string"},
"englishname": {
"type": "string",
"description": "The English name of the datasource"
@ -71,14 +60,11 @@
"journal": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string"
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
@ -86,19 +72,14 @@
},
"iss": {
"type": "string",
"description": "Issue number"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string"
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
@ -115,15 +96,14 @@
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "The languages present in the data source's content, as defined by OpenDOAR."
}
},
"logourl": {
"type": "string"
},
"logourl": {"type": "string"},
"missionstatementurl": {
"type": "string",
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
"description": "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
},
"officialname": {
"type": "string",
@ -134,21 +114,29 @@
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
},
"originalId": {
"description": "Original identifiers for the datasource"
"description": "Original identifiers for the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Original identifiers for the datasource"
}
},
"pid": {
"description": "Persistent identifiers of the datasource",
"type": "array",
"items": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value "
},
"value": {
"type": "string",
"description": "The value expressed in the scheme "
}
]
},
"description": "Persistent identifiers of the datasource"
}
},
"pidsystems": {
@ -159,7 +147,8 @@
"description": "Policies of the data source, as defined in OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Policies of the data source, as defined in OpenDOAR."
}
},
"releaseenddate": {
@ -174,7 +163,8 @@
"description": "List of subjects associated to the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "List of subjects associated to the datasource"
}
},
"uploadrights": {
@ -185,8 +175,6 @@
"type": "boolean",
"description": "As defined by re3data.org: 'yes' if the data source supports versioning, 'no' otherwise."
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}

View File

@ -0,0 +1,563 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"CfHbKeyValue": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "the OpenAIRE identifier of the data source"
},
"value": {
"type": "string",
"description": "the name of the data source"
}
}
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite, pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
}
},
"Score": {
"type": "object",
"properties": {
"class": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
"properties": {
"author": {
"type": "array",
"items": {
"type": "object",
"properties": {
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "The reason why the pid was associated to the author"}
]
}
},
"description": "The author's persistent identifiers"
},
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The most open of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
},
"collectedfrom": {
"description": "Information about the sources from which the record has been collected",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the sources from which the record has been collected"}
]
}
},
"contactgroup": {
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
"description": "End page"
},
"iss": {
"type": "string",
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "Start page"
},
"vol": {
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"context": {
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "Code identifying the RI/RC"
},
"label": {
"type": "string",
"description": "Label of the RI/RC"
},
"provenance": {
"description": "Why this result is associated to the RI/RC.",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the RI/RC."}
]
}
}
},
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu"
}
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
"description": "Contributors for the result"
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the country."}
]
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
"description": "When OpenAIRE collected the record the last time"
},
"description": {
"type": "array",
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
"type": "string",
"description": "Date when the embargo ends and this result turns Open Access"
},
"format": {
"type": "array",
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
"type": "array",
"items": {
"type": "object",
"properties": {
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "The OpenAIRE identifiers for this result"
},
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)"
}
},
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"collectedfrom": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the record has been collected"}
]
},
"hostedby": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the instance can be viewed or downloaded."}
]
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version"
}
},
"language": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "alpha-3/ISO 639-2 code of the language"
},
"label": {
"type": "string",
"description": "Language label in English"
}
}
},
"lastupdatetimestamp": {
"type": "integer",
"description": "Timestamp of last update of the record in OpenAIRE"
},
"maintitle": {
"type": "string",
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
"description": "Persistent identifiers of the result",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
"programmingLanguage": {
"type": "string",
"description": "Only for results with type 'software': the programming language"
},
"projects": {
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results",
"type": "array",
"items": {
"type": "object",
"properties": {
"acronym": {
"type": "string",
"description": "The acronym of the project"
},
"code": {
"type": "string",
"description": "The grant agreement number"
},
"funder": {
"type": "object",
"properties": {
"fundingStream": {
"type": "string",
"description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
},
"jurisdiction": {
"type": "string",
"description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
},
"name": {
"type": "string",
"description": "The name of the funder (European Commission)"
},
"shortName": {
"type": "string",
"description": "The short name of the funder (EC)"
}
},
"description": "Information about the funder funding the project"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the project"
},
"provenance": {"$ref": "#/definitions/Provenance"},
"title": {"type": "string"},
"validated": {
"type": "object",
"properties": {
"validatedByFunder": {"type": "boolean"},
"validationDate": {"type": "string"}
}
}
},
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results"
}
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
"description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource."
},
"size": {
"type": "string",
"description": "Only for results with type 'dataset': the declared size of the dataset"
},
"source": {
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
"description": "Keywords associated to the result",
"type": "array",
"items": {
"type": "object",
"properties": {
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this subject is associated to the result"}
]
},
"subject": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product"
}
},
"type": {
"type": "string",
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
},
"version": {
"type": "string",
"description": "Version of the result"
}
}
}

View File

@ -6,7 +6,8 @@
"description": "Alternative names that identify the organisation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Alternative names that identify the organisation"
}
},
"country": {
@ -14,25 +15,21 @@
"properties": {
"code": {
"type": "string",
"description": "The organisation country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The organisation country label"
"description": "The label for that code (i.e. Italy)"
}
},
"description": "The country of the organisation"
"description": "The organisation country"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the organisation"
},
"legalname": {
"type": "string"
},
"legalshortname": {
"type": "string"
},
"legalname": {"type": "string"},
"legalshortname": {"type": "string"},
"pid": {
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
"type": "array",
@ -45,13 +42,12 @@
},
"value": {
"type": "string",
"description": "the value in the schema (i.e. 0000000090326370)"
"description": "The value in the schema (i.e. 0000000090326370)"
}
}
},
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370"
}
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}

View File

@ -1,30 +1,32 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The process that produced/provided the information"
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite, pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"trust": {
"type": "string"
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
},
"description": "Indicates the process that produced (or provided) the information, and the trust associated to the information"
}
},
"Score": {
"type": "object",
"properties": {
"class": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
@ -34,55 +36,56 @@
"items": {
"type": "object",
"properties": {
"fullname": {
"type": "string"
},
"name": {
"type": "string"
},
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Provenance of author's pid"}
{"description": "The reason why the pid was associated to the author"}
]
}
}
},
"description": "The author's persistent identifiers"
},
"rank": {
"type": "integer"
},
"surname": {
"type": "string"
}
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright":{
"type":"object",
"properties":{
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The most open of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
@ -91,25 +94,23 @@
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
@ -120,32 +121,28 @@
},
"iss": {
"type": "string",
"description": "Journal issue"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "start page"
"description": "Start page"
},
"vol": {
"type": "string"
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
@ -153,16 +150,18 @@
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string"
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
@ -170,14 +169,13 @@
{"description": "Why this result is associated to the country."}
]
}
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
@ -185,15 +183,14 @@
},
"description": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
@ -202,9 +199,7 @@
},
"format": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
@ -212,31 +207,51 @@
"items": {
"type": "object",
"properties": {
"box": {
"type": "string"
},
"place": {
"type": "string"
},
"point": {
"type": "string"
}
}
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "OpenAIRE Identifier"
"description": "The OpenAIRE identifiers for this result"
},
"instance":{
"description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
"type":"array",
"items":{
"type":"object",
"properties":{
"accessright":{
"type":"object",
"properties":{
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
@ -245,102 +260,75 @@
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute":{
"type":"string",
"enum":[
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
],
"description":"The type of OpenAccess applied to the result"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier":{
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
{
"description":"All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
}
]
}
},
"articleprocessingcharge":{
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.",
"type":"object",
"properties":{
"amount":{
"type":"string"
},
"currency":{
"type":"string"
}
}
},
"license":{
"type":"string"
},
"measures":{
"type":"array",
"items":{
"type":"object",
"properties":{
"key":{
"type":"string",
"description":"The measure"
},
"value":{
"type":"string",
"description":"The value for the measure"
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description":"Measures computed for this instance, for example Bip!Finder ones"
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)"
}
},
"pid":{
"description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)",
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
},
{
"description":"The persistent identifier associated to the result"
}
]
}
},
"publicationdate":{
"type":"string",
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed":{
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)",
"type":"string"
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type":{
"type":"string",
"description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url":{
"description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type":"array",
"items":{
"type":"string"
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version"
}
},
"language": {
@ -362,17 +350,14 @@
},
"maintitle": {
"type": "string",
"descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"subtitle": {
"type": "string",
"descriptio": "Explanatory or alternative name by which a scientific result is known."
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
@ -380,8 +365,8 @@
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "}
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
@ -391,7 +376,7 @@
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because its the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date."
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
@ -405,7 +390,8 @@
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
@ -421,19 +407,32 @@
]
},
"subject": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product"
}
},
"type": {

View File

@ -1,5 +1,6 @@
import java.io.IOException;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
@ -9,7 +10,10 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
//@Disabled
class GenerateJsonSchema {
@ -40,7 +44,7 @@ class GenerateJsonSchema {
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
JsonNode jsonSchema = generator.generateSchema(CommunityResult.class);
System.out.println(jsonSchema.toString());
}

View File

@ -9,10 +9,14 @@ import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Context;
@ -60,10 +64,13 @@ public class CommunitySplit implements Serializable {
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
communityProducts
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
.text(outputPath);
}

View File

@ -82,10 +82,13 @@ public class SparkUpdateProjectInfo implements Serializable {
}, Encoders.bean(CommunityResult.class));
long count = tmp.count();
tmp
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.json(outputPath);
.text(outputPath);
}

View File

@ -61,6 +61,29 @@ public class SplitForCommunityTest {
spark.stop();
}
/**
 * Runs {@link CommunitySplit} on the {@code communityResult} fixture and verifies that the
 * lines written for one community can be parsed back into {@link CommunityResult} beans.
 */
@Test
void testCommunitySplit2() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityResult")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();

	CommunitySplit split = new CommunitySplit();
	split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath);

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<CommunityResult> tmp = sc
		.textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage")
		.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

	// RDD transformations are lazy: without an action the map above never runs and the
	// test would pass even if the output could not be deserialized. count() forces every
	// line through OBJECT_MAPPER.readValue, so a malformed record fails the test.
	tmp.count();
}
@Test
void testCommunitySplit() {

View File

@ -0,0 +1 @@
{"pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "contributor": [], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, {"key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a", "value": "Microsoft Academic Graph"}], "id": "50|doi_________::0027accd79214af151336e8237a2b084", "container": {"issnPrinted": "1607-6729", "conferencedate": null, "vol": "385", "conferenceplace": null, "name": "Doklady Biochemistry and Biophysics", "iss": null, "sp": "228", "edition": null, "issnOnline": null, "ep": "234", "issnLinking": null}, "lastupdatetimestamp": 1649039791345, "author": [{"surname": null, "fullname": "Vladimir S. Saakov", "pid": null, "name": null, "rank": 1}], "instance": [{"refereed": "UNKNOWN", "hostedby": {"key": "10|issn___print::55156520c3996f4d887f858c089d1e5f", "value": "Doklady Biochemistry and Biophysics"}, "url": ["https://doi.org/10.1023/a:1019971625315"], "pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "publicationdate": "2002-01-01", "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "type": "Article"}], "subjects": [{"provenance": null, "subject": {"scheme": "keyword", "value": "General Chemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biochemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "General Medicine"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biophysics"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photosystem II"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Ion"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Soil salinity"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Analytical chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Function (biology)"}}, {"provenance": null, 
"subject": {"scheme": "MAG", "value": "Pulse (signal processing)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Fluorescence"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Phototroph"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Kinetic energy"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photochemistry"}}], "publicationdate": "2002-01-01", "indicators": {"impactMeasures": {"influence": {"score": "4.901964E-9", "class": "C"}, "popularity": {"score": "6.185583E-10", "class": "C"}, "influence_alt": {"score": "3", "class": "C"}, "impulse": {"score": "0", "class": "C"}, "popularity_alt": {"score": "0.03722029", "class": "C"}}}, "dateofcollection": "2022-04-04T02:36:31Z", "type": "publication", "description": [], "format": [], "coverage": [], "publisher": "Springer Science and Business Media LLC", "language": {"code": "und", "label": "Undetermined"}, "country": [], "originalId": ["453197", "10.1023/a:1019971625315", "314096869"], "source": ["Crossref", null], "context": [{"code": "enermaps", "provenance": [{"provenance": "Inferred by OpenAIRE", "trust": "0.8"}], "label": "Energy Research"}]}