[dumpSubset] aligned with master

Miriam Baglioni 2023-07-15 11:12:27 +02:00
commit 25be584028
53 changed files with 2651 additions and 458 deletions

View File

@@ -26,8 +26,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
*/
public class Instance implements Serializable {
@JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
private Indicator indicators;
// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
// private Indicator indicators;
private List<ResultPid> pid;
@@ -141,12 +141,12 @@ public class Instance implements Serializable {
this.alternateIdentifier = alternateIdentifier;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Indicator getIndicators() {
return indicators;
}
public void setIndicators(Indicator indicators) {
this.indicators = indicators;
}
// @JsonInclude(JsonInclude.Include.NON_NULL)
// public Indicator getIndicators() {
// return indicators;
// }
//
// public void setIndicators(Indicator indicators) {
// this.indicators = indicators;
// }
}
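
Note: with the field and its accessors commented out, Jackson simply stops emitting the property. A minimal sketch of the effect (class name and setup are illustrative, not part of the commit):

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.Instance;

public class InstanceIndicatorsSketch {
	public static void main(String[] args) throws Exception {
		// After this change the serialized Instance carries no "indicators" key,
		// because neither the field nor a getter for it exists any longer.
		String json = new ObjectMapper().writeValueAsString(new Instance());
		System.out.println(json.contains("\"indicators\"")); // false
	}
}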

View File

@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
/**
@@ -12,6 +13,8 @@ import com.fasterxml.jackson.annotation.JsonSetter;
*/
public class Score implements Serializable {
private String score;
@JsonProperty("class")
private String clazz;
public String getScore() {
@@ -28,7 +31,7 @@ public class Score implements Serializable {
}
@JsonSetter("class")
public void setClazz(String classe) {
this.clazz = classe;
public void setClazz(String clazz) {
this.clazz = clazz;
}
}
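
The clazz/"class" indirection exists because class is a reserved word in Java. A round-trip sketch (sample values invented):

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.Score;

public class ScoreRoundTripSketch {
	public static void main(String[] args) throws Exception {
		ObjectMapper mapper = new ObjectMapper();
		// @JsonSetter("class") binds the JSON name to the clazz field on read...
		Score s = mapper.readValue("{\"score\":\"0.5\",\"class\":\"C3\"}", Score.class);
		// ...and @JsonProperty("class") writes it back out under "class", not "clazz".
		System.out.println(s.getClazz());                 // C3
		System.out.println(mapper.writeValueAsString(s)); // {"score":"0.5","class":"C3"}
	}
}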

View File

@@ -1,20 +1,6 @@
{
"$schema":"http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
}
},
"type":"object",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"accessrights": {
"type": "string",
@@ -26,13 +12,14 @@
},
"citationguidelineurl": {
"type": "string",
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
"description": "The URL of the data source providing information on how to cite its items. As defined by re3data.org."
},
"contenttypes": {
"description": "Types of content in the data source, as defined by OpenDOAR",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Types of content in the data source, as defined by OpenDOAR"
}
},
"databaseaccessrestriction": {
@@ -40,14 +27,18 @@
"description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
},
"datasourcetype": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value (i.e. pubsrepository::journal)"
},
{
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
"value": {
"type": "string",
"description": "The value expressed in the scheme (Journal)"
}
]
},
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
},
"datauploadrestriction": {
"type": "string",
@@ -57,9 +48,7 @@
"type": "string",
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
},
"description": {
"type": "string"
},
"description": {"type": "string"},
"englishname": {
"type": "string",
"description": "The English name of the datasource"
@@ -71,14 +60,11 @@
"journal": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string"
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
@@ -86,19 +72,14 @@
},
"iss": {
"type": "string",
"description": "Issue number"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string"
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
@@ -115,15 +96,14 @@
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "The languages present in the data source's content, as defined by OpenDOAR."
}
},
"logourl": {
"type": "string"
},
"logourl": {"type": "string"},
"missionstatementurl": {
"type": "string",
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
"description": "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
},
"officialname": {
"type": "string",
@@ -134,21 +114,29 @@
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
},
"originalId": {
"description": "Original identifiers for the datasource"
"description": "Original identifiers for the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Original identifiers for the datasource"
}
},
"pid": {
"description": "Persistent identifiers of the datasource",
"type": "array",
"items": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value "
},
"value": {
"type": "string",
"description": "The value expressed in the scheme "
}
]
},
"description": "Persistent identifiers of the datasource"
}
},
"pidsystems": {
@@ -159,7 +147,8 @@
"description": "Policies of the data source, as defined in OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Policies of the data source, as defined in OpenDOAR."
}
},
"releaseenddate": {
@@ -174,7 +163,8 @@
"description": "List of subjects associated to the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "List of subjects associated to the datasource"
}
},
"uploadrights": {
@@ -185,8 +175,6 @@
"type": "boolean",
"description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise."
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}
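
The shared ControlledField definition is gone; datasourcetype and pid now inline their scheme/value pair. A made-up fragment matching the reshaped schema, parsed with Jackson (assumes Java 15+ text blocks; the values come from the schema's own descriptions):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class DatasourceTypeSketch {
	public static void main(String[] args) throws Exception {
		String fragment = """
			{"datasourcetype": {"scheme": "pubsrepository::journal", "value": "Journal"}}
			""";
		JsonNode node = new ObjectMapper().readTree(fragment);
		System.out.println(node.at("/datasourcetype/value").asText()); // Journal
	}
}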

View File

@@ -0,0 +1,563 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"CfHbKeyValue": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "the OpenAIRE identifier of the data source"
},
"value": {
"type": "string",
"description": "the name of the data source"
}
}
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
}
},
"Score": {
"type": "object",
"properties": {
"clazz": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
"properties": {
"author": {
"type": "array",
"items": {
"type": "object",
"properties": {
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "The reason why the pid was associated to the author"}
]
}
},
"description": "The author's persistent identifiers"
},
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The openest of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
},
"collectedfrom": {
"description": "Information about the sources from which the record has been collected",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the sources from which the record has been collected"}
]
}
},
"contactgroup": {
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
"description": "End page"
},
"iss": {
"type": "string",
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "Start page"
},
"vol": {
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"context": {
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "Code identifying the RI/RC"
},
"label": {
"type": "string",
"description": "Label of the RI/RC"
},
"provenance": {
"description": "Why this result is associated to the RI/RC.",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the RI/RC."}
]
}
}
},
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu"
}
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
"description": "Contributors for the result"
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the country."}
]
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
"description": "When OpenAIRE collected the record the last time"
},
"description": {
"type": "array",
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
"type": "string",
"description": "Date when the embargo ends and this result turns Open Access"
},
"format": {
"type": "array",
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
"type": "array",
"items": {
"type": "object",
"properties": {
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "The OpenAIRE identifiers for this result"
},
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
}
},
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"collectedfrom": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the record has been collected"}
]
},
"hostedby": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the instance can be viewed or downloaded."}
]
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version"
}
},
"language": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "alpha-3/ISO 639-2 code of the language"
},
"label": {
"type": "string",
"description": "Language label in English"
}
}
},
"lastupdatetimestamp": {
"type": "integer",
"description": "Timestamp of last update of the record in OpenAIRE"
},
"maintitle": {
"type": "string",
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
"description": "Persistent identifiers of the result",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
"programmingLanguage": {
"type": "string",
"description": "Only for results with type 'software': the programming language"
},
"projects": {
"description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results",
"type": "array",
"items": {
"type": "object",
"properties": {
"acronym": {
"type": "string",
"description": "The acronym of the project"
},
"code": {
"type": "string",
"description": "The grant agreement number"
},
"funder": {
"type": "object",
"properties": {
"fundingStream": {
"type": "string",
"description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
},
"jurisdiction": {
"type": "string",
"description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
},
"name": {
"type": "string",
"description": "The name of the funder (European Commission)"
},
"shortName": {
"type": "string",
"description": "The short name of the funder (EC)"
}
},
"description": "Information about the funder funding the project"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the project"
},
"provenance": {"$ref": "#/definitions/Provenance"},
"title": {"type": "string"},
"validated": {
"type": "object",
"properties": {
"validatedByFunder": {"type": "boolean"},
"validationDate": {"type": "string"}
}
}
},
"description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results"
}
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
"description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource."
},
"size": {
"type": "string",
"description": "Only for results with type 'dataset': the declared size of the dataset"
},
"source": {
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
"description": "Keywords associated to the result",
"type": "array",
"items": {
"type": "object",
"properties": {
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this subject is associated to the result"}
]
},
"subject": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product"
}
},
"type": {
"type": "string",
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
},
"version": {
"type": "string",
"description": "Version of the result"
}
}
}
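
This new schema carries indicators at the result level and introduces the ResultPid and Score definitions. A minimal invented record that fits those shapes (the DOI value is the example given in the schema itself):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class ResultRecordSketch {
	public static void main(String[] args) throws Exception {
		String record = """
			{
			  "id": "made-up-openaire-id",
			  "maintitle": "An invented title",
			  "pid": [{"scheme": "doi", "value": "10.1000/182"}],
			  "indicators": {"usageCounts": {"downloads": "10", "views": "42"}}
			}
			""";
		JsonNode node = new ObjectMapper().readTree(record);
		System.out.println(node.at("/indicators/usageCounts/views").asText()); // 42
	}
}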

View File

@@ -6,7 +6,8 @@
"description": "Alternative names that identify the organisation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Alternative names that identify the organisation"
}
},
"country": {
@@ -14,25 +15,21 @@
"properties": {
"code": {
"type": "string",
"description": "The organisation country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The organisation country label"
"description": "The label for that code (i.e. Italy)"
}
},
"description": "The country of the organisation"
"description": "The organisation country"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the organisation"
},
"legalname": {
"type": "string"
},
"legalshortname": {
"type": "string"
},
"legalname": {"type": "string"},
"legalshortname": {"type": "string"},
"pid": {
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
"type": "array",
@@ -45,13 +42,12 @@
},
"value": {
"type": "string",
"description": "the value in the schema (i.e. 0000000090326370)"
"description": "The value in the schema (i.e. 0000000090326370)"
}
}
},
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370"
}
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}

View File

@@ -1,30 +1,32 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The process that produced/provided the information"
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"trust": {
"type": "string"
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
},
"description": "Indicates the process that produced (or provided) the information, and the trust associated to the information"
}
},
"Score": {
"type": "object",
"properties": {
"class": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
@@ -34,55 +36,56 @@
"items": {
"type": "object",
"properties": {
"fullname": {
"type": "string"
},
"name": {
"type": "string"
},
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Provenance of author's pid"}
{"description": "The reason why the pid was associated to the author"}
]
}
}
},
"description": "The author's persistent identifiers"
},
"rank": {
"type": "integer"
},
"surname": {
"type": "string"
}
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright":{
"type":"object",
"properties":{
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The openest of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
@@ -91,25 +94,23 @@
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
@@ -120,32 +121,28 @@
},
"iss": {
"type": "string",
"description": "Journal issue"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "start page"
"description": "Start page"
},
"vol": {
"type": "string"
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
@@ -153,16 +150,18 @@
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string"
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
@@ -170,14 +169,13 @@
{"description": "Why this result is associated to the country."}
]
}
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
@@ -185,15 +183,14 @@
},
"description": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
@@ -202,9 +199,7 @@
},
"format": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
@@ -212,31 +207,51 @@
"items": {
"type": "object",
"properties": {
"box": {
"type": "string"
},
"place": {
"type": "string"
},
"point": {
"type": "string"
}
}
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "OpenAIRE Identifier"
"description": "The OpenAIRE identifiers for this result"
},
"instance":{
"description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
"type":"array",
"items":{
"type":"object",
"properties":{
"accessright":{
"type":"object",
"properties":{
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
@@ -245,102 +260,75 @@
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute":{
"type":"string",
"enum":[
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
],
"description":"The type of OpenAccess applied to the result"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier":{
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
{
"description":"All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
}
]
}
},
"articleprocessingcharge":{
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.",
"type":"object",
"properties":{
"amount":{
"type":"string"
},
"currency":{
"type":"string"
}
}
},
"license":{
"type":"string"
},
"measures":{
"type":"array",
"items":{
"type":"object",
"properties":{
"key":{
"type":"string",
"description":"The measure"
},
"value":{
"type":"string",
"description":"The value for the measure"
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description":"Measures computed for this instance, for example Bip!Finder ones"
"description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
}
},
"pid":{
"description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)",
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
},
{
"description":"The persistent identifier associated to the result"
}
]
}
},
"publicationdate":{
"type":"string",
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed":{
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)",
"type":"string"
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type":{
"type":"string",
"description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url":{
"description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type":"array",
"items":{
"type":"string"
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version"
}
},
"language": {
@@ -362,17 +350,14 @@
},
"maintitle": {
"type": "string",
"descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"subtitle": {
"type": "string",
"descriptio": "Explanatory or alternative name by which a scientific result is known."
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
@@ -380,8 +365,8 @@
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "}
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
@@ -391,7 +376,7 @@
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because its the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date."
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
@@ -405,7 +390,8 @@
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
@@ -421,19 +407,32 @@
]
},
"subject": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product"
}
},
"type": {

View File

@@ -9,7 +9,11 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
//@Disabled
class GenerateJsonSchema {
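
The added imports cover the graph-model classes whose schemas were regenerated above. A sketch of the usual victools + addon-module generation (the configuration here is an assumption, not a copy of ExecCreateSchemas):

import com.fasterxml.jackson.databind.JsonNode;
import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;

import eu.dnetlib.dhp.oa.model.graph.GraphResult;

public class SchemaGenerationSketch {
	public static void main(String[] args) {
		SchemaGeneratorConfigBuilder builder = new SchemaGeneratorConfigBuilder(
			SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
				.with(new AddonModule()); // picks up the @JsonSchema descriptions
		JsonNode schema = new SchemaGenerator(builder.build()).generateSchema(GraphResult.class);
		System.out.println(schema.toPrettyString());
	}
}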

View File

@@ -53,6 +53,10 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>io.github.classgraph</groupId>
<artifactId>classgraph</artifactId>
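
The new dependency brings in Apache HttpClient, presumably for the reworked Zenodo client used by SendToZenodoHDFS further down. A generic HttpClient 4.x usage sketch (the endpoint is illustrative, not the project's actual client code):

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class HttpClientSketch {
	public static void main(String[] args) throws Exception {
		try (CloseableHttpClient client = HttpClients.createDefault();
			CloseableHttpResponse response = client.execute(new HttpGet("https://zenodo.org/api/deposit/depositions"))) {
			System.out.println(response.getStatusLine().getStatusCode());
		}
	}
}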

View File

@@ -9,8 +9,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
public class Constants {
protected static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
protected static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
protected static final Map<String, String> ACCESS_RIGHTS_COAR_MAP = Maps.newHashMap();
protected static final Map<String, String> COAR_CODE_LABEL_MAP = Maps.newHashMap();
public static final String INFERRED = "Inferred by OpenAIRE";
public static final String CABF2 = "c_abf2";
@@ -36,22 +36,22 @@ public class Constants {
public static final String IMPACT_IMPULSE = "impulse";
static {
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
accessRightsCoarMap.put("OPEN SOURCE", CABF2);
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
ACCESS_RIGHTS_COAR_MAP.put("RESTRICTED", "c_16ec");
ACCESS_RIGHTS_COAR_MAP.put("OPEN SOURCE", CABF2);
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb");
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf");
}
static {
coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
coarCodeLabelMap.put("c_16ec", "RESTRICTED");
coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
COAR_CODE_LABEL_MAP.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN);
COAR_CODE_LABEL_MAP.put("c_16ec", "RESTRICTED");
COAR_CODE_LABEL_MAP.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED);
COAR_CODE_LABEL_MAP.put("c_f1cf", "EMBARGO");
}
public enum DUMPTYPE {
COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder"), EOSC("eosc");
COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");
private final String type;
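
The renamed maps are used as a two-step lookup: classid → COAR code → label. A sketch from within the same package (the maps are protected); the package name is an assumption:

package eu.dnetlib.dhp.oa.graph.dump; // assumption: the package declaring Constants

import eu.dnetlib.dhp.schema.common.ModelConstants;

class CoarLookupSketch {
	static String labelFor(String classid) {
		String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(classid); // e.g. "c_abf2" for open access
		return Constants.COAR_CODE_LABEL_MAP.get(code);              // and back to its label
	}
}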

View File

@@ -58,15 +58,15 @@ public class ResultMapper implements Serializable {
// I do not map Access Right UNKNOWN or OTHER
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) {
String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid());
out
.setBestaccessright(
BestAccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_CODE_LABEL_MAP.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
}
@@ -536,35 +536,18 @@
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());
if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
if (opAr.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(opAr.get().getClassid())) {
String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(opAr.get().getClassid());
instance
.setAccessright(
AccessRight
.newInstance(
code,
Constants.coarCodeLabelMap.get(code),
Constants.COAR_CODE_LABEL_MAP.get(code),
Constants.COAR_ACCESS_RIGHT_SCHEMA));
Optional<List<eu.dnetlib.dhp.schema.oaf.Measure>> mes = Optional.ofNullable(i.getMeasures());
if (mes.isPresent()) {
// List<Indicator> indicators = new ArrayList<>();
// mes
// .get()
// .forEach(
// m -> indicators
// .add(
// Indicator
// .newInstance(
// m.getId(),
// m
// .getUnit()
// .stream()
// .map(u -> Measure.newInstance(u.getKey(), u.getValue()))
// .collect(Collectors.toList()))));
instance.setIndicators(getIndicator(mes.get()));
}
if (opAr.get().getOpenAccessRoute() != null) {
switch (opAr.get().getOpenAccessRoute()) {

View File

@@ -9,9 +9,9 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
public class SendToZenodoHDFS implements Serializable {
@@ -81,10 +81,8 @@
String pString = p.toString();
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS(inputStream, name);
zenodoApiClient.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen());
}
}
@@ -92,9 +90,9 @@
zenodoApiClient.sendMretadata(metadata);
}
if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish();
}
// if (Boolean.TRUE.equals(publish)) {
// zenodoApiClient.publish();
// }
}
}

View File

@@ -10,6 +10,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -18,9 +19,13 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class Utils {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@@ -74,4 +79,65 @@
return new Gson().fromJson(sb.toString(), CommunityMap.class);
}
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
Dataset<String> dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, inputPath + "/dataset", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/software", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class)
.map((MapFunction<ResearchCommunity, String>) c -> c.getId(), Encoders.STRING()));
return dumpedIds;
}
public static Dataset<Relation> getValidRelations(SparkSession spark, Dataset<Relation> relations,
Dataset<String> entitiesIds) {
Dataset<Tuple2<String, Relation>> relationSource = relations
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
.joinWith(entitiesIds, relationSource.col("_1").equalTo(entitiesIds.col("value")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
t2._1()._2().getTarget().getId(), t2._1()._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
return relJoinSource
.joinWith(entitiesIds, relJoinSource.col("_1").equalTo(entitiesIds.col("value")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
Encoders.bean(Relation.class));
}
}
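
getValidRelations keeps only the relations whose source and target were both dumped: the first join filters on the source id and re-keys each tuple by the target id, and the second join filters on that. The typical call site (the same pattern SparkCollectAndSave adopts below):

// Sketch, assuming `spark`, `outputPath` and a Dataset<Relation> `relations` are in scope:
Dataset<String> dumpedIds = Utils.getEntitiesId(spark, outputPath);
Dataset<Relation> valid = Utils.getValidRelations(spark, relations, dumpedIds);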

View File

@@ -9,10 +9,14 @@ import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Context;
@@ -60,10 +64,13 @@
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
communityProducts
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
.text(outputPath);
}
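
On the switch from .json() to .text(): rows are now pre-serialized with Jackson, so the writer only dumps strings and Jackson's annotations decide the field names. A sketch of why that matters, using the Score bean from above (the motivation is inferred, not stated in the commit):

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.Score;

public class WhyJacksonSketch {
	public static void main(String[] args) throws Exception {
		Score s = new Score();
		s.setClazz("C1");
		s.setScore("0.5");
		// Jackson honours @JsonProperty("class"); a bean encoder would surface "clazz".
		System.out.println(new ObjectMapper().writeValueAsString(s)); // {"score":"0.5","class":"C1"}
	}
}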

View File

@@ -8,6 +8,7 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@@ -71,7 +72,8 @@
String preparedInfoPath) {
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
Dataset<CommunityResult> tmp = result
result
.joinWith(
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
"left")
@@ -79,13 +81,14 @@
CommunityResult r = value._1();
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
return r;
}, Encoders.bean(CommunityResult.class));
long count = tmp.count();
tmp
}, Encoders.bean(CommunityResult.class))
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.json(outputPath);
.text(outputPath);
}

View File

@@ -23,6 +23,7 @@ import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import it.unimi.dsi.fastutil.objects.Object2BooleanMap;
import scala.Tuple2;
/**
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
@@ -96,7 +97,7 @@
write(
Utils
.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
outputPath + "/otheresearchproduct");
outputPath + "/otherresearchproduct");
write(
Utils
.readPath(spark, inputPath + "/result/software", GraphResult.class),
@@ -104,14 +105,27 @@
}
Utils
// Dataset<String> dumpedIds = Utils.getEntitiesId(spark, outputPath);
Dataset<Relation> relations = Utils
.readPath(spark, inputPath + "/relation/publication", Relation.class)
.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class));
Utils.getValidRelations(spark, relations, Utils.getEntitiesId(spark, outputPath))
// Dataset<Relation> relJoinSource = relations
// .joinWith(dumpedIds, relations.col("source.id").equalTo(dumpedIds.col("value")))
// .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
// Encoders.bean(Relation.class));
//
// relJoinSource
// .joinWith(dumpedIds, relJoinSource.col("target.id").equalTo(dumpedIds.col("value")))
// .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
// Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@@ -88,7 +88,9 @@ public class SparkDumpFunderResults implements Serializable {
} else {
String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
if (fName.equalsIgnoreCase("ec")) {
if (p.getId().contains("h2020")) {
if (p.getId().contains("he")) {
fName += "_HE";
} else if (p.getId().contains("h2020")) {
fName += "_H2020";
} else {
fName += "_FP7";

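Editor's note: the hunk above makes the EC branch distinguish Horizon Europe (project ids containing "he") from H2020 and FP7. A hedged sketch of the resulting branching; the method name and sample id are made up for illustration:

public class FunderLabelSketch {

	// mirrors the branching in SparkDumpFunderResults above
	static String funderLabel(String projectId) {
		String fName = projectId.substring(3, projectId.indexOf("_")).toUpperCase();
		if (fName.equalsIgnoreCase("ec")) {
			if (projectId.contains("he")) {
				fName += "_HE"; // Horizon Europe, checked before the H2020 fallback
			} else if (projectId.contains("h2020")) {
				fName += "_H2020";
			} else {
				fName += "_FP7";
			}
		}
		return fName;
	}

	public static void main(String[] args) {
		System.out.println(funderLabel("40|ec__h2020::0001")); // prints EC_H2020 (made-up id)
	}
}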
View File

@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
@@ -85,7 +86,7 @@ public class SparkResultLinkedToProject implements Serializable {
private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
Class<R> inputClazz,
String inputPath, String outputPath, String resultProjectsPath) {
ObjectMapper mapper = new ObjectMapper();
Dataset<R> results = Utils
.readPath(spark, inputPath, inputClazz)
.filter(
@@ -101,10 +102,15 @@ public class SparkResultLinkedToProject implements Serializable {
.map(
t2._1(),
communityMap, Constants.DUMPTYPE.FUNDER.getType());
cr.setProjects(t2._2().getProjectsList());
if (cr != null) {
cr.setProjects(t2._2().getProjectsList());
}
return cr;
}, Encoders.bean(CommunityResult.class))
.map((MapFunction<CommunityResult, String>) cr -> mapper.writeValueAsString(cr), Encoders.STRING())
.filter(Objects::nonNull)
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@@ -4,10 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
@@ -88,6 +85,8 @@ public class SparkSelectValidContext implements Serializable {
.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class))
.distinct();
context.foreach((ForeachFunction<String>) c -> System.out.println(c));
Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);
researchCommunity
@@ -112,11 +111,11 @@
(FlatMapFunction<I, String>) r -> r
.getContext()
.stream()
.map(c -> c.getId())
.map(c -> extract(c.getId(), keys))
.collect(Collectors.toList())
.iterator(),
Encoders.STRING())
.filter((FilterFunction<String>) c -> extracted(c, keys));
.filter(Objects::nonNull);
}
@@ -124,11 +123,11 @@
return Optional.ofNullable(r.getContext()).isPresent();
}
private static boolean extracted(String c, List<String> keySet) {
private static String extract(String c, List<String> keySet) {
if (keySet.contains(c))
return true;
return c;
if (c.contains(":") && keySet.contains(c.substring(0, c.indexOf(":"))))
return true;
return false;
return c.substring(0, c.indexOf(":"));
return null;
}
}

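Editor's note: the refactor above swaps the boolean filter extracted(...) for extract(...), which maps each context id to the matching community key (the id itself, or its prefix before ':'), leaving null for non-matches so that filter(Objects::nonNull) can drop them. A self-contained illustration that copies the private method verbatim; the sample keys and context ids are made up:

import java.util.Arrays;
import java.util.List;

public class ExtractSketch {

	// verbatim copy of the private extract(...) above, reproduced only for illustration
	static String extract(String c, List<String> keySet) {
		if (keySet.contains(c))
			return c;
		if (c.contains(":") && keySet.contains(c.substring(0, c.indexOf(":"))))
			return c.substring(0, c.indexOf(":"));
		return null;
	}

	public static void main(String[] args) {
		List<String> keys = Arrays.asList("enermaps", "dh-ch"); // made-up community keys
		System.out.println(extract("enermaps", keys)); // enermaps (exact match)
		System.out.println(extract("enermaps::models", keys)); // enermaps (prefix before ':')
		System.out.println(extract("unknown::x", keys)); // null, later dropped by filter(Objects::nonNull)
	}
}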
View File

@@ -2,6 +2,8 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntitiesId;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getValidRelations;
import java.io.Serializable;
import java.util.Optional;
@@ -69,65 +71,28 @@ public class SparkSelectValidRelation implements Serializable {
private static void selectValidRelation(SparkSession spark, String inputPath,
String relationPath) {
// read the results
Dataset<String> dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, inputPath + "/dataset", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/software", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
.map(
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
Encoders.STRING()))
.union(
Utils
.readPath(spark, inputPath + "/community_infrastructure", ResearchCommunity.class)
.map((MapFunction<ResearchCommunity, String>) c -> c.getId(), Encoders.STRING()));
Dataset<Tuple2<String, Relation>> relationSource = Utils
.readPath(spark, relationPath, Relation.class)
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
getValidRelations(
spark, Utils
.readPath(spark, relationPath, Relation.class),
getEntitiesId(spark, inputPath))
Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
t2._1()._2().getTarget().getId(), t2._1()._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
relJoinSource
.joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/relation");
// Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
// .joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
// .map(
// (MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
// t2._1()._2().getTarget().getId(), t2._1()._2()),
// Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
//
// relJoinSource
// .joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
// .map(
// (MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
// Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + "/relation");
// relJoinSource = relationSource
// .joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))

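Editor's note: relation validation is now funnelled through Utils.getValidRelations. Judging from the commented-out joins kept above, it retains only relations whose source and target ids both occur among the dumped entity ids. A hedged sketch of that double join, not the actual Utils implementation:

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import scala.Tuple2;

import eu.dnetlib.dhp.oa.model.graph.Relation;

public class ValidRelationsSketch {

	public static Dataset<Relation> keepDumped(Dataset<Relation> relations, Dataset<String> dumpedIds) {
		// keep relations whose source id was dumped
		Dataset<Relation> bySource = relations
			.joinWith(dumpedIds, relations.col("source.id").equalTo(dumpedIds.col("value")))
			.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
		// of those, keep relations whose target id was dumped too
		return bySource
			.joinWith(dumpedIds, bySource.col("target.id").equalTo(dumpedIds.col("value")))
			.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
	}
}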
View File

@@ -112,7 +112,7 @@ public class SparkSelectValidRelationContext implements Serializable {
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
Dataset<ResearchCommunity> allowedContext = Utils
.readPath(spark, inputPath + "/community_infrastructure", ResearchCommunity.class);
.readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class);
Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))

View File

@@ -0,0 +1,61 @@
package eu.dnetlib.dhp.oa.zenodoapi;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.IOException;
import java.io.InputStream;
import okhttp3.MediaType;
import okhttp3.RequestBody;
import okio.BufferedSink;
import okio.Okio;
import okio.Source;
public class InputStreamRequestBody extends RequestBody {
private final InputStream inputStream;
private final MediaType mediaType;
private final long length;
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
return new InputStreamRequestBody(inputStream, mediaType, len);
}
private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
this.inputStream = inputStream;
this.mediaType = mediaType;
this.length = len;
}
@Override
public MediaType contentType() {
return mediaType;
}
@Override
public long contentLength() {
return length;
}
@Override
public void writeTo(BufferedSink sink) throws IOException {
// stream the wrapped InputStream straight into the sink; try-with-resources
// replaces the okhttp3.internal.Util.closeQuietly(...) call on an internal API
try (Source source = Okio.source(inputStream)) {
sink.writeAll(source);
}
}
}

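Editor's note: InputStreamRequestBody above lets a large file be PUT through OkHttp without buffering the whole payload in memory. A hedged usage sketch; the bucket URL, token and local path are placeholders:

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;

public class StreamingUploadSketch {

	public static void main(String[] args) throws IOException {
		File dump = new File("/tmp/dump.tar"); // placeholder path
		OkHttpClient client = new OkHttpClient();
		Request request = new Request.Builder()
			.url("https://sandbox.zenodo.org/api/files/<bucket>/dump.tar") // placeholder bucket URL
			.addHeader("Authorization", "Bearer <token>") // placeholder token
			.put(InputStreamRequestBody
				.create(MediaType.parse("application/zip"), new FileInputStream(dump), dump.length()))
			.build();
		try (Response response = client.newCall(request).execute()) {
			// the body is read from the stream while the request is written, not held in memory
			System.out.println(response.code());
		}
	}
}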
View File

@@ -0,0 +1,8 @@
package eu.dnetlib.dhp.oa.zenodoapi;
public class MissingConceptDoiException extends Throwable {
public MissingConceptDoiException(String message) {
super(message);
}
}

View File

@@ -0,0 +1,488 @@
package eu.dnetlib.dhp.oa.zenodoapi;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
//import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModel;
import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModelList;
import okhttp3.*;
public class ZenodoAPIClient implements Serializable {
String urlString;
String bucket;
String deposition_id;
String access_token;
public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
public String getUrlString() {
return urlString;
}
public void setUrlString(String urlString) {
this.urlString = urlString;
}
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
public void setDeposition_id(String deposition_id) {
this.deposition_id = deposition_id;
}
public ZenodoAPIClient(String urlString, String access_token) {
this.urlString = urlString;
this.access_token = access_token;
}
/**
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
*
* @return response code
* @throws IOException
*/
// public int newDeposition() throws IOException {
// String json = "{}";
//
// URL url = new URL(urlString);
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
// conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setRequestMethod("POST");
// conn.setDoOutput(true);
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
// conn.disconnect();
//
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
// this.bucket = newSubmission.getLinks().getBucket();
// this.deposition_id = newSubmission.getId();
//
// return responseCode;
// }
public int newDeposition() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString)
.addHeader("Content-Type", "application/json") // add request headers
.addHeader("Authorization", "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
// Get response body
json = response.body().string();
ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
this.bucket = newSubmission.getLinks().getBucket();
this.deposition_id = newSubmission.getId();
return response.code();
}
}
// public int uploadIS2(InputStream is, String fileName) throws IOException {
//
// final String crlf = "\r\n";
// final String twoHyphens = "--";
// final String boundary = "*****";
//
// HttpPut put = new HttpPut(bucket + "/" + fileName);
//
// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
//
// put.setEntity(new InputStreamEntity(is));
//
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(put);
// statusCode = response.getStatusLine().getStatusCode();
//
// }
//
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
//
// return statusCode;
// }
// public int publish() throws IOException {
// String json = "{}";
// HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
// post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// post.setEntity(new StringEntity(json));
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(post);
// statusCode = response.getStatusLine().getStatusCode();
// }
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
// return statusCode;
// }
/**
* Upload files in Zenodo.
*
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
* @return the response code
*/
public int uploadIS(InputStream is, String file_name) throws IOException {
URL url = new URL(bucket + "/" + file_name);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("PUT");
byte[] buf = new byte[8192];
int length;
try (OutputStream os = conn.getOutputStream()) {
while ((length = is.read(buf)) != -1) {
os.write(buf, 0, length);
os.flush();
}
}
int responseCode = conn.getResponseCode();
if (!checkOKStatus(responseCode)) {
throw new IOException("Unexpected code " + responseCode + getBody(conn));
}
return responseCode;
}
@NotNull
private String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
// on error status codes the input stream is not available: fall back to the error stream
InputStream stream = conn.getResponseCode() >= 400 ? conn.getErrorStream() : conn.getInputStream();
if (stream == null)
return body;
try (BufferedReader br = new BufferedReader(
new InputStreamReader(stream, "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
response.append(responseLine.trim());
}
body = response.toString();
}
return body;
}
public int uploadIS3(InputStream is, String file_name, long len) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder()
.writeTimeout(600, TimeUnit.SECONDS)
.readTimeout(600, TimeUnit.SECONDS)
.connectTimeout(600, TimeUnit.SECONDS)
.build();
Request request = new Request.Builder()
.url(bucket + "/" + file_name)
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.code();
}
}
/**
* Associates metadata information with the current deposition
*
* @param metadata the metadata
* @return response code
* @throws IOException
*/
public int sendMetadata(String metadata) throws IOException {
URL url = new URL(urlString + "/" + deposition_id);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("PUT");
try (OutputStream os = conn.getOutputStream()) {
byte[] input = metadata.getBytes("utf-8");
os.write(input, 0, input.length);
}
final int responseCode = conn.getResponseCode();
// read the body before disconnecting, as the other methods of this class do
final String body = getBody(conn);
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
return responseCode;
}
private boolean checkOKStatus(int responseCode) {
return HttpURLConnection.HTTP_OK == responseCode || HttpURLConnection.HTTP_CREATED == responseCode;
}
/**
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
* for the new version.
*
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
* part of the URL of the DOI that Zenodo suggests using to cite all versions: DOI: 10.xxx/zenodo.656930
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
// public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
// setDepositionId(concept_rec_id, 1);
// String json = "{}";
//
// URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
//
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setDoOutput(true);
// conn.setRequestMethod("POST");
//
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
//
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
//
// conn.disconnect();
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
// String latest_draft = zenodoModel.getLinks().getLatest_draft();
// deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
// bucket = getBucket(latest_draft);
//
// return responseCode;
//
// }
public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1);
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body)
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return response.code();
}
}
/**
* To finish uploading a version or a new deposition that has not yet been published.
* It sets the deposition_id and the bucket to be used.
*
* @param deposition_id the deposition id of the not yet published upload
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
this.deposition_id = deposition_id;
String json = "{}";
URL url = new URL(urlString + "/" + deposition_id);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setRequestMethod("POST");
conn.setDoOutput(true);
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
return responseCode;
}
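// walks the paginated list of existing depositions looking for the entry whose conceptrecid
// matches concept_rec_id; recurses page by page and throws MissingConceptDoiException once an
// empty page is reached without a match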
private void setDepositionId(String concept_rec_id, Integer page) throws Exception, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
deposition_id = zm.getId();
return;
}
}
if (zenodoModelList.size() == 0)
throw new MissingConceptDoiException(
"The concept record id specified was missing in the list of depositions");
setDepositionId(concept_rec_id, page + 1);
}
// private String getPrevDepositions(String page) throws Exception {
//
// HttpGet get = new HttpGet(urlString);
// URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
//
// get.setURI(uri);
//
// get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(get);
// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
// return body;
// }
// }
private String getPrevDepositions(String page) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
urlBuilder.addQueryParameter("page", page);
String url = urlBuilder.build().toString();
Request request = new Request.Builder()
.url(url)
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get()
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
return response.body().string();
}
}
private String getBucket(String inputUrl) throws IOException {
URL url = new URL(inputUrl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("GET");
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
return zenodoModel.getLinks().getBucket();
}
}

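Editor's note: a hedged sketch of the client's intended call sequence, using only the methods defined above; the endpoint, token, file and metadata are placeholders, and publishing is commented out in this codebase as well:

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

public class ZenodoClientSketch {

	public static void main(String[] args) throws Exception {
		// placeholder endpoint and token
		ZenodoAPIClient client = new ZenodoAPIClient(
			"https://sandbox.zenodo.org/api/deposit/depositions", "<access-token>");

		client.newDeposition(); // sets deposition_id and the upload bucket

		File dump = new File("/tmp/dump.tar"); // placeholder file
		try (InputStream is = new FileInputStream(dump)) {
			client.uploadIS3(is, dump.getName(), dump.length()); // streamed upload into the bucket
		}

		client.sendMetadata("{\"metadata\":{\"title\":\"placeholder\"}}"); // metadata as a JSON string

		// for a new version of an already published deposition, start from
		// client.newVersion("<concept_rec_id>") instead of newDeposition()
	}
}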
View File

@@ -0,0 +1,18 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
public class Community {
private String identifier;
public String getIdentifier() {
return identifier;
}
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
}

View File

@@ -0,0 +1,51 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
public class Creator {
private String affiliation;
private String name;
private String orcid;
public String getAffiliation() {
return affiliation;
}
public void setAffiliation(String affiliation) {
this.affiliation = affiliation;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getOrcid() {
return orcid;
}
public void setOrcid(String orcid) {
this.orcid = orcid;
}
public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator();
if (name != null) {
c.name = name;
}
if (affiliation != null) {
c.affiliation = affiliation;
}
if (orcid != null) {
c.orcid = orcid;
}
return c;
}
}

View File

@@ -0,0 +1,52 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
public class File implements Serializable {
private String checksum;
private String filename;
private long filesize;
private String id;
public String getChecksum() {
return checksum;
}
public void setChecksum(String checksum) {
this.checksum = checksum;
}
public String getFilename() {
return filename;
}
public void setFilename(String filename) {
this.filename = filename;
}
public long getFilesize() {
return filesize;
}
public void setFilesize(long filesize) {
this.filesize = filesize;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
}

View File

@@ -0,0 +1,31 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
public class Grant implements Serializable {
private String id;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public static Grant newInstance(String id) {
Grant g = new Grant();
g.id = id;
return g;
}
}

View File

@@ -0,0 +1,100 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
public class Links implements Serializable {
private String bucket;
private String discard;
private String edit;
private String files;
private String html;
private String latest_draft;
private String latest_draft_html;
private String publish;
private String self;
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
public String getDiscard() {
return discard;
}
public void setDiscard(String discard) {
this.discard = discard;
}
public String getEdit() {
return edit;
}
public void setEdit(String edit) {
this.edit = edit;
}
public String getFiles() {
return files;
}
public void setFiles(String files) {
this.files = files;
}
public String getHtml() {
return html;
}
public void setHtml(String html) {
this.html = html;
}
public String getLatest_draft() {
return latest_draft;
}
public void setLatest_draft(String latest_draft) {
this.latest_draft = latest_draft;
}
public String getLatest_draft_html() {
return latest_draft_html;
}
public void setLatest_draft_html(String latest_draft_html) {
this.latest_draft_html = latest_draft_html;
}
public String getPublish() {
return publish;
}
public void setPublish(String publish) {
this.publish = publish;
}
public String getSelf() {
return self;
}
public void setSelf(String self) {
this.self = self;
}
}

View File

@@ -0,0 +1,161 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
import java.util.List;
public class Metadata implements Serializable {
private String access_right;
private List<Community> communities;
private List<Creator> creators;
private String description;
private String doi;
private List<Grant> grants;
private List<String> keywords;
private String language;
private String license;
private PrereserveDoi prereserve_doi;
private String publication_date;
private List<String> references;
private List<RelatedIdentifier> related_identifiers;
private String title;
private String upload_type;
private String version;
public String getUpload_type() {
return upload_type;
}
public void setUpload_type(String upload_type) {
this.upload_type = upload_type;
}
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
public String getAccess_right() {
return access_right;
}
public void setAccess_right(String access_right) {
this.access_right = access_right;
}
public List<Community> getCommunities() {
return communities;
}
public void setCommunities(List<Community> communities) {
this.communities = communities;
}
public List<Creator> getCreators() {
return creators;
}
public void setCreators(List<Creator> creators) {
this.creators = creators;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public List<Grant> getGrants() {
return grants;
}
public void setGrants(List<Grant> grants) {
this.grants = grants;
}
public List<String> getKeywords() {
return keywords;
}
public void setKeywords(List<String> keywords) {
this.keywords = keywords;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public String getLicense() {
return license;
}
public void setLicense(String license) {
this.license = license;
}
public PrereserveDoi getPrereserve_doi() {
return prereserve_doi;
}
public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
this.prereserve_doi = prereserve_doi;
}
public String getPublication_date() {
return publication_date;
}
public void setPublication_date(String publication_date) {
this.publication_date = publication_date;
}
public List<String> getReferences() {
return references;
}
public void setReferences(List<String> references) {
this.references = references;
}
public List<RelatedIdentifier> getRelated_identifiers() {
return related_identifiers;
}
public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
this.related_identifiers = related_identifiers;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

View File

@@ -0,0 +1,33 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
public class PrereserveDoi implements Serializable {
private String doi;
private String recid;
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public String getRecid() {
return recid;
}
public void setRecid(String recid) {
this.recid = recid;
}
}

View File

@@ -0,0 +1,51 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
public class RelatedIdentifier implements Serializable {
private String identifier;
private String relation;
private String resource_type;
private String scheme;
public String getIdentifier() {
return identifier;
}
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
public String getRelation() {
return relation;
}
public void setRelation(String relation) {
this.relation = relation;
}
public String getResource_type() {
return resource_type;
}
public void setResource_type(String resource_type) {
this.resource_type = resource_type;
}
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
}

View File

@@ -0,0 +1,126 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.io.Serializable;
import java.util.List;
public class ZenodoModel implements Serializable {
private String conceptrecid;
private String created;
private List<File> files;
private String id;
private Links links;
private Metadata metadata;
private String modified;
private String owner;
private String record_id;
private String state;
private boolean submitted;
private String title;
public String getConceptrecid() {
return conceptrecid;
}
public void setConceptrecid(String conceptrecid) {
this.conceptrecid = conceptrecid;
}
public String getCreated() {
return created;
}
public void setCreated(String created) {
this.created = created;
}
public List<File> getFiles() {
return files;
}
public void setFiles(List<File> files) {
this.files = files;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public Links getLinks() {
return links;
}
public void setLinks(Links links) {
this.links = links;
}
public Metadata getMetadata() {
return metadata;
}
public void setMetadata(Metadata metadata) {
this.metadata = metadata;
}
public String getModified() {
return modified;
}
public void setModified(String modified) {
this.modified = modified;
}
public String getOwner() {
return owner;
}
public void setOwner(String owner) {
this.owner = owner;
}
public String getRecord_id() {
return record_id;
}
public void setRecord_id(String record_id) {
this.record_id = record_id;
}
public String getState() {
return state;
}
public void setState(String state) {
this.state = state;
}
public boolean isSubmitted() {
return submitted;
}
public void setSubmitted(boolean submitted) {
this.submitted = submitted;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

View File

@@ -0,0 +1,15 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
/**
* @author miriam.baglioni
* @Date 01/07/23
*/
import java.util.ArrayList;
public class ZenodoModelList extends ArrayList<ZenodoModel> {
}

View File

@@ -99,7 +99,7 @@
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -124,7 +124,7 @@
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}

View File

@@ -3,10 +3,12 @@
<parameters>
<property>
<name>singleDeposition</name>
<value>false</value>
<description>Indicates if it is a single community deposition</description>
</property>
<property>
<name>communityId</name>
<value>none</value>
<description>the id of the community to be dumped, when a single-community dump is requested</description>
</property>
<property>
@@ -35,6 +37,7 @@
</property>
<property>
<name>resultAggregation</name>
<value>false</value>
<description>true if all the result types have to be dumped under result; false otherwise</description>
</property>
<property>
@@ -47,6 +50,7 @@
</property>
<property>
<name>metadata</name>
<value>""</value>
<description>the metadata associated with the deposition</description>
</property>
<property>
@@ -55,17 +59,19 @@
</property>
<property>
<name>conceptRecordId</name>
<value>none</value>
<description>for a new version, the id of the record of the old deposition</description>
</property>
<property>
<name>depositionId</name>
<value>none</value>
<description>the depositionId of an open deposition to which content has to be added</description>
</property>
<property>
<name>organizationCommunityMap</name>
<value>none</value>
<description>the organization community map</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
@@ -215,7 +221,7 @@
</property>
</configuration>
</sub-workflow>
<ok to="End" />
<ok to="make_archive" />
<error to="Kill" />
</action>
@@ -232,7 +238,7 @@
</property>
<property>
<name>outputPath</name>
<value>${workingDir}/tar</value>
<value>${outputPath}/dump</value>
</property>
<property>
<name>sourcePath</name>
@@ -256,7 +262,7 @@
<error to="Kill" />
</action>
<!-- Sub-workflow which runs the dump for the complete graph -->
<!-- Sub-workflow which runs the dump for the communities -->
<action name="dump_community">
<sub-workflow>
<app-path>${wf:appPath()}/dump_community
@@ -273,7 +279,7 @@
</property>
<property>
<name>outputPath</name>
<value>${workingDir}/tar</value>
<value>${outputPath}/dump</value>
</property>
</configuration>
</sub-workflow>
@@ -293,7 +299,7 @@
</property>
<property>
<name>outputPath</name>
<value>${workingDir}/tar</value>
<value>${outputPath}/dump</value>
</property>
<property>
<name>sourcePath</name>
@@ -309,12 +315,23 @@
<error to="Kill" />
</action>
<!-- <action name="make_archive">-->
<!-- <java>-->
<!-- <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>-->
<!-- <arg>&#45;&#45;hdfsPath</arg><arg>${outputPath}</arg>-->
<!-- <arg>&#45;&#45;nameNode</arg><arg>${nameNode}</arg>-->
<!-- <arg>&#45;&#45;sourcePath</arg><arg>${workingDir}/tar</arg>-->
<!-- </java>-->
<!-- <ok to="should_upload"/>-->
<!-- <error to="Kill"/>-->
<!-- </action>-->
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
</java>
<ok to="should_upload"/>
<error to="Kill"/>
@@ -330,7 +347,7 @@
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--hdfsPath</arg><arg>${outputPath}/tar/</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>

View File

@@ -96,7 +96,7 @@
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -123,7 +123,7 @@
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -149,7 +149,7 @@
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -175,7 +175,7 @@
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -203,7 +203,7 @@
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -234,7 +234,7 @@
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -259,7 +259,7 @@
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -284,7 +284,7 @@
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -309,7 +309,7 @@
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -336,7 +336,7 @@
<mode>cluster</mode>
<name>Split dumped result for community</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}

View File

@@ -336,7 +336,7 @@
<action name="create_entities_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/communities_infrastructure.json.gz</arg>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>

View File

@@ -89,7 +89,7 @@
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -121,7 +121,7 @@
<mode>cluster</mode>
<name>Dump funder results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -148,7 +148,7 @@
<mode>cluster</mode>
<name>Dump funder results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -175,7 +175,7 @@
<mode>cluster</mode>
<name>Dump funder results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -202,7 +202,7 @@
<mode>cluster</mode>
<name>Dump funder results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
@@ -231,7 +231,7 @@
<mode>cluster</mode>
<name>Dump funder results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults</class>
<jar>dhp-graph-dump-${projectVersion}.jar</jar>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}

View File

@@ -638,7 +638,7 @@
<arg>--sourcePath</arg><arg>${outputPath}/original</arg>
<arg>--contextPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/community_infrastructure</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
@@ -856,7 +856,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
<arg>--relationPath</arg><arg>${workingDir}/relation</arg> <!-- new relations from context -->
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@@ -0,0 +1,132 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>accessToken</name>
<description>the access token used for the deposition in Zenodo</description>
</property>
<property>
<name>connectionUrl</name>
<description>the connection url for Zenodo</description>
</property>
<property>
<name>metadata</name>
<value>""</value>
<description>the metadata associated with the deposition</description>
</property>
<property>
<name>depositionType</name>
<description>the type of deposition to perform: "new" for a brand new deposition, "version" for a new version of a published deposition (the concept record id must be provided), "upload" to add content to an open deposition for which the deposition id is already known (the deposition id must be provided)</description>
</property>
<property>
<name>conceptRecordId</name>
<value>none</value>
<description>for a new version, the id of the record of the old deposition</description>
</property>
<property>
<name>depositionId</name>
<value>none</value>
<description>the depositionId of an open deposition to which content has to be added</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="make_archive"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/tar/</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

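Editor's note: a hedged example of the job.properties this workflow expects; the property names come from the parameter list above, every value is a placeholder, and the usual spark2/oozie properties (sparkDriverMemory, sparkExecutorMemory, sparkExecutorCores, oozieActionShareLibForSpark2, ...) must be supplied as well:

sourcePath=/user/<name>/dump-to-archive
outputPath=/user/<name>/upload
connectionUrl=https://sandbox.zenodo.org/api/deposit/depositions
accessToken=<zenodo-token>
depositionType=new
# only meaningful when depositionType=version
conceptRecordId=none
# only meaningful when depositionType=upload
depositionId=none
metadata=""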
View File

@@ -330,7 +330,8 @@ public class DumpJobTest {
Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel());
Assertions
.assertEquals(
Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode());
Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN),
gr.getBestaccessright().getCode());
Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
@@ -425,7 +426,7 @@
.getAccessright()
.getCode()
.equals(
Constants.accessRightsCoarMap
Constants.ACCESS_RIGHTS_COAR_MAP
.get(ModelConstants.ACCESS_RIGHT_OPEN)));
Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN));
Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green));
@@ -438,10 +439,6 @@
Assertions.assertEquals("2017-01-01", instance.getPublicationdate());
Assertions.assertEquals(null, instance.getArticleprocessingcharge());
Assertions.assertEquals("peerReviewed", instance.getRefereed());
Indicator indicator = instance.getIndicators();
Assertions.assertFalse(Optional.ofNullable(indicator.getUsageCounts()).isPresent());
Assertions.assertTrue(Optional.ofNullable(indicator.getImpactMeasures()).isPresent());
}
@Test

View File

@@ -61,6 +61,29 @@ public class SplitForCommunityTest {
spark.stop();
}
@Test
void testCommunitySplit2() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityResult")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
CommunitySplit split = new CommunitySplit();
split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
}
@Test
void testCommunitySplit() {

View File

@@ -0,0 +1,198 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
@Disabled
public class ZenodoUploadTest {
private static String workingDir;
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = "OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4";
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(UpdateProjectInfoTest.class.getSimpleName())
.toString();
}
@Test
void testNewDeposition() throws IOException {
CommunityMap communityMap = new CommunityMap();
communityMap.put("ni", "Neuroinformatics");
communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
fs
.copyFromLocalFile(
false, new Path(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
.getPath()),
new Path(workingDir + "/zenodo/ni/ni"));
fs
.copyFromLocalFile(
false, new Path(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
.getPath()),
new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
client.newDeposition();
// the second boolean parameter here sets the recursion to true
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
.listFiles(
new Path(workingDir + "/zenodo"), true);
while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
String p_string = fileStatus.getPath().toString();
int index = p_string.lastIndexOf("/");
String community = p_string.substring(0, index);
community = community.substring(community.lastIndexOf("/") + 1);
String community_name = communityMap.get(community).replace(" ", "_");
// fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
System.out.println(community);
// File f = new File("/tmp/" + community_name);
FSDataInputStream inputStream = fs.open(fileStatus.getPath());
System.out.println(client.uploadIS(inputStream, community_name));
}
String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}";
		System.out.println(client.sendMretadata(metadata));
		// System.out.println(client.publish());
	}
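
	// A minimal stand-alone sketch of the path-parsing step used in the loop above,
	// assuming paths shaped like "…/zenodo/<acronym>/<file>": the parent directory name
	// is the community acronym used to look up the display name in the CommunityMap.
	// Hypothetical helper, not part of the production code.
	private static String communityFromPath(String pathString) {
		String parentDir = pathString.substring(0, pathString.lastIndexOf("/")); // drop the file name
		return parentDir.substring(parentDir.lastIndexOf("/") + 1); // keep the directory name
	}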

	@Test
	void testNewVersion() throws Exception, MissingConceptDoiException {
		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
			ACCESS_TOKEN);

		// open a new version of the deposition whose record id is 656628
		client.newVersion("656628");

		CommunityMap communityMap = new CommunityMap();
		communityMap.put("ni", "Neuroinformatics");
		communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");

		// stage the test dumps under <workingDir>/zenodo/<acronym>/<acronym>
		LocalFileSystem fs = FileSystem.getLocal(new Configuration());
		fs
			.copyFromLocalFile(
				false, new Path(getClass()
					.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
					.getPath()),
				new Path(workingDir + "/zenodo/ni/ni"));
		fs
			.copyFromLocalFile(
				false, new Path(getClass()
					.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
					.getPath()),
				new Path(workingDir + "/zenodo/dh-ch/dh-ch"));

		RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
			.listFiles(
				new Path(workingDir + "/zenodo"), true);
		while (fileStatusListIterator.hasNext()) {
			LocatedFileStatus fileStatus = fileStatusListIterator.next();
			// the parent directory name is the community acronym
			String p_string = fileStatus.getPath().toString();
			int index = p_string.lastIndexOf("/");
			String community = p_string.substring(0, index);
			community = community.substring(community.lastIndexOf("/") + 1);
			String community_name = communityMap.get(community).replace(" ", "_");
			// fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
			System.out.println(community);
			// File f = new File("/tmp/" + community_name);
			FSDataInputStream inputStream = fs.open(fileStatus.getPath());
			System.out.println(client.uploadIS(inputStream, community_name));
		}
		// System.out.println(client.publish());
	}
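
	// A hedged sketch of what client.newVersion(id) is assumed to call under the hood:
	// Zenodo's documented "newversion" deposition action, which creates a draft copy of
	// the record that new files can then be attached to. Illustrative only; the real
	// ZenodoAPIClient implementation may differ.
	private static int newVersionSketch(String depositionId, String accessToken) throws Exception {
		java.net.http.HttpRequest req = java.net.http.HttpRequest
			.newBuilder()
			.uri(java.net.URI
				.create("https://zenodo.org/api/deposit/depositions/" + depositionId + "/actions/newversion"))
			.header("Authorization", "Bearer " + accessToken)
			.POST(java.net.http.HttpRequest.BodyPublishers.noBody())
			.build();
		return java.net.http.HttpClient
			.newHttpClient()
			.send(req, java.net.http.HttpResponse.BodyHandlers.ofString())
			.statusCode(); // 201 is expected on success
	}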

	@Test
	void testNewVersion2() throws Exception, MissingConceptDoiException {
		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
			ACCESS_TOKEN);

		// open a new version of deposition 1210237 and stream a large tar file into it
		client.newVersion("1210237");

		File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar");
		// File file = new File(getClass()
		// .getResource("/eu/dnetlib/dhp/common/api/newVersion2")
		// .getPath());

		InputStream is = new FileInputStream(file);
		Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length()));
		// Assertions.assertEquals(202, client.publish());
	}
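
	// uploadIS3 is assumed to stream the InputStream straight to the deposition's file
	// bucket (Zenodo's files API: PUT {bucket_url}/{filename} with the raw bytes), which
	// is why the file length is passed alongside the stream: the large tar never needs to
	// be buffered in memory. A hedged sketch; bucketUrl is hypothetical here.
	private static int uploadSketch(InputStream is, String bucketUrl, String name, String accessToken)
		throws Exception {
		java.net.http.HttpRequest req = java.net.http.HttpRequest
			.newBuilder()
			.uri(java.net.URI.create(bucketUrl + "/" + name))
			.header("Authorization", "Bearer " + accessToken)
			.PUT(java.net.http.HttpRequest.BodyPublishers.ofInputStream(() -> is))
			.build();
		return java.net.http.HttpClient
			.newHttpClient()
			.send(req, java.net.http.HttpResponse.BodyHandlers.ofString())
			.statusCode(); // 200/201 on success
	}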

	@Test
	void readCommunityMap() throws IOException {
		LocalFileSystem fs = FileSystem.getLocal(new Configuration());
		System.out
			.println(
				new Gson()
					.toJson(
						Utils
							.readCommunityMap(
								fs, getClass()
									.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
									.getPath())));
	}
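
	// A self-contained approximation of what the Gson call above prints, assuming
	// CommunityMap behaves like a Map from community acronym to display name (as the
	// puts in testNewVersion suggest). Hypothetical helper for illustration.
	private static String communityMapJsonSketch() {
		java.util.Map<String, String> map = new java.util.HashMap<>();
		map.put("ni", "Neuroinformatics");
		map.put("dh-ch", "Digital Humanities and Cultural Heritage");
		return new Gson().toJson(map); // e.g. {"ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
	}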

	@Test
	void depositBigFile() throws MissingConceptDoiException, IOException {
		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
			ACCESS_TOKEN);

		Assertions.assertEquals(201, client.newDeposition());

		File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar");
		// File file = new File(getClass()
		// .getResource("/eu/dnetlib/dhp/common/api/newVersion2")
		// .getPath());

		InputStream is = new FileInputStream(file);
		Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length()));
		// Assertions.assertEquals(202, client.publish());
	}
}

View File

@ -388,13 +388,14 @@ public class DumpSubsetTest {
.textFile(workingDir.toString() + "/dump/community_infrastructure")
.map(item -> OBJECT_MAPPER.readValue(item, ResearchCommunity.class));
Assertions.assertEquals(5, tmp.count());
Assertions.assertEquals(6, tmp.count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("enermaps")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("eutopia")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("dh-ch")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("sdsn-gr")).count());
}
@ -455,7 +456,7 @@ public class DumpSubsetTest {
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/community_infrastructure");
.saveAsTextFile(workingDir.toString() + "/dump/communities_infrastructures");
SparkSelectValidRelationContext
.main(
@ -511,10 +512,11 @@ public class DumpSubsetTest {
.textFile(workingDir.toString() + "/relation")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
Assertions.assertEquals(94, tmp.count());
Assertions.assertEquals(47, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
Assertions.assertEquals(36, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
Assertions.assertEquals(11, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
Assertions.assertEquals(102, tmp.count());
Assertions.assertEquals(51, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
Assertions.assertEquals(39, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
Assertions.assertEquals(12, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
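		// The updated counts partition the relations by source-id prefix; by OpenAIRE
		// convention these prefixes are assumed to mark the entity type ("50|" results,
		// "10|" data sources, "00|" contexts/research communities), so 51 + 39 + 12 = 102
		// covers the whole dump.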
}
}

View File

@ -0,0 +1 @@
{"pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "contributor": [], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, {"key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a", "value": "Microsoft Academic Graph"}], "id": "50|doi_________::0027accd79214af151336e8237a2b084", "container": {"issnPrinted": "1607-6729", "conferencedate": null, "vol": "385", "conferenceplace": null, "name": "Doklady Biochemistry and Biophysics", "iss": null, "sp": "228", "edition": null, "issnOnline": null, "ep": "234", "issnLinking": null}, "lastupdatetimestamp": 1649039791345, "author": [{"surname": null, "fullname": "Vladimir S. Saakov", "pid": null, "name": null, "rank": 1}], "instance": [{"refereed": "UNKNOWN", "hostedby": {"key": "10|issn___print::55156520c3996f4d887f858c089d1e5f", "value": "Doklady Biochemistry and Biophysics"}, "url": ["https://doi.org/10.1023/a:1019971625315"], "pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "publicationdate": "2002-01-01", "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "type": "Article"}], "subjects": [{"provenance": null, "subject": {"scheme": "keyword", "value": "General Chemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biochemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "General Medicine"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biophysics"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photosystem II"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Ion"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Soil salinity"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Analytical chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Function (biology)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Pulse (signal processing)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Fluorescence"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Phototroph"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Kinetic energy"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photochemistry"}}], "publicationdate": "2002-01-01", "indicators": {"impactMeasures": {"influence": {"score": "4.901964E-9", "class": "C"}, "popularity": {"score": "6.185583E-10", "class": "C"}, "influence_alt": {"score": "3", "class": "C"}, "impulse": {"score": "0", "class": "C"}, "popularity_alt": {"score": "0.03722029", "class": "C"}}}, "dateofcollection": "2022-04-04T02:36:31Z", "type": "publication", "description": [], "format": [], "coverage": [], "publisher": "Springer Science and Business Media LLC", "language": {"code": "und", "label": "Undetermined"}, "country": [], "originalId": ["453197", "10.1023/a:1019971625315", "314096869"], "source": ["Crossref", null], "context": [{"code": "enermaps", "provenance": [{"provenance": "Inferred by OpenAIRE", "trust": "0.8"}], "label": "Energy Research"}]}

View File

@ -2,4 +2,5 @@
{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","acronym":"enermaps","name":"Welcome to EnerMaps Gateway! Find the latest scientific data.","type":"Research Community","description":"","zenodo_community":null,"subject":[]}
{"id":"00|context_____::6f567d9abd1c6603b0c0205a832bc757","acronym":"neanias-underwater","name":"NEANIAS Underwater Research Community","type":"Research Community","description":"","zenodo_community":null,"subject":["Ocean mapping","Multibeam Backscatter","Bathymetry","Seabed classification","Submarine Geomorphology","Underwater Photogrammetry"]}
{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","acronym":"dh-ch","name":"Digital Humanities and Cultural Heritage","type":"Research Community","description":"This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.","zenodo_community":"https://zenodo.org/communities/oac_dh-ch","subject":["modern art","monuments","europeana data model","field walking","frescoes","LIDO metadata schema","art history","excavation","Arts and Humanities General","coins","temples","numismatics","lithics","environmental archaeology","digital cultural heritage","archaeological reports","history","CRMba","churches","cultural heritage","archaeological stratigraphy","religious art","digital humanities","archaeological sites","linguistic studies","bioarchaeology","architectural orders","palaeoanthropology","fine arts","europeana","CIDOC CRM","decorations","classic art","stratigraphy","digital archaeology","intangible cultural heritage","walls","chapels","CRMtex","Language and Literature","paintings","archaeology","mosaics","burials","medieval art","castles","CARARE metadata schema","statues","natural language processing","inscriptions","CRMsci","vaults","contemporary art","Arts and Humanities","CRMarchaeo","pottery"]}
{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviations climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY 
CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in transport","Traffic congestion","air transport system","Urban logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]}
{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviations climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY 
CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in transport","Traffic congestion","air transport system","Urban logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]}
{"id":"00|context_____::a38bf77184799906a6ce86b9eb761c80","acronym":"sdsn-gr","name":"Sustainable Development Solutions Network - Greece","type":"Research Community","description":"The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.","zenodo_community":"https://zenodo.org/communities/oac_sdsn-greece","subject":["SDG13 - Climate action","SDG8 - Decent work and economic\n\t\t\t\t\tgrowth","SDG15 - Life on land","SDG2 - Zero hunger","SDG17 - Partnerships for the\n\t\t\t\t\tgoals","SDG10 - Reduced inequalities","SDG5 - Gender equality","SDG12 - Responsible\n\t\t\t\t\tconsumption and production","SDG14 - Life below water","SDG6 - Clean water and\n\t\t\t\t\tsanitation","SDG11 - Sustainable cities and communities","SDG1 - No poverty","SDG3 -\n\t\t\t\t\tGood health and well being","SDG7 - Affordable and clean energy","SDG4 - Quality\n\t\t\t\t\teducation","SDG9 - Industry innovation and infrastructure","SDG16 - Peace justice\n\t\t\t\t\tand strong institutions"]}

File diff suppressed because one or more lines are too long

View File

@ -102,7 +102,8 @@
<junit-jupiter.version>5.6.1</junit-jupiter.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version>
<dhp-schemas.version>[2.12.1]</dhp-schemas.version>
<!-- <dhp-schemas.version>[2.13.2-SNAPSHOT]</dhp-schemas.version>-->
<dhp-schemas.version>[2.13.1-patched]</dhp-schemas.version>
</properties>
</project>
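
For reference: a bracketed Maven version such as [2.13.1-patched] is a hard version range, pinning the build to exactly that dhp-schemas artifact instead of treating the number as a soft suggestion that dependency mediation may override.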