From 0857849a86f6dc1207c27a8468e33e368a747fb3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 11:02:37 +0100 Subject: [PATCH 01/21] [Graph DUMP] Remove dump of measure until it will be clear where to put it (at the level of result or at the level of the instance) --- .../dhp/oa/graph/dump/ResultMapper.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 49468540d9..9e2212f4cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,15 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); - Optional> mes = Optional.ofNullable(input.getMeasures()); - if (mes.isPresent()) { - List measure = new ArrayList<>(); - mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - out.setMeasures(measure); - } +// Optional> mes = Optional.ofNullable(input.getMeasures()); +// if (mes.isPresent()) { +// List measure = new ArrayList<>(); +// mes +// .get() +// .forEach( +// m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); +// out.setMeasures(measure); +// } Optional .ofNullable(input.getAuthor()) From c10ff6928c67af82cd2b04eb6232608a1e4247a6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 11:36:21 +0100 Subject: [PATCH 02/21] [Graph DUMP] add schema of the dump related to the model as in dhp-schemas.2.8.31. Note the measere element at the level of the result has been removed because of issues on where to display it: at the level of the result or at the level of the entity --- .../community_infrastructure_schema.json | 35 ++ .../graph/dump/schemas/datasource_schema.json | 192 ++++++++++ .../dump/schemas/organization_schema.json | 57 +++ .../oa/graph/dump/schemas/project_schema.json | 119 ++++++ .../graph/dump/schemas/relation_schema.json | 68 ++++ .../dump/{ => schemas}/result_schema.json | 361 ++++++------------ 6 files changed, 596 insertions(+), 236 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{ => schemas}/result_schema.json (50%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json new file mode 100644 index 0000000000..727432a671 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json @@ -0,0 +1,35 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "acronym": { + "type": "string", + "description": "The acronym of the community" + }, + "description": { + "type": "string", + "description": "Description of the research community/research infrastructure" + }, + "id": { + "type": "string", + "description": "OpenAIRE id of the research community/research infrastructure" + }, + "name": { + "type": "string", + "description": "The long name of the community" + }, + "subject": { + "description": "Only for research communities: the list of the subjects associated to the research community", + "type": "array", + "items": {"type": "string"} + }, + "type": { + "type": "string", + "description": "One of {Research Community, Research infrastructure}" + }, + "zenodo_community": { + "type": "string", + "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json new file mode 100644 index 0000000000..c416f23208 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json @@ -0,0 +1,192 @@ +{ + "$schema":"http://json-schema.org/draft-07/schema#", + "definitions": { + "ControlledField": { + "type": "object", + "properties": { + "scheme": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" + } + }, + "type":"object", + "properties": { + "accessrights": { + "type": "string", + "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" + }, + "certificates": { + "type": "string", + "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." + }, + "citationguidelineurl": { + "type": "string", + "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." + }, + "contenttypes": { + "description": "Types of content in the data source, as defined by OpenDOAR", + "type": "array", + "items": { + "type": "string" + } + }, + "databaseaccessrestriction": { + "type": "string", + "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" + }, + "datasourcetype": { + "allOf": [ + { + "$ref": "#/definitions/ControlledField" + }, + { + "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" + } + ] + }, + "datauploadrestriction": { + "type": "string", + "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" + }, + "dateofvalidation": { + "type": "string", + "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" + }, + "description": { + "type": "string" + }, + "englishname": { + "type": "string", + "description": "The English name of the datasource" + }, + "id": { + "type": "string", + "description": "The OpenAIRE id of the data source" + }, + "journal": { + "type": "object", + "properties": { + "conferencedate": { + "type": "string" + }, + "conferenceplace": { + "type": "string" + }, + "edition": { + "type": "string" + }, + "ep": { + "type": "string", + "description": "End page" + }, + "iss": { + "type": "string", + "description": "Issue number" + }, + "issnLinking": { + "type": "string" + }, + "issnOnline": { + "type": "string" + }, + "issnPrinted": { + "type": "string" + }, + "name": { + "type": "string" + }, + "sp": { + "type": "string", + "description": "Start page" + }, + "vol": { + "type": "string", + "description": "Volume" + } + }, + "description": "Information about the journal, if this data source is of type Journal." + }, + "languages": { + "description": "The languages present in the data source's content, as defined by OpenDOAR.", + "type": "array", + "items": { + "type": "string" + } + }, + "logourl": { + "type": "string" + }, + "missionstatementurl": { + "type": "string", + "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" + }, + "officialname": { + "type": "string", + "description": "The official name of the datasource" + }, + "openairecompatibility": { + "type": "string", + "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." + }, + "originalId": { + "description": "Original identifiers for the datasource" + "type": "array", + "items": { + "type": "string" + } + }, + "pid": { + "description": "Persistent identifiers of the datasource", + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/definitions/ControlledField" + } + ] + } + }, + "pidsystems": { + "type": "string", + "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" + }, + "policies": { + "description": "Policies of the data source, as defined in OpenDOAR.", + "type": "array", + "items": { + "type": "string" + } + }, + "releaseenddate": { + "type": "string", + "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" + }, + "releasestartdate": { + "type": "string", + "description": "Releasing date of the data source, as defined by re3data.org" + }, + "subjects": { + "description": "List of subjects associated to the datasource", + "type": "array", + "items": { + "type": "string" + } + }, + "uploadrights": { + "type": "string", + "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" + }, + "versioning": { + "type": "boolean", + "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json new file mode 100644 index 0000000000..16afa386d5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "alternativenames": { + "description": "Alternative names that identify the organisation", + "type": "array", + "items": { + "type": "string" + } + }, + "country": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The organisation country code" + }, + "label": { + "type": "string", + "description": "The organisation country label" + } + }, + "description": "The country of the organisation" + }, + "id": { + "type": "string", + "description": "The OpenAIRE id for the organisation" + }, + "legalname": { + "type": "string" + }, + "legalshortname": { + "type": "string" + }, + "pid": { + "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme of the identifier (i.e. isni)" + }, + "value": { + "type": "string", + "description": "the value in the schema (i.e. 0000000090326370)" + } + } + } + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json new file mode 100644 index 0000000000..c81187258e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json @@ -0,0 +1,119 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "acronym": { + "type": "string" + }, + "callidentifier": { + "type": "string" + }, + "code": { + "type": "string", + "description": "The grant agreement number" + }, + "enddate": { + "type": "string" + }, + "funding": { + "description": "Funding information for the project", + "type": "array", + "items": { + "type": "object", + "properties": { + "funding_stream": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "Description of the funding stream" + }, + "id": { + "type": "string", + "description": "Id of the funding stream" + } + } + }, + "jurisdiction": { + "type": "string", + "description": "The jurisdiction of the funder (i.e. EU)" + }, + "name": { + "type": "string", + "description": "The name of the funder (European Commission)" + }, + "shortName": { + "type": "string", + "description": "The short name of the funder (EC)" + } + } + } + }, + "granted": { + "type": "object", + "properties": { + "currency": { + "type": "string", + "description": "The currency of the granted amount (e.g. EUR)" + }, + "fundedamount": { + "type": "number", + "description": "The funded amount" + }, + "totalcost": { + "type": "number", + "description": "The total cost of the project" + } + }, + "description": "The money granted to the project" + }, + "h2020programme": { + "description": "The h2020 programme funding the project", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The code of the programme" + }, + "description": { + "type": "string", + "description": "The description of the programme" + } + } + } + }, + "id": { + "type": "string", + "description": "OpenAIRE id for the project" + }, + "keywords": { + "type": "string" + }, + "openaccessmandatefordataset": { + "type": "boolean" + }, + "openaccessmandateforpublications": { + "type": "boolean" + }, + "startdate": { + "type": "string" + }, + "subject": { + "type": "array", + "items": { + "type": "string" + } + }, + "summary": { + "type": "string" + }, + "title": { + "type": "string" + }, + "websiteurl": { + "type": "string" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json new file mode 100644 index 0000000000..98134a03bd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json @@ -0,0 +1,68 @@ +{ + "$schema":"http://json-schema.org/draft-07/schema#", + "definitions": { + "Node": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The OpenAIRE id of the entity" + }, + "type": { + "type": "string", + "description": "The type of the entity (i.e. organisation)" + } + } + } + }, + "type":"object", + "properties": { + "provenance": { + "type": "object", + "properties": { + "provenance": { + "type": "string", + "description": "The reason why OpenAIRE holds the relation " + }, + "trust": { + "type": "string", + "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" + } + } + }, + "reltype": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " + }, + "type": { + "type": "string", + "description": "the type of the relation (i.e. affiliation)" + } + }, + "description": "To represent the semantics of a relation between two entities" + }, + "source": { + "allOf": [ + {"$ref": "#/definitions/Node"}, + {"description": "The node source in the relation"} + ] + }, + "target": { + "allOf": [ + {"$ref": "#/definitions/Node"}, + {"description": "The node target in the relation"} + ] + }, + "validated":{ + "type":"boolean", + "description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)" + }, + "validationDate":{ + "type":"string", + "description":"The date when the relation was collected from OpenAIRE" + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json similarity index 50% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index cb092110ec..a318b7ada7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -1,9 +1,9 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { - "AccessRight": { - "type": "object", - "properties": { + "AccessRight":{ + "type":"object", + "properties":{ "code": { "type": "string", "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" @@ -12,6 +12,16 @@ "type": "string", "description": "Label for the access mode" }, + "openAccessRoute":{ + "type":"string", + "enum":[ + "gold", + "green", + "hybrid", + "bronze" + ], + "description":"The type of OpenAccess applied to the result" + }, "scheme": { "type": "string", "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" @@ -22,65 +32,40 @@ "type": "object", "properties": { "scheme": { - "type": "string", - "description": "The scheme for the resource" + "type": "string" }, "value": { - "type": "string", - "description": "the value in the scheme" + "type": "string" } - } - }, - "KeyValue": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "Description of key" - }, - "value": { - "type": "string", - "description": "Description of value" - } - } + }, + "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" }, "Provenance": { "type": "object", "properties": { "provenance": { "type": "string", - "description": "The provenance of the information" + "description": "The process that produced/provided the information" }, "trust": { - "type": "string", - "description": "The trust associated to the information" + "type": "string" } - } + }, + "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" } }, "type": "object", "properties": { "author": { - "description": "List of authors of the research results", "type": "array", "items": { "type": "object", "properties": { - "affiliation": { - "description": "Affiliations of the author", - "type": "array", - "items": { - "type": "string", - "description": "One of the affiliation of the author" - } - }, "fullname": { - "type": "string", - "description": "Fullname of the author" + "type": "string" }, "name": { - "type": "string", - "description": "First name of the author" + "type": "string" }, "pid": { "type": "object", @@ -94,44 +79,34 @@ "provenance": { "allOf": [ {"$ref": "#/definitions/Provenance"}, - {"description": "The provenance of the author's pid"} + {"description": "Provenance of author's pid"} ] } - }, - "description": "Persistent identifier of the author (e.g. ORCID)" + } }, "rank": { - "type": "integer", - "description": "Order in which the author appears in the authors list" + "type": "integer" }, "surname": { - "type": "string", - "description": "Surname of the author" + "type": "string" } - }, - "description": "One of the author of the research result" + } } }, - "bestaccessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "The openest access right associated to the manifestations of this research results"} - ] - }, + "bestaccessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The openest access right associated to the manifestations of this research results" + } + ] + }, "codeRepositoryUrl": { "type": "string", "description": "Only for results with type 'software': the URL to the repository with the source code" }, - "collectedfrom": { - "description": "Information about the sources from which the record has been collected", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Key is the OpenAIRE identifier of the data source, value is its name"} - ] - } - }, "contactgroup": { "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", "type": "array", @@ -150,12 +125,10 @@ "type": "object", "properties": { "conferencedate": { - "type": "string", - "description": "Date of the conference" + "type": "string" }, "conferenceplace": { - "type": "string", - "description": "Place of the conference" + "type": "string" }, "edition": { "type": "string", @@ -170,16 +143,13 @@ "description": "Journal issue" }, "issnLinking": { - "type": "string", - "description": "Journal linking iisn" + "type": "string" }, "issnOnline": { - "type": "string", - "description": "Journal online issn" + "type": "string" }, "issnPrinted": { - "type": "string", - "description": "Journal printed issn" + "type": "string" }, "name": { "type": "string", @@ -187,51 +157,22 @@ }, "sp": { "type": "string", - "description": "Start page" + "description": "start page" }, "vol": { - "type": "string", - "description": "Volume" + "type": "string" } }, "description": "Container has information about the conference or journal where the result has been presented or published" }, - "context": { - "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "Code identifying the RI/RC" - }, - "label": { - "type": "string", - "description": "Label of the RI/RC" - }, - "provenance": { - "description": "Why this result is associated to the RI/RC.", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/Provenance"} - - ] - } - } - } - } - }, "contributor": { - "description": "Contributors of this result", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Contributors for the result" } }, "country": { - "description": "Country associated to this result", "type": "array", "items": { "type": "object", @@ -241,8 +182,7 @@ "description": "ISO 3166-1 alpha-2 country code" }, "label": { - "type": "string", - "description": "English label of the country" + "type": "string" }, "provenance": { "allOf": [ @@ -274,39 +214,13 @@ "type": "array", "items": { "type": "string" - } }, "embargoenddate": { "type": "string", "description": "Date when the embargo ends and this result turns Open Access" }, - "externalReference": { - "description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is linked to the external resource"} - ] - }, - "typology": { - "type": "string" - }, - "value": { - "type": "string" - } - } - } - }, "format": { - "type": "array", "items": { "type": "string" @@ -332,56 +246,87 @@ }, "id": { "type": "string", - "description": "OpenAIRE identifier" + "description": "OpenAIRE Identifier" }, - "instance": { - "description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result", - "type": "array", - "items": { - "type": "object", - "properties": { - "accessright": { - "allOf": [ - {"$ref": "#/definitions/AccessRight"}, - {"description": "Access right of this instance"} + "instance":{ + "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type":"array", + "items":{ + "type":"object", + "properties":{ + "accessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The accessright of this instance. One result may have multiple instances, each with a different accessright" + } ] }, - "collectedfrom": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name"} - ] + "alternateIdentifier":{ + "description":"All the identifiers associated to the result other than the authoritative ones", + "type":"array", + "items":{ + "allOf":[ + { + "$ref":"#/definitions/ControlledField" + }, + { + "description":"Description of alternateIdentifier" + } + ] + } + }, + "articleprocessingcharge":{ + "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", + "type":"object", + "properties":{ + "amount":{ + "type":"string" + }, + "currency":{ + "type":"string" + } + } }, - "hostedby": { - "allOf": [ - {"$ref": "#/definitions/KeyValue"}, - {"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"} - ] + "license":{ + "type":"string" }, - "license": { - "type": "string", - "description": "License applied to the instance" + "pid":{ + "description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)", + "type":"array", + "items":{ + "allOf":[ + { + "$ref":"#/definitions/ControlledField" + }, + { + "description":"The persistent identifier associated to the result" + } + ] + } + }, + "publicationdate":{ + "type":"string" }, - "publicationdate": { - "type": "string", - "description": "Publication date of the instance" + "refereed":{ + "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", + "type":"string" }, - "refereed": { - "type": "string", - "description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)" + "type":{ + "type":"string", + "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" }, - "type": { - "type": "string", - "description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource" - }, - "url": { - "description":"Location where the instance is accessible", - "type": "array", - "items": { - "type": "string" + "url":{ + "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", + "type":"array", + "items":{ + "type":"string" } } } + } }, "language": { @@ -402,8 +347,7 @@ "description": "Timestamp of last update of the record in OpenAIRE" }, "maintitle": { - "type": "string", - "description": "Title" + "type": "string" }, "originalId": { "description": "Identifiers of the record at the original sources", @@ -418,74 +362,20 @@ "items": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "} + {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "} ] } }, + "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" }, - "projects": { - "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results", - "type": "array", - "items": { - "type": "object", - "properties": { - "acronym": { - "type": "string", - "description": "Project acronym" - }, - "code": { - "type": "string", - "description": "Grant code" - }, - "funder": { - "type": "object", - "properties": { - "fundingStream": { - "type": "string", - "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)" - }, - "jurisdiction": { - "type": "string", - "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" - }, - "name": { - "type": "string", - "description": "Name of the funder" - }, - "shortName": { - "type": "string", - "description": "Short name or acronym of the funder" - } - }, - "description": "Information about the funder funding the project" - }, - "id": { - "type": "string", - "description": "OpenAIRE identifier of the project" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this project is associated to the result"} - ] - }, - "title": { - "type": "string", - "description": "Title of the project" - } - } - } - }, "publicationdate": { - "type": "string", - "description": "Date of publication" + "type": "string" }, "publisher": { - "type": "string", - "description": "Publisher" + "type": "string" }, "size": { "type": "string", @@ -513,15 +403,14 @@ "subject": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "} + {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} ] } } } }, "subtitle": { - "type": "string", - "description": "Sub-title of the result" + "type": "string" }, "tool": { "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", From a22c29fba185a799c37e46ffe09a126b0b97eba2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 12:08:33 +0100 Subject: [PATCH 03/21] [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 9e2212f4cf..8e38a5515c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,7 +133,7 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); + out.setOriginalId(input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); Optional> oInst = Optional .ofNullable(input.getInstance()); From 5681e89544a676f8b4446a51510001e6252f6fff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 5 Nov 2021 12:18:24 +0100 Subject: [PATCH 04/21] Update 'dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json' --- .../oa/graph/dump/schemas/result_schema.json | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index a318b7ada7..d4f05e5ae7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -308,7 +308,8 @@ } }, "publicationdate":{ - "type":"string" + "type":"string", + "description": "Date of the research product" }, "refereed":{ "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", @@ -326,7 +327,6 @@ } } } - } }, "language": { @@ -338,7 +338,7 @@ }, "label": { "type": "string", - "description": "English label" + "description": "Language label in English" } } }, @@ -347,7 +347,12 @@ "description": "Timestamp of last update of the record in OpenAIRE" }, "maintitle": { - "type": "string" + "type": "string", + "descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." + }, + "subtitle": { + "type": "string", + "descriptio": "Explanatory or alternative name by which a scientific result is known." }, "originalId": { "description": "Identifiers of the record at the original sources", @@ -366,16 +371,17 @@ ] } }, - "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" }, "publicationdate": { - "type": "string" + "type": "string", + "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." }, "publisher": { - "type": "string" + "type": "string", + "description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." }, "size": { "type": "string", @@ -409,9 +415,6 @@ } } }, - "subtitle": { - "type": "string" - }, "tool": { "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", "type": "array", From 8442efd8d1f7c2d76ce0fd4bcef57a3752d04fa3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 5 Nov 2021 12:29:22 +0100 Subject: [PATCH 05/21] [Graph DUMP] Filtering out from the originalIds the id of the result in OpenAIRE --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 8e38a5515c..6177007005 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,7 +133,9 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out.setOriginalId(input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); + out + .setOriginalId( + input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); Optional> oInst = Optional .ofNullable(input.getInstance()); From 94918a673cf5498ce2dc5709cfc1dbf96e44d80a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Nov 2021 10:25:28 +0100 Subject: [PATCH 06/21] [Graph DUMP] Fix issue for empty origilaId list --- .../dhp/oa/graph/dump/ResultMapper.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 6177007005..b6ac663b83 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -133,9 +133,21 @@ public class ResultMapper implements Serializable { .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); - out - .setOriginalId( - input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())); + out.setOriginalId(new ArrayList<>()); + Optional + .ofNullable(input.getOriginalId()) + .ifPresent( + v -> out + .setOriginalId( + input + .getOriginalId() + .stream() + .filter(s -> !s.startsWith("50|")) + .collect(Collectors.toList()))); +// Optional +// .ofNullable( +// input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())) +// .ifPresent(v -> out.setOriginalId(v)); Optional> oInst = Optional .ofNullable(input.getInstance()); From 8cc50ecee05d311fe8b3adc02caf9a2b6237bd25 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 08:59:20 +0100 Subject: [PATCH 07/21] [Graph Dump] changed AccessRight with BestAccessRight in the dump and modified the dependency to the schema to the SNAPSHOT --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 3 ++- pom.xml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index b6ac663b83..78f0b67969 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -65,7 +65,8 @@ public class ResultMapper implements Serializable { String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); out .setBestaccessright( - AccessRight + + BestAccessRight .newInstance( code, Constants.coarCodeLabelMap.get(code), diff --git a/pom.xml b/pom.xml index 02bc5d8d4e..53b5938e09 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.21] + [2.8.22-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From e5498052e801d663a95d23c4172c50d812bb18f0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 16:14:10 +0100 Subject: [PATCH 08/21] [Graph DUmp] removed OpenAccessRoute from test in best access right --- .../src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index bf6301ec47..6d2bced94e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -331,7 +331,7 @@ public class DumpJobTest { Assertions .assertEquals( Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); - Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute()); + Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); From b8bdabfae95c017d9c4ab84e1c1778699c2f8127 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 11 Nov 2021 16:16:48 +0100 Subject: [PATCH 09/21] [Graph DUmp] removed OpenAccessRoute from test in best access right --- .../src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 6d2bced94e..602aaf6e66 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -331,7 +331,6 @@ public class DumpJobTest { Assertions .assertEquals( Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); - Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); From 6595135a1a870f67b53aa0a8baa15fbe2a984a3c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 12 Nov 2021 11:45:38 +0100 Subject: [PATCH 10/21] [Dump Schemas] changed the schema of the dumped result according to the modifications in the bestAccessRight type --- .../oa/graph/dump/schemas/result_schema.json | 95 ++++++++----------- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- 2 files changed, 41 insertions(+), 58 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json index d4f05e5ae7..a1e09525e5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json @@ -1,34 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { - "AccessRight":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "openAccessRoute":{ - "type":"string", - "enum":[ - "gold", - "green", - "hybrid", - "bronze" - ], - "description":"The type of OpenAccess applied to the result" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { + "ControlledField": { "type": "object", "properties": { "scheme": { @@ -94,14 +67,21 @@ } }, "bestaccessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" - }, - { - "description":"The openest access right associated to the manifestations of this research results" - } - ] + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + } }, "codeRepositoryUrl": { "type": "string", @@ -255,29 +235,32 @@ "type":"object", "properties":{ "accessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" }, - { - "description":"The accessright of this instance. One result may have multiple instances, each with a different accessright" + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "openAccessRoute":{ + "type":"string", + "enum":[ + "gold", + "green", + "hybrid", + "bronze" + ], + "description":"The type of OpenAccess applied to the result" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" } - ] + } }, - "alternateIdentifier":{ - "description":"All the identifiers associated to the result other than the authoritative ones", - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" - }, - { - "description":"Description of alternateIdentifier" - } - ] - } - }, "articleprocessingcharge":{ "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", "type":"object", diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 697ec705f0..2cc53027e5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -9,7 +9,7 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; -@Disabled +//@Disabled class GenerateJsonSchema { @Test @@ -21,7 +21,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Relation.class); + JsonNode jsonSchema = generator.generateSchema(GraphResult.class); System.out.println(jsonSchema.toString()); } From 99d86134f518627a45c87bf02516ff9c3f8d76ce Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 16 Nov 2021 12:04:21 +0100 Subject: [PATCH 11/21] [Graph Dump] changed the dump since the measures have been moded at the level of the instance --- .../dhp/oa/graph/dump/ResultMapper.java | 20 ++++++++++--------- .../dhp/oa/graph/dump/DumpJobTest.java | 15 ++++++++++---- .../resultDump/publication_extendedinstance | 2 +- pom.xml | 2 +- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 78f0b67969..3a29a2bbf6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -43,15 +43,6 @@ public class ResultMapper implements Serializable { try { addTypeSpecificInformation(out, input, ort); -// Optional> mes = Optional.ofNullable(input.getMeasures()); -// if (mes.isPresent()) { -// List measure = new ArrayList<>(); -// mes -// .get() -// .forEach( -// m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); -// out.setMeasures(measure); -// } Optional .ofNullable(input.getAuthor()) @@ -460,6 +451,17 @@ public class ResultMapper implements Serializable { code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); + + Optional> mes = Optional.ofNullable(i.getMeasures()); + if (mes.isPresent()){ + List measure = new ArrayList<>(); + mes + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); + instance.setMeasures(measure ); + } + if (opAr.get().getOpenAccessRoute() != null) { switch (opAr.get().getOpenAccessRoute()) { case hybrid: diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 602aaf6e66..89b12a7127 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -171,10 +171,14 @@ public class DumpJobTest { GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); + Assertions.assertEquals(1, gr.getInstance().size()); + + Assertions.assertEquals(2, gr.getInstance().get(0).getMeasures().size()); Assertions .assertTrue( gr + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -183,6 +187,8 @@ public class DumpJobTest { Assertions .assertTrue( gr + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -357,11 +363,12 @@ public class DumpJobTest { Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); - Assertions.assertEquals(2, gr.getOriginalId().size()); + System.out.println(gr.getOriginalId().size()); + + Assertions.assertEquals(1, gr.getOriginalId().size()); Assertions .assertTrue( - gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") - && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); + gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); Assertions.assertEquals(1, gr.getPid().size()); Assertions diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance index 043be7b1a9..4688115134 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance @@ -1 +1 @@ -{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 274192c676..616d10c184 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.22] + [2.9.23] [4.0.3] [6.0.5] [3.1.6] From 0136a8c266c74c91c9386226f745b87de5934e87 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Nov 2021 14:38:33 +0100 Subject: [PATCH 12/21] [Graph Dump] Change test to mirror that measure is at the level of the isntance --- .../java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 89b12a7127..37ba96abeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -177,8 +177,8 @@ public class DumpJobTest { Assertions .assertTrue( gr - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( @@ -187,8 +187,8 @@ public class DumpJobTest { Assertions .assertTrue( gr - .getInstance() - .get(0) + .getInstance() + .get(0) .getMeasures() .stream() .anyMatch( From 5dc5792722f7ec2c633f9a268105c859685ae02f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 18 Nov 2021 14:39:12 +0100 Subject: [PATCH 13/21] [Graph Dump] Change test resource to mirror the movement of the measure element --- .../oa/graph/dump/addProjectInfo/publication_extendedmodel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index 6b146405a9..979cbf1684 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From 9fae8721812010b790c4989cb7e38df8a0ec176b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 19 Nov 2021 11:25:50 +0100 Subject: [PATCH 14/21] [Graph Dump] changed to mirror the changes in the model --- .../dnetlib/dhp/common/GraphResultMapper.java | 413 ------------------ .../dhp/oa/graph/dump/ResultMapper.java | 45 +- .../dump/complete/DumpGraphEntities.java | 6 +- pom.xml | 2 +- 4 files changed, 25 insertions(+), 441 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java deleted file mode 100644 index 8ceee5c8a0..0000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ /dev/null @@ -1,413 +0,0 @@ - -package eu.dnetlib.dhp.common; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.*; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class GraphResultMapper implements Serializable { - - public static Result map( - E in) { - - CommunityResult out = new CommunityResult(); - - eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; - Optional ort = Optional.ofNullable(input.getResulttype()); - if (ort.isPresent()) { - switch (ort.get().getClassid()) { - case "publication": - Optional journal = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); - if (journal.isPresent()) { - Journal j = journal.get(); - Container c = new Container(); - c.setConferencedate(j.getConferencedate()); - c.setConferenceplace(j.getConferenceplace()); - c.setEdition(j.getEdition()); - c.setEp(j.getEp()); - c.setIss(j.getIss()); - c.setIssnLinking(j.getIssnLinking()); - c.setIssnOnline(j.getIssnOnline()); - c.setIssnPrinted(j.getIssnPrinted()); - c.setName(j.getName()); - c.setSp(j.getSp()); - c.setVol(j.getVol()); - out.setContainer(c); - out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); - } - break; - case "dataset": - eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; - Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); - Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); - - out - .setGeolocation( - Optional - .ofNullable(id.getGeolocation()) - .map( - igl -> igl - .stream() - .filter(Objects::nonNull) - .map(gli -> { - GeoLocation gl = new GeoLocation(); - gl.setBox(gli.getBox()); - gl.setPlace(gli.getPlace()); - gl.setPoint(gli.getPoint()); - return gl; - }) - .collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); - break; - case "software": - - eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; - Optional - .ofNullable(is.getCodeRepositoryUrl()) - .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); - Optional - .ofNullable(is.getDocumentationUrl()) - .ifPresent( - value -> out - .setDocumentationUrl( - value - .stream() - .map(Field::getValue) - .collect(Collectors.toList()))); - - Optional - .ofNullable(is.getProgrammingLanguage()) - .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); - - out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); - break; - case "other": - - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; - out - .setContactgroup( - Optional - .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out - .setContactperson( - Optional - .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - out - .setTool( - Optional - .ofNullable(ir.getTool()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); - - break; - } - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - Country country = new Country(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - - out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } - - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); - out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); - - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - out - .setInstance( - oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); - - } - - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); - } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); - } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); - } - - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); - } - - } - - List pids = new ArrayList<>(); - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> value - .stream() - .forEach( - p -> pids - .add( - ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())))); - out.setPid(pids); - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); - } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); - } - - List sourceList = new ArrayList<>(); - Optional - .ofNullable(input.getSource()) - .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); - // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); - - out.setSubjects(subjectList); - - out.setType(input.getResulttype().getClassid()); - } - - out - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - - return out; - - } - - private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { - CommunityInstance instance = new CommunityInstance(); - - setCommonValue(i, instance); - - instance - .setCollectedfrom( - KeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); - - instance - .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); - - return instance; - - } - - private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { - Optional opAr = Optional - .ofNullable(i.getAccessright()); - if (opAr.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); - instance - .setAccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - } - - Optional - .ofNullable(i.getLicense()) - .ifPresent(value -> instance.setLicense(value.getValue())); - Optional - .ofNullable(i.getDateofacceptance()) - .ifPresent(value -> instance.setPublicationdate(value.getValue())); - Optional - .ofNullable(i.getRefereed()) - .ifPresent(value -> instance.setRefereed(value.getClassname())); - Optional - .ofNullable(i.getInstancetype()) - .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); - - } - - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); - Optional di = Optional.ofNullable(s.getDataInfo()); - if (di.isPresent()) { - Provenance p = new Provenance(); - p.setProvenance(di.get().getProvenanceaction().getClassname()); - p.setTrust(di.get().getTrust()); - subject.setProvenance(p); - } - - return subject; - } - - private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { - Author a = new Author(); - a.setFullname(oa.getFullname()); - a.setName(oa.getName()); - a.setSurname(oa.getSurname()); - a.setRank(oa.getRank()); - - Optional> oPids = Optional - .ofNullable(oa.getPid()); - if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); - if (pid != null) { - a.setPid(pid); - } - } - - return a; - } - - private static Pid getOrcid(List p) { - for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - Optional di = Optional.ofNullable(pid.getDataInfo()); - if (di.isPresent()) { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), - Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); - } else { - return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) - - ); - } - - } - } - return null; - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 3a29a2bbf6..7dc63a019f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -16,6 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Country; import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; import eu.dnetlib.dhp.schema.dump.oaf.Instance; import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; +import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; @@ -82,7 +83,7 @@ public class ResultMapper implements Serializable { if (c.getClassid().equals((ModelConstants.UNKNOWN))) { return null; } - Country country = new Country(); + ResultCountry country = new ResultCountry(); country.setCode(c.getClassid()); country.setLabel(c.getClassname()); Optional @@ -136,10 +137,6 @@ public class ResultMapper implements Serializable { .stream() .filter(s -> !s.startsWith("50|")) .collect(Collectors.toList()))); -// Optional -// .ofNullable( -// input.getOriginalId().stream().filter(s -> !s.startsWith("50|")).collect(Collectors.toList())) -// .ifPresent(v -> out.setOriginalId(v)); Optional> oInst = Optional .ofNullable(input.getInstance()); @@ -163,7 +160,7 @@ public class ResultMapper implements Serializable { Optional oL = Optional.ofNullable(input.getLanguage()); if (oL.isPresent()) { eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); + out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname())); } Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); if (oLong.isPresent()) { @@ -199,7 +196,7 @@ public class ResultMapper implements Serializable { value .stream() .map( - p -> ControlledField + p -> ResultPid .newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); @@ -452,14 +449,14 @@ public class ResultMapper implements Serializable { Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); - Optional> mes = Optional.ofNullable(i.getMeasures()); - if (mes.isPresent()){ - List measure = new ArrayList<>(); + Optional> mes = Optional.ofNullable(i.getMeasures()); + if (mes.isPresent()) { + List measure = new ArrayList<>(); mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - instance.setMeasures(measure ); + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue())))); + instance.setMeasures(measure); } if (opAr.get().getOpenAccessRoute() != null) { @@ -489,7 +486,7 @@ public class ResultMapper implements Serializable { .setPid( pid .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> ResultPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -499,7 +496,7 @@ public class ResultMapper implements Serializable { .setAlternateIdentifier( ai .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> AlternateIdentifier.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); Optional @@ -579,7 +576,7 @@ public class ResultMapper implements Serializable { private static Subject getSubject(StructuredProperty s) { Subject subject = new Subject(); - subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); + subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); Optional di = Optional.ofNullable(s.getDataInfo()); if (di.isPresent()) { Provenance p = new Provenance(); @@ -601,7 +598,7 @@ public class ResultMapper implements Serializable { Optional> oPids = Optional .ofNullable(oa.getPid()); if (oPids.isPresent()) { - Pid pid = getOrcid(oPids.get()); + AuthorPid pid = getOrcid(oPids.get()); if (pid != null) { a.setPid(pid); } @@ -610,12 +607,12 @@ public class ResultMapper implements Serializable { return a; } - private static Pid getAuthorPid(StructuredProperty pid) { + private static AuthorPid getAuthorPid(StructuredProperty pid) { Optional di = Optional.ofNullable(pid.getDataInfo()); if (di.isPresent()) { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()), @@ -624,9 +621,9 @@ public class ResultMapper implements Serializable { di.get().getProvenanceaction().getClassname(), di.get().getTrust())); } else { - return Pid + return AuthorPid .newInstance( - ControlledField + AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), pid.getValue()) @@ -635,7 +632,7 @@ public class ResultMapper implements Serializable { } } - private static Pid getOrcid(List p) { + private static AuthorPid getOrcid(List p) { List pidList = p.stream().map(pid -> { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 7f64db41cc..e6b4a9b70b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -140,7 +140,7 @@ public class DumpGraphEntities implements Serializable { .ofNullable(d.getDatasourcetype()) .ifPresent( dsType -> datasource - .setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname()))); + .setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname()))); Optional .ofNullable(d.getOpenairecompatibility()) @@ -499,7 +499,7 @@ public class DumpGraphEntities implements Serializable { .ifPresent( value -> { if (!value.getClassid().equals(Constants.UNKNOWN)) { - organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())); + organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname())); } }); @@ -515,7 +515,7 @@ public class DumpGraphEntities implements Serializable { .setPid( value .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); return organization; diff --git a/pom.xml b/pom.xml index 3acbdee9e6..a1e9ccc6a9 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.23] + [2.9.24-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 0506fa2654e422ee12d84aca7393085edd932087 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 19 Nov 2021 15:56:25 +0100 Subject: [PATCH 15/21] [Graph Dump] changed to mirror the changes in the model --- .../eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 10 ++++------ .../dhp/oa/graph/dump/complete/DumpGraphEntities.java | 7 +------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 7dc63a019f..aad3a87062 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -12,14 +12,12 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; import eu.dnetlib.dhp.schema.dump.oaf.Author; -import eu.dnetlib.dhp.schema.dump.oaf.Country; import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; import eu.dnetlib.dhp.schema.dump.oaf.Measure; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; -import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.dump.oaf.community.CfHbKeyValue; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; @@ -231,7 +229,7 @@ public class ResultMapper implements Serializable { input .getCollectedfrom() .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) + .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) .collect(Collectors.toList())); Set communities = communityMap.keySet(); @@ -424,12 +422,12 @@ public class ResultMapper implements Serializable { instance .setCollectedfrom( - KeyValue + CfHbKeyValue .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); instance .setHostedby( - KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); + CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); return instance; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index e6b4a9b70b..c9de866139 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import static com.jayway.jsonpath.Filter.filter; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -11,9 +10,7 @@ import java.util.stream.Collectors; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -22,8 +19,6 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -133,7 +128,7 @@ public class DumpGraphEntities implements Serializable { .ifPresent( pids -> pids .stream() - .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList())); Optional From 58bc3f223ad1e180113fae6909c8656a8b85ee68 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 14:09:46 +0100 Subject: [PATCH 16/21] [GRAPH DUMP] Add filtering for relation we do not want to dump. It is based on the relclass --- .../dump/complete/CreateContextEntities.java | 2 +- .../dump/complete/CreateContextRelation.java | 2 +- .../dump/complete/SparkCollectAndSave.java | 2 +- .../dump/complete/SparkDumpEntitiesJob.java | 2 +- .../dump/complete/SparkDumpRelationJob.java | 17 +- .../complete/SparkOrganizationRelation.java | 2 +- .../SparkSelectValidRelationsJob.java | 2 +- .../community/oozie_app/config-default.xml | 30 - .../dump/community/oozie_app/workflow.xml | 431 ------------- .../complete/oozie_app/config-default.xml | 30 - .../dump/complete/oozie_app/workflow.xml | 586 ------------------ .../community_infrastructure_schema.json | 37 -- .../complete/schema/datasource_schema.json | 192 ------ .../complete/schema/organization_schema.json | 57 -- .../dump/complete/schema/project_schema.json | 119 ---- .../dump/complete/schema/relation_schema.json | 60 -- .../dump/complete/schema/result_schema.json | 398 ------------ .../oozie_app/config-default.xml | 30 - .../dump/funderresults/oozie_app/workflow.xml | 563 ----------------- .../input_collect_and_save.json | 0 ...rs.json => input_complete_parameters.json} | 0 .../input_entity_parameter.json | 0 .../input_organization_parameters.json | 0 .../input_relationdump_parameters.json | 6 + .../community_infrastructure_schema.json | 35 -- .../graph/dump/schemas/datasource_schema.json | 192 ------ .../dump/schemas/organization_schema.json | 57 -- .../oa/graph/dump/schemas/project_schema.json | 119 ---- .../graph/dump/schemas/relation_schema.json | 68 -- .../oa/graph/dump/schemas/result_schema.json | 417 ------------- .../graph/dump/complete/DumpRelationTest.java | 61 +- 31 files changed, 84 insertions(+), 3433 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_collect_and_save.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete/input_parameters.json => input_complete_parameters.json} (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_entity_parameter.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_organization_parameters.json (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{complete => }/input_relationdump_parameters.json (73%) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 120de9327b..aa031203d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -41,7 +41,7 @@ public class CreateContextEntities implements Serializable { .toString( CreateContextEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index a468e334da..3c71fcaa52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -48,7 +48,7 @@ public class CreateContextRelation implements Serializable { .requireNonNull( CreateContextRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java index 671bccd25c..e902a02c8f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java @@ -31,7 +31,7 @@ public class SparkCollectAndSave implements Serializable { .toString( SparkCollectAndSave.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index 8b282386f8..f239ffca7f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -22,7 +22,7 @@ public class SparkDumpEntitiesJob implements Serializable { .toString( SparkDumpEntitiesJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/wf/input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index ddfd6592f9..05e3f88359 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -4,10 +4,11 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Optional; +import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -37,7 +38,7 @@ public class SparkDumpRelationJob implements Serializable { .toString( SparkDumpRelationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -54,6 +55,13 @@ public class SparkDumpRelationJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + Optional rs = Optional.ofNullable(parser.get("removeSet")); + final Set removeSet = new HashSet<>(); + if(rs.isPresent()){ + Collections.addAll(removeSet, rs.get().split(";")); + } + + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -61,15 +69,16 @@ public class SparkDumpRelationJob implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - dumpRelation(spark, inputPath, outputPath); + dumpRelation(spark, inputPath, outputPath, removeSet); }); } - private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { + private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations + .filter((FilterFunction)r -> !removeSet.contains(r.getRelClass())) .map((MapFunction) relation -> { eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); relNew diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index f9d2123e2c..4475232dec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -39,7 +39,7 @@ public class SparkOrganizationRelation implements Serializable { .toString( SparkOrganizationRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index 20f3fc4a7e..d1bca6a7c6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -35,7 +35,7 @@ public class SparkSelectValidRelationsJob implements Serializable { .toString( SparkSelectValidRelationsJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0aee..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml deleted file mode 100644 index fcef2547ac..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml +++ /dev/null @@ -1,431 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - one among {new, update, version} - - - conceptRecordId - for new version, the id of the record for the old deposition - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/split - --communityMapPath${workingDir}/communityMap - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/split - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionId${depositionId} - --depositionType${depositionType} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0aee..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml deleted file mode 100644 index 8189e25945..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml +++ /dev/null @@ -1,586 +0,0 @@ - - - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - resultAggregation - true if all the result type have to be dumped under result. false otherwise - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - organizationCommunityMap - the organization community map - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table organization - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/organization - --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization - --outputPath${workingDir}/collect/organization - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/collect/project - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table datasource - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/datasource - --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource - --outputPath${workingDir}/collect/datasource - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/relation - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${workingDir}/collect/communities_infrastructures/communities_infrastructure.json.gz - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation - --hdfsPath${workingDir}/relation/context - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} - --communityMapPath${workingDir}/communityMap - - - - - - - - - - - - - - - - - yarn - cluster - Extract Relations from publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/relation/publication - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/relation/dataset - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/relation/orp - --communityMapPath${workingDir}/communityMap - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/relation/software - --communityMapPath${workingDir}/communityMap - - - - - - - - - - yarn - cluster - Collect Results and Relations and put them in the right path - eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir} - --outputPath${workingDir}/collect - --resultAggregation${resultAggregation} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/collect - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json deleted file mode 100644 index d2f1792125..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/community_infrastructure_schema.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the research community/research infrastructure" - }, - "id": { - "type": "string", - "description": "OpenAIRE id of the research community/research infrastructure" - }, - "name": { - "type": "string", - "description": "The long name of the community" - }, - "originalId": { - "type": "string", - "description": "The acronym of the community" - }, - "subject": { - "description": "Only for research communities: the list of the subjects associated to the research community", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "One of {Research Community, Research infrastructure}" - }, - "zenodo_community": { - "type": "string", - "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json deleted file mode 100644 index b9c15d9219..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/datasource_schema.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type": "object", - "properties": { - "accessrights": { - "type": "string", - "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" - }, - "certificates": { - "type": "string", - "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." - }, - "citationguidelineurl": { - "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." - }, - "contenttypes": { - "description": "Types of content in the data source, as defined by OpenDOAR", - "type": "array", - "items": { - "type": "string" - } - }, - "databaseaccessrestriction": { - "type": "string", - "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" - } - ] - }, - "datauploadrestriction": { - "type": "string", - "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "dateofvalidation": { - "type": "string", - "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" - }, - "description": { - "type": "string" - }, - "englishname": { - "type": "string", - "description": "The English name of the datasource" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id of the data source" - }, - "journal": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Information about the journal, if this data source is of type Journal." - }, - "languages": { - "description": "The languages present in the data source's content, as defined by OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "logourl": { - "type": "string" - }, - "missionstatementurl": { - "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" - }, - "officialname": { - "type": "string", - "description": "The official name of the datasource" - }, - "openairecompatibility": { - "type": "string", - "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." - }, - "originalId": { - "description": "Original identifiers for the datasource" - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the datasource", - "type": "array", - "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - } - ] - } - }, - "pidsystems": { - "type": "string", - "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" - }, - "policies": { - "description": "Policies of the data source, as defined in OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "releaseenddate": { - "type": "string", - "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" - }, - "releasestartdate": { - "type": "string", - "description": "Releasing date of the data source, as defined by re3data.org" - }, - "subjects": { - "description": "List of subjects associated to the datasource", - "type": "array", - "items": { - "type": "string" - } - }, - "uploadrights": { - "type": "string", - "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" - }, - "versioning": { - "type": "boolean", - "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." - }, - "websiteurl": { - "type": "string" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json deleted file mode 100644 index 16afa386d5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/organization_schema.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "alternativenames": { - "description": "Alternative names that identify the organisation", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The organisation country code" - }, - "label": { - "type": "string", - "description": "The organisation country label" - } - }, - "description": "The country of the organisation" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id for the organisation" - }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, - "pid": { - "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", - "type": "array", - "items": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme of the identifier (i.e. isni)" - }, - "value": { - "type": "string", - "description": "the value in the schema (i.e. 0000000090326370)" - } - } - } - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json deleted file mode 100644 index c81187258e..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/project_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string" - }, - "callidentifier": { - "type": "string" - }, - "code": { - "type": "string", - "description": "The grant agreement number" - }, - "enddate": { - "type": "string" - }, - "funding": { - "description": "Funding information for the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "funding_stream": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the funding stream" - }, - "id": { - "type": "string", - "description": "Id of the funding stream" - } - } - }, - "jurisdiction": { - "type": "string", - "description": "The jurisdiction of the funder (i.e. EU)" - }, - "name": { - "type": "string", - "description": "The name of the funder (European Commission)" - }, - "shortName": { - "type": "string", - "description": "The short name of the funder (EC)" - } - } - } - }, - "granted": { - "type": "object", - "properties": { - "currency": { - "type": "string", - "description": "The currency of the granted amount (e.g. EUR)" - }, - "fundedamount": { - "type": "number", - "description": "The funded amount" - }, - "totalcost": { - "type": "number", - "description": "The total cost of the project" - } - }, - "description": "The money granted to the project" - }, - "h2020programme": { - "description": "The h2020 programme funding the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The code of the programme" - }, - "description": { - "type": "string", - "description": "The description of the programme" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE id for the project" - }, - "keywords": { - "type": "string" - }, - "openaccessmandatefordataset": { - "type": "boolean" - }, - "openaccessmandateforpublications": { - "type": "boolean" - }, - "startdate": { - "type": "string" - }, - "subject": { - "type": "array", - "items": { - "type": "string" - } - }, - "summary": { - "type": "string" - }, - "title": { - "type": "string" - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json deleted file mode 100644 index 7c7de9c98d..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/relation_schema.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "Node": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The OpenAIRE id of the entity" - }, - "type": { - "type": "string", - "description": "The type of the entity (i.e. organisation)" - } - } - } - }, - "type": "object", - "properties": { - "provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The reason why OpenAIRE holds the relation " - }, - "trust": { - "type": "string", - "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" - } - } - }, - "reltype": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " - }, - "type": { - "type": "string", - "description": "the type of the relation (i.e. affiliation)" - } - }, - "description": "To represent the semantics of a relation between two entities" - }, - "source": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node source in the relation"} - ] - }, - "target": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node target in the relation"} - ] - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json deleted file mode 100644 index 03cbfb0744..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json +++ /dev/null @@ -1,398 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "AccessRight":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The process that produced/provided the information" - }, - "trust": { - "type": "string" - } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" - } - }, - "type": "object", - "properties": { - "author": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} - ] - } - } - }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } - } - } - }, - "bestaccessright": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description": "The openest access right associated to the manifestations of this research results" - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "start page" - }, - "vol": { - "type": "string" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor": { - "type": "array", - "items": { - "type": "string", - "description": "Description of contributor" - } - }, - "country": { - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - "label": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - } - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "format": { - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE Identifier" - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "English label" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string" - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "} - ] - } - }, - "instance":{ - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "allOf":[ - { - "$ref":"#/definitions/AccessRight" - }, - { - "description":"The accessright of this materialization of the result" - } - ] - }, - "articleprocessingcharge":{ - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "publicationdate":{ - "type":"string" - }, - "refereed":{ - "type":"string" - }, - "type":{ - "type":"string", - "description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url":{ - "description":"Description of url", - "type":"array", - "items":{ - "type":"string", - "description":"urls where it is possible to access the materialization of the result" - } - } - }, - "description":"One of the materialization for this result" - } - }, - "programmingLanguage": { - "type": "string", - "description": "Only for results with type 'software': the programming language" - }, - "publicationdate": { - "type": "string" - }, - "publisher": { - "type": "string" - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] - } - } - } - }, - "subtitle": { - "type": "string" - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0aee..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml deleted file mode 100644 index 650b972fa3..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ /dev/null @@ -1,563 +0,0 @@ - - - - - upload - false - true to upload the dump for the funders in Zenodo - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - - - - - - - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --relationPath${sourcePath}/relation - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --relationPath${sourcePath}/relation - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table dataset for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${workingDir}/communityMap - --dumpTypefunder - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${workingDir}/resultperfunder - --relationPath${sourcePath} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/resultperfunder - - - - - - - - ${wf:conf('upload') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --communityMapPath${workingDir}/communityMap - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_collect_and_save.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_organization_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json similarity index 73% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json index 2bfcac3bce..5c26ea7d1e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json @@ -19,6 +19,12 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false + }, + { + "paramName": "rs", + "paramLongName": "removeSet", + "paramDescription": "the list of classname relations, split by ';', not to be dumped", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json deleted file mode 100644 index 727432a671..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/community_infrastructure_schema.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string", - "description": "The acronym of the community" - }, - "description": { - "type": "string", - "description": "Description of the research community/research infrastructure" - }, - "id": { - "type": "string", - "description": "OpenAIRE id of the research community/research infrastructure" - }, - "name": { - "type": "string", - "description": "The long name of the community" - }, - "subject": { - "description": "Only for research communities: the list of the subjects associated to the research community", - "type": "array", - "items": {"type": "string"} - }, - "type": { - "type": "string", - "description": "One of {Research Community, Research infrastructure}" - }, - "zenodo_community": { - "type": "string", - "description": "The URL of the Zenodo community associated to the Research community/Research infrastructure" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json deleted file mode 100644 index c416f23208..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/datasource_schema.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "$schema":"http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type":"object", - "properties": { - "accessrights": { - "type": "string", - "description": "Type of access to the data source, as defined by re3data.org. Possible values: {open, restricted, closed}" - }, - "certificates": { - "type": "string", - "description": "The certificate, seal or standard the data source complies with. As defined by re3data.org." - }, - "citationguidelineurl": { - "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." - }, - "contenttypes": { - "description": "Types of content in the data source, as defined by OpenDOAR", - "type": "array", - "items": { - "type": "string" - } - }, - "databaseaccessrestriction": { - "type": "string", - "description": "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" - } - ] - }, - "datauploadrestriction": { - "type": "string", - "description": "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}" - }, - "dateofvalidation": { - "type": "string", - "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" - }, - "description": { - "type": "string" - }, - "englishname": { - "type": "string", - "description": "The English name of the datasource" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id of the data source" - }, - "journal": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string" - }, - "sp": { - "type": "string", - "description": "Start page" - }, - "vol": { - "type": "string", - "description": "Volume" - } - }, - "description": "Information about the journal, if this data source is of type Journal." - }, - "languages": { - "description": "The languages present in the data source's content, as defined by OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "logourl": { - "type": "string" - }, - "missionstatementurl": { - "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" - }, - "officialname": { - "type": "string", - "description": "The official name of the datasource" - }, - "openairecompatibility": { - "type": "string", - "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." - }, - "originalId": { - "description": "Original identifiers for the datasource" - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the datasource", - "type": "array", - "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" - } - ] - } - }, - "pidsystems": { - "type": "string", - "description": "The persistent identifier system that is used by the data source. As defined by re3data.org" - }, - "policies": { - "description": "Policies of the data source, as defined in OpenDOAR.", - "type": "array", - "items": { - "type": "string" - } - }, - "releaseenddate": { - "type": "string", - "description": "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org" - }, - "releasestartdate": { - "type": "string", - "description": "Releasing date of the data source, as defined by re3data.org" - }, - "subjects": { - "description": "List of subjects associated to the datasource", - "type": "array", - "items": { - "type": "string" - } - }, - "uploadrights": { - "type": "string", - "description": "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}" - }, - "versioning": { - "type": "boolean", - "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json deleted file mode 100644 index 16afa386d5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/organization_schema.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "alternativenames": { - "description": "Alternative names that identify the organisation", - "type": "array", - "items": { - "type": "string" - } - }, - "country": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The organisation country code" - }, - "label": { - "type": "string", - "description": "The organisation country label" - } - }, - "description": "The country of the organisation" - }, - "id": { - "type": "string", - "description": "The OpenAIRE id for the organisation" - }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, - "pid": { - "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", - "type": "array", - "items": { - "type": "object", - "properties": { - "scheme": { - "type": "string", - "description": "The scheme of the identifier (i.e. isni)" - }, - "value": { - "type": "string", - "description": "the value in the schema (i.e. 0000000090326370)" - } - } - } - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json deleted file mode 100644 index c81187258e..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/project_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "acronym": { - "type": "string" - }, - "callidentifier": { - "type": "string" - }, - "code": { - "type": "string", - "description": "The grant agreement number" - }, - "enddate": { - "type": "string" - }, - "funding": { - "description": "Funding information for the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "funding_stream": { - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "Description of the funding stream" - }, - "id": { - "type": "string", - "description": "Id of the funding stream" - } - } - }, - "jurisdiction": { - "type": "string", - "description": "The jurisdiction of the funder (i.e. EU)" - }, - "name": { - "type": "string", - "description": "The name of the funder (European Commission)" - }, - "shortName": { - "type": "string", - "description": "The short name of the funder (EC)" - } - } - } - }, - "granted": { - "type": "object", - "properties": { - "currency": { - "type": "string", - "description": "The currency of the granted amount (e.g. EUR)" - }, - "fundedamount": { - "type": "number", - "description": "The funded amount" - }, - "totalcost": { - "type": "number", - "description": "The total cost of the project" - } - }, - "description": "The money granted to the project" - }, - "h2020programme": { - "description": "The h2020 programme funding the project", - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "The code of the programme" - }, - "description": { - "type": "string", - "description": "The description of the programme" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE id for the project" - }, - "keywords": { - "type": "string" - }, - "openaccessmandatefordataset": { - "type": "boolean" - }, - "openaccessmandateforpublications": { - "type": "boolean" - }, - "startdate": { - "type": "string" - }, - "subject": { - "type": "array", - "items": { - "type": "string" - } - }, - "summary": { - "type": "string" - }, - "title": { - "type": "string" - }, - "websiteurl": { - "type": "string" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json deleted file mode 100644 index 98134a03bd..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/relation_schema.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "$schema":"http://json-schema.org/draft-07/schema#", - "definitions": { - "Node": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The OpenAIRE id of the entity" - }, - "type": { - "type": "string", - "description": "The type of the entity (i.e. organisation)" - } - } - } - }, - "type":"object", - "properties": { - "provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The reason why OpenAIRE holds the relation " - }, - "trust": { - "type": "string", - "description": "The trust of the relation in the range of [0,1]. Where greater the number, more the trust. Harvested relationships have typically a high trust (0.9). The trust of inferred relationship is calculated by the inference algorithm that generated them, as described in https://graph.openaire.eu/about#architecture (Enrichment --> Mining)" - } - } - }, - "reltype": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The semantics of the relation (i.e. isAuthorInstitutionOf). " - }, - "type": { - "type": "string", - "description": "the type of the relation (i.e. affiliation)" - } - }, - "description": "To represent the semantics of a relation between two entities" - }, - "source": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node source in the relation"} - ] - }, - "target": { - "allOf": [ - {"$ref": "#/definitions/Node"}, - {"description": "The node target in the relation"} - ] - }, - "validated":{ - "type":"boolean", - "description":"True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)" - }, - "validationDate":{ - "type":"string", - "description":"The date when the relation was collected from OpenAIRE" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json deleted file mode 100644 index a1e09525e5..0000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/schemas/result_schema.json +++ /dev/null @@ -1,417 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, - "Provenance": { - "type": "object", - "properties": { - "provenance": { - "type": "string", - "description": "The process that produced/provided the information" - }, - "trust": { - "type": "string" - } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" - } - }, - "type": "object", - "properties": { - "author": { - "type": "array", - "items": { - "type": "object", - "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, - "pid": { - "type": "object", - "properties": { - "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} - ] - } - } - }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } - } - } - }, - "bestaccessright":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "codeRepositoryUrl": { - "type": "string", - "description": "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup": { - "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "contactperson": { - "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type": "array", - "items": { - "type": "string" - } - }, - "container": { - "type": "object", - "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, - "edition": { - "type": "string", - "description": "Edition of the journal or conference proceeding" - }, - "ep": { - "type": "string", - "description": "End page" - }, - "iss": { - "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" - }, - "name": { - "type": "string", - "description": "Name of the journal or conference" - }, - "sp": { - "type": "string", - "description": "start page" - }, - "vol": { - "type": "string" - } - }, - "description": "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor": { - "type": "array", - "items": { - "type": "string", - "description": "Contributors for the result" - } - }, - "country": { - "type": "array", - "items": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "ISO 3166-1 alpha-2 country code" - }, - "label": { - "type": "string" - }, - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this result is associated to the country."} - ] - } - } - } - }, - "coverage": { - "type": "array", - "items": { - "type": "string" - } - }, - "dateofcollection": { - "type": "string", - "description": "When OpenAIRE collected the record the last time" - }, - "description": { - "type": "array", - "items": { - "type": "string" - } - }, - "documentationUrl": { - "description": "Only for results with type 'software': URL to the software documentation", - "type": "array", - "items": { - "type": "string" - } - }, - "embargoenddate": { - "type": "string", - "description": "Date when the embargo ends and this result turns Open Access" - }, - "format": { - "type": "array", - "items": { - "type": "string" - } - }, - "geolocation": { - "description": "Geolocation information", - "type": "array", - "items": { - "type": "object", - "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } - } - }, - "id": { - "type": "string", - "description": "OpenAIRE Identifier" - }, - "instance":{ - "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, - "openAccessRoute":{ - "type":"string", - "enum":[ - "gold", - "green", - "hybrid", - "bronze" - ], - "description":"The type of OpenAccess applied to the result" - }, - "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } - }, - "articleprocessingcharge":{ - "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "pid":{ - "description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)", - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" - }, - { - "description":"The persistent identifier associated to the result" - } - ] - } - }, - "publicationdate":{ - "type":"string", - "description": "Date of the research product" - }, - "refereed":{ - "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", - "type":"string" - }, - "type":{ - "type":"string", - "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url":{ - "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", - "type":"array", - "items":{ - "type":"string" - } - } - } - } - }, - "language": { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "alpha-3/ISO 639-2 code of the language" - }, - "label": { - "type": "string", - "description": "Language label in English" - } - } - }, - "lastupdatetimestamp": { - "type": "integer", - "description": "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle": { - "type": "string", - "descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." - }, - "subtitle": { - "type": "string", - "descriptio": "Explanatory or alternative name by which a scientific result is known." - }, - "originalId": { - "description": "Identifiers of the record at the original sources", - "type": "array", - "items": { - "type": "string" - } - }, - "pid": { - "description": "Persistent identifiers of the result", - "type": "array", - "items": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "} - ] - } - }, - "programmingLanguage": { - "type": "string", - "description": "Only for results with type 'software': the programming language" - }, - "publicationdate": { - "type": "string", - "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." - }, - "publisher": { - "type": "string", - "description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." - }, - "size": { - "type": "string", - "description": "Only for results with type 'dataset': the declared size of the dataset" - }, - "source": { - "description": "See definition of Dublin Core field dc:source", - "type": "array", - "items": { - "type": "string" - } - }, - "subjects": { - "description": "Keywords associated to the result", - "type": "array", - "items": { - "type": "object", - "properties": { - "provenance": { - "allOf": [ - {"$ref": "#/definitions/Provenance"}, - {"description": "Why this subject is associated to the result"} - ] - }, - "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] - } - } - } - }, - "tool": { - "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type": "array", - "items": { - "type": "string" - } - }, - "type": { - "type": "string", - "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" - }, - "version": { - "type": "string", - "description": "Version of the result" - } - } -} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index fe178795d7..3dbf2b09d3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -4,8 +4,10 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; + import java.util.HashMap; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -81,7 +83,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -144,7 +145,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); -// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -203,4 +203,61 @@ public class DumpRelationTest { "and validationDate = '2021-08-06'") .count()); } + + @Test + public void test3() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant" + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + + Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); + Assertions + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); + } + } From d9f80488cc471bcb28b294de523e7ccac9bd571b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 2 Dec 2021 14:15:19 +0100 Subject: [PATCH 17/21] [GRAPH DUMP] Add one more test to check the filtering of the relations --- .../graph/dump/complete/DumpRelationTest.java | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index 3dbf2b09d3..8a2eb585e9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -260,4 +260,54 @@ public class DumpRelationTest { .count()); } + @Test + public void test4() throws Exception {// + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); + + + Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); + + } + } From 4bb1d43afc45493673bf7055c7b332bb42189274 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 3 Dec 2021 12:35:51 +0100 Subject: [PATCH 18/21] - --- .../dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 37ba96abeb..6dc56c846f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -917,8 +917,7 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -949,6 +948,19 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); + temp = spark + .sql( + "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + + + Assertions.assertEquals("3131.64", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(1)); + Assertions.assertEquals("EUR", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(2)); + + Assertions.assertEquals("2578.35", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(1)); + Assertions.assertEquals("EUR", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(2)); } } From 8d755cca808062f2f55e70c1c927eca7c84131cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 13 Dec 2021 15:01:40 +0100 Subject: [PATCH 19/21] - --- .../dump/complete/CreateContextEntities.java | 2 +- .../dump/complete/CreateContextRelation.java | 2 +- .../dump/complete/SparkCollectAndSave.java | 2 +- .../dump/complete/SparkDumpEntitiesJob.java | 2 +- .../dump/complete/SparkDumpRelationJob.java | 9 +- .../complete/SparkOrganizationRelation.java | 2 +- .../SparkSelectValidRelationsJob.java | 2 +- .../dhp/oa/graph/dump/DumpJobTest.java | 49 +++++++-- .../graph/dump/complete/DumpRelationTest.java | 100 ++++++++---------- 9 files changed, 94 insertions(+), 76 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index aa031203d0..c422c3b404 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -41,7 +41,7 @@ public class CreateContextEntities implements Serializable { .toString( CreateContextEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 3c71fcaa52..03f53de641 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -48,7 +48,7 @@ public class CreateContextRelation implements Serializable { .requireNonNull( CreateContextRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java index e902a02c8f..1c98ea6cb9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java @@ -31,7 +31,7 @@ public class SparkCollectAndSave implements Serializable { .toString( SparkCollectAndSave.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index f239ffca7f..7cc05a821d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -22,7 +22,7 @@ public class SparkDumpEntitiesJob implements Serializable { .toString( SparkDumpEntitiesJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/wf/input_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index 05e3f88359..0ac97dcb64 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -38,7 +38,7 @@ public class SparkDumpRelationJob implements Serializable { .toString( SparkDumpRelationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -55,13 +55,12 @@ public class SparkDumpRelationJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - Optional rs = Optional.ofNullable(parser.get("removeSet")); + Optional rs = Optional.ofNullable(parser.get("removeSet")); final Set removeSet = new HashSet<>(); - if(rs.isPresent()){ + if (rs.isPresent()) { Collections.addAll(removeSet, rs.get().split(";")); } - SparkConf conf = new SparkConf(); runWithSparkSession( @@ -78,7 +77,7 @@ public class SparkDumpRelationJob implements Serializable { private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations - .filter((FilterFunction)r -> !removeSet.contains(r.getRelClass())) + .filter((FilterFunction) r -> !removeSet.contains(r.getRelClass())) .map((MapFunction) relation -> { eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); relNew diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 4475232dec..dff1a1066e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -39,7 +39,7 @@ public class SparkOrganizationRelation implements Serializable { .toString( SparkOrganizationRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index d1bca6a7c6..1e5675e5ff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -35,7 +35,7 @@ public class SparkSelectValidRelationsJob implements Serializable { .toString( SparkSelectValidRelationsJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 6dc56c846f..30e2f93ef9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -917,7 +917,7 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -949,18 +949,45 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); temp = spark - .sql( - "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + - "from check " + - "lateral view explode (instance) i as inst " + - "where inst.articleprocessingcharge is not null"); + .sql( + "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + Assertions + .assertEquals( + "3131.64", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") + .collectAsList() + .get(0) + .getString(2)); - Assertions.assertEquals("3131.64", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(1)); - Assertions.assertEquals("EUR", temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").collectAsList().get(0).getString(2)); - - Assertions.assertEquals("2578.35", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(1)); - Assertions.assertEquals("EUR", temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").collectAsList().get(0).getString(2)); + Assertions + .assertEquals( + "2578.35", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(1)); + Assertions + .assertEquals( + "EUR", + temp + .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") + .collectAsList() + .get(0) + .getString(2)); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index 8a2eb585e9..763318ae49 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -4,10 +4,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; - import java.util.HashMap; - import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -83,7 +81,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -145,7 +142,6 @@ public class DumpRelationTest { "-sourcePath", sourcePath }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -207,105 +203,101 @@ public class DumpRelationTest { @Test public void test3() throws Exception {// final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant" }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); verificationDataset.createOrReplaceTempView("table"); verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); Assertions - .assertEquals( - 1, check - .filter( - "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + - "and provenance = 'Inferred by OpenAIRE'") - .count()); + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); } @Test public void test4() throws Exception {// final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") + .getPath(); SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant;isAuthorInstitutionOf" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath, + "-removeSet", "isParticipant;isAuthorInstitutionOf" }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); verificationDataset.createOrReplaceTempView("table"); verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); + .assertEquals( + 22, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); } From 460e6b95d63b04c4977be1302eca2f58e7259668 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 21 Dec 2021 13:48:03 +0100 Subject: [PATCH 20/21] [Graph Dump] - --- .../graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index f84ab7e1ad..8eab569928 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -325,6 +325,7 @@ --sourcePath${workingDir}/validrelation --outputPath${workingDir}/relation/relation + --removeSet${removeSet} From 8d185000694d05f61cdbf521309357dc0a328695 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 22 Dec 2021 12:47:21 +0100 Subject: [PATCH 21/21] using dhp-schema:2.9.24 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b95bd9acbb..ed7b8a2ca0 100644 --- a/pom.xml +++ b/pom.xml @@ -758,7 +758,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.9.24-SNAPSHOT] + [2.9.24] [4.0.3] [6.0.5] [3.1.6]