From 608122fe678aba44494865bc4556d752b87f9a97 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Thu, 11 Aug 2022 14:23:55 +0200
Subject: [PATCH] [EOSC DUMP] changed to make the dumped result an extension of the CommunityResult. The subjects field has been replaced by the subject field, modeled as a Map<String, List<Subject>> where Subject carries the value and its provenance

---
 .../eu/dnetlib/dhp/eosc/model/EoscResult.java | 23 +-
 .../eu/dnetlib/dhp/eosc/model/Subject.java | 36 ++
 .../java/eu/dnetlib/dhp/oa/model/Result.java | 2 +
 .../oa/model/community/CommunityInstance.java | 2 +
 .../oa/model/community/CommunityResult.java | 2 +
 .../jsonschemas/community_result_schema.json | 602 ++++++++++++++++++
 .../jsonschemas/eosc_result_schema.json | 128 ++++
 .../dhp/oa/graph/dump/ResultMapper.java | 96 ++-
 .../community/SparkUpdateProjectInfo.java | 35 +-
 .../dump/eosc/SelectEoscResultsJobStep1.java | 11 +-
 .../dump/eosc_select_result_parameters.json | 6 +
 .../dump/eoscdump/oozie_app/workflow.xml | 169 ++++-
 .../graph/dump/project_input_parameters.json | 6 +
 .../dhp/oa/graph/dump/DumpJobTest.java | 23 +-
 pom.xml | 2 +-
 15 files changed, 1083 insertions(+), 60 deletions(-)
 create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java
 create mode 100644 dump-schema/src/main/resources/jsonschemas/community_result_schema.json

diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java
index 1bbc675..88699f9 100644
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java
+++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java
@@ -1,18 +1,29 @@
 
 package eu.dnetlib.dhp.eosc.model;
 
+import java.util.List;
+import java.util.Map;
+
 import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
 
-import eu.dnetlib.dhp.oa.model.graph.GraphResult;
+import eu.dnetlib.dhp.oa.model.community.CommunityResult;
 
 /**
  * @author miriam.baglioni
  * @Date 29/07/22
  */
-public class EoscResult extends GraphResult {
+public class EoscResult extends CommunityResult {
+
 	@JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines")
 	private EoscInteroperabilityFramework eoscIF;
 
+	@JsonSchema(description = "The subject dumped by type associated to the result")
+	private Map<String, List<Subject>> subject;
+
+//	public EoscResult() {
+//		super();
+//	}
+
 	public EoscInteroperabilityFramework getEoscIF() {
 		return eoscIF;
 	}
@@ -20,4 +31,12 @@ public class EoscResult extends GraphResult {
 	public void setEoscIF(EoscInteroperabilityFramework eoscIF) {
 		this.eoscIF = eoscIF;
 	}
+
+	public Map<String, List<Subject>> getSubject() {
+		return subject;
+	}
+
+	public void setSubject(Map<String, List<Subject>> subject) {
+		this.subject = subject;
+	}
 }
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java
new file mode 100644
index 0000000..549cf85
--- /dev/null
+++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.eosc.model;
+
+import java.io.Serializable;
+
+import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
+
+import eu.dnetlib.dhp.oa.model.Provenance;
+
+/**
+ * @author miriam.baglioni
+ * @Date 10/08/22
+ */
+public class Subject implements Serializable {
+	@JsonSchema(description = "Why this subject is associated to the result")
+	private Provenance provenance;
+
+	@JsonSchema(description = "The subject value")
+	private String value;
+
+	public Provenance
getProvenance() { + return provenance; + } + + public void setProvenance(Provenance provenance) { + this.provenance = provenance; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java index 3d580fd..562f3db 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model; import java.io.Serializable; import java.util.List; +import com.fasterxml.jackson.annotation.JsonInclude; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** @@ -248,6 +249,7 @@ public class Result implements Serializable { this.country = country; } + @JsonInclude(JsonInclude.Include.NON_NULL) public List getSubjects() { return subjects; } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java index 7e3a7cf..23f7e3a 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.model.community; +import com.fasterxml.jackson.annotation.JsonInclude; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; import eu.dnetlib.dhp.oa.model.Instance; @@ -20,6 +21,7 @@ public class CommunityInstance extends Instance { private CfHbKeyValue hostedby; @JsonSchema(description = "Information about the source from which the record has been collected") + @JsonInclude(JsonInclude.Include.NON_NULL) private CfHbKeyValue collectedfrom; public CfHbKeyValue getHostedby() { diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java index 88c2cc5..0f71495 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.community; import java.util.List; +import com.fasterxml.jackson.annotation.JsonInclude; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; import eu.dnetlib.dhp.oa.model.Result; @@ -29,6 +30,7 @@ public class CommunityResult extends Result { private List context; @JsonSchema(description = "Information about the sources from which the record has been collected") + @JsonInclude(JsonInclude.Include.NON_NULL) protected List collectedfrom; @JsonSchema( diff --git a/dump-schema/src/main/resources/jsonschemas/community_result_schema.json b/dump-schema/src/main/resources/jsonschemas/community_result_schema.json new file mode 100644 index 0000000..bb307a8 --- /dev/null +++ b/dump-schema/src/main/resources/jsonschemas/community_result_schema.json @@ -0,0 +1,602 @@ +{ + "$schema" : "http://json-schema.org/draft-07/schema#", + "definitions" : { + "CfHbKeyValue" : { + "type" : "object", + "properties" : { + "key" : { + "type" : "string", + "description" : "the OpenAIRE identifier of the data source" + }, + "value" : { + "type" : "string", + "description" : "the name of the data source" + } + } + }, + "Provenance" : { + "type" : "object", + "properties" : { + "provenance" : { + "type" : 
"string" + }, + "trust" : { + "type" : "string" + } + } + }, + "ResultPid" : { + "type" : "object", + "properties" : { + "scheme" : { + "type" : "string", + "description" : "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories" + }, + "value" : { + "type" : "string", + "description" : "The value expressed in the scheme (i.e. 10.1000/182)" + } + } + } + }, + "type" : "object", + "properties" : { + "author" : { + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "fullname" : { + "type" : "string" + }, + "name" : { + "type" : "string" + }, + "pid" : { + "type" : "object", + "properties" : { + "id" : { + "type" : "object", + "properties" : { + "scheme" : { + "type" : "string", + "description" : "The author's pid scheme. OpenAIRE currently supports 'ORCID'" + }, + "value" : { + "type" : "string", + "description" : "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)" + } + } + }, + "provenance" : { + "allOf" : [ { + "$ref" : "#/definitions/Provenance" + }, { + "description" : "The reason why the pid was associated to the author" + } ] + } + }, + "description" : "The author's persistent identifiers" + }, + "rank" : { + "type" : "integer" + }, + "surname" : { + "type" : "string" + } + } + } + }, + "bestaccessright" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label" : { + "type" : "string", + "description" : "Label for the access mode" + }, + "scheme" : { + "type" : "string", + "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + }, + "description" : "The openest of the access rights of this result." 
+ }, + "codeRepositoryUrl" : { + "type" : "string", + "description" : "Only for results with type 'software': the URL to the repository with the source code" + }, + "collectedfrom" : { + "description" : "Information about the sources from which the record has been collected", + "type" : "array", + "items" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the sources from which the record has been collected" + } ] + } + }, + "contactgroup" : { + "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource" + } + }, + "contactperson" : { + "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource" + } + }, + "container" : { + "type" : "object", + "properties" : { + "conferencedate" : { + "type" : "string" + }, + "conferenceplace" : { + "type" : "string" + }, + "edition" : { + "type" : "string", + "description" : "Edition of the journal or conference proceeding" + }, + "ep" : { + "type" : "string", + "description" : "End page" + }, + "iss" : { + "type" : "string", + "description" : "Journal issue number" + }, + "issnLinking" : { + "type" : "string" + }, + "issnOnline" : { + "type" : "string" + }, + "issnPrinted" : { + "type" : "string" + }, + "name" : { + "type" : "string", + "description" : "Name of the journal or conference" + }, + "sp" : { + "type" : "string", + "description" : "Start page" + }, + "vol" : { + "type" : "string", + "description" : "Volume" + } + }, + "description" : "Container has information about the conference or journal where the result has been presented or published" + }, + "context" : { + "description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "Code identifying the RI/RC" + }, + "label" : { + "type" : "string", + "description" : "Label of the RI/RC" + }, + "provenance" : { + "description" : "Why this result is associated to the RI/RC.", + "type" : "array", + "items" : { + "allOf" : [ { + "$ref" : "#/definitions/Provenance" + }, { + "description" : "Why this result is associated to the RI/RC." + } ] + } + } + }, + "description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu" + } + }, + "contributor" : { + "description" : "Contributors for the result", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Contributors for the result" + } + }, + "country" : { + "description" : "The list of countries associated to this result", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "ISO 3166-1 alpha-2 country code (i.e. 
IT)" + }, + "label" : { + "type" : "string", + "description" : "The label for that code (i.e. Italy)" + }, + "provenance" : { + "allOf" : [ { + "$ref" : "#/definitions/Provenance" + }, { + "description" : "Why this result is associated to the country." + } ] + } + }, + "description" : "The list of countries associated to this result" + } + }, + "coverage" : { + "type" : "array", + "items" : { + "type" : "string" + } + }, + "dateofcollection" : { + "type" : "string", + "description" : "When OpenAIRE collected the record the last time" + }, + "description" : { + "type" : "array", + "items" : { + "type" : "string" + } + }, + "documentationUrl" : { + "description" : "Only for results with type 'software': URL to the software documentation", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Only for results with type 'software': URL to the software documentation" + } + }, + "embargoenddate" : { + "type" : "string", + "description" : "Date when the embargo ends and this result turns Open Access" + }, + "format" : { + "type" : "array", + "items" : { + "type" : "string" + } + }, + "geolocation" : { + "description" : "Geolocation information", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "box" : { + "type" : "string" + }, + "place" : { + "type" : "string" + }, + "point" : { + "type" : "string" + } + }, + "description" : "Geolocation information" + } + }, + "id" : { + "type" : "string", + "description" : "The OpenAIRE identifiers for this result" + }, + "instance" : { + "description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "accessright" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label" : { + "type" : "string", + "description" : "Label for the access mode" + }, + "openAccessRoute" : { + "type" : "string", + "enum" : [ "gold", "green", "hybrid", "bronze" ] + }, + "scheme" : { + "type" : "string", + "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + }, + "description" : "The accessRights for this materialization of the result" + }, + "alternateIdentifier" : { + "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "scheme" : { + "type" : "string", + "description" : "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi" + }, + "value" : { + "type" : "string", + "description" : "The value expressed in the scheme" + } + }, + "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. 
Crossref for DOIs" + } + }, + "articleprocessingcharge" : { + "type" : "object", + "properties" : { + "amount" : { + "type" : "string" + }, + "currency" : { + "type" : "string" + } + }, + "description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." + }, + "collectedfrom" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the source from which the record has been collected" + } ] + }, + "hostedby" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the source from which the instance can be viewed or downloaded." + } ] + }, + "license" : { + "type" : "string" + }, + "measures" : { + "description" : "Measures computed for this instance, for example Bip!Finder ones", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "key" : { + "type" : "string", + "description" : "The measure (i.e. popularity)" + }, + "value" : { + "type" : "string", + "description" : "The value for that measure" + } + }, + "description" : "Measures computed for this instance, for example Bip!Finder ones" + } + }, + "pid" : { + "type" : "array", + "items" : { + "$ref" : "#/definitions/ResultPid" + } + }, + "publicationdate" : { + "type" : "string", + "description" : "Date of the research product" + }, + "refereed" : { + "type" : "string", + "description" : "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)" + }, + "type" : { + "type" : "string", + "description" : "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + }, + "url" : { + "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", + "type" : "array", + "items" : { + "type" : "string", + "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. " + } + } + }, + "description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version" + } + }, + "language" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "alpha-3/ISO 639-2 code of the language" + }, + "label" : { + "type" : "string", + "description" : "Language label in English" + } + } + }, + "lastupdatetimestamp" : { + "type" : "integer", + "description" : "Timestamp of last update of the record in OpenAIRE" + }, + "maintitle" : { + "type" : "string", + "description" : "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." 
+ }, + "originalId" : { + "description" : "Identifiers of the record at the original sources", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Identifiers of the record at the original sources" + } + }, + "pid" : { + "description" : "Persistent identifiers of the result", + "type" : "array", + "items" : { + "allOf" : [ { + "$ref" : "#/definitions/ResultPid" + }, { + "description" : "Persistent identifiers of the result" + } ] + } + }, + "programmingLanguage" : { + "type" : "string", + "description" : "Only for results with type 'software': the programming language" + }, + "projects" : { + "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "acronym" : { + "type" : "string", + "description" : "The acronym of the project" + }, + "code" : { + "type" : "string", + "description" : "The grant agreement number" + }, + "funder" : { + "type" : "object", + "properties" : { + "fundingStream" : { + "type" : "string", + "description" : "Stream of funding (e.g. for European Commission can be H2020 or FP7)" + }, + "jurisdiction" : { + "type" : "string", + "description" : "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" + }, + "name" : { + "type" : "string", + "description" : "The name of the funder (European Commission)" + }, + "shortName" : { + "type" : "string", + "description" : "The short name of the funder (EC)" + } + }, + "description" : "Information about the funder funding the project" + }, + "id" : { + "type" : "string", + "description" : "The OpenAIRE id for the project" + }, + "provenance" : { + "$ref" : "#/definitions/Provenance" + }, + "title" : { + "type" : "string" + }, + "validated" : { + "type" : "object", + "properties" : { + "validatedByFunder" : { + "type" : "boolean" + }, + "validationDate" : { + "type" : "string" + } + } + } + }, + "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results" + } + }, + "publicationdate" : { + "type" : "string", + "description" : "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." + }, + "publisher" : { + "type" : "string", + "description" : "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." 
+ }, + "size" : { + "type" : "string", + "description" : "Only for results with type 'dataset': the declared size of the dataset" + }, + "source" : { + "description" : "See definition of Dublin Core field dc:source", + "type" : "array", + "items" : { + "type" : "string", + "description" : "See definition of Dublin Core field dc:source" + } + }, + "subjects" : { + "description" : "Keywords associated to the result", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "provenance" : { + "allOf" : [ { + "$ref" : "#/definitions/Provenance" + }, { + "description" : "Why this subject is associated to the result" + } ] + }, + "subject" : { + "type" : "object", + "properties" : { + "scheme" : { + "type" : "string", + "description" : "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)." + }, + "value" : { + "type" : "string", + "description" : "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)." + } + } + } + }, + "description" : "Keywords associated to the result" + } + }, + "subtitle" : { + "type" : "string", + "description" : "Explanatory or alternative name by which a scientific result is known." + }, + "tool" : { + "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", + "type" : "array", + "items" : { + "type" : "string", + "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product" + } + }, + "type" : { + "type" : "string", + "description" : "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" + }, + "version" : { + "type" : "string", + "description" : "Version of the result" + } + } +} \ No newline at end of file diff --git a/dump-schema/src/main/resources/jsonschemas/eosc_result_schema.json b/dump-schema/src/main/resources/jsonschemas/eosc_result_schema.json index 7e2b5b7..27fd7b6 100644 --- a/dump-schema/src/main/resources/jsonschemas/eosc_result_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/eosc_result_schema.json @@ -1,6 +1,19 @@ { "$schema" : "http://json-schema.org/draft-07/schema#", "definitions" : { + "CfHbKeyValue" : { + "type" : "object", + "properties" : { + "key" : { + "type" : "string", + "description" : "the OpenAIRE identifier of the data source" + }, + "value" : { + "type" : "string", + "description" : "the name of the data source" + } + } + }, "Provenance" : { "type" : "object", "properties" : { @@ -96,6 +109,17 @@ "type" : "string", "description" : "Only for results with type 'software': the URL to the repository with the source code" }, + "collectedfrom" : { + "description" : "Information about the sources from which the record has been collected", + "type" : "array", + "items" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the sources from which the record has been collected" + } ] + } + }, "contactgroup" : { "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", "type" : "array", @@ -157,6 +181,35 @@ }, "description" : "Container has information about the conference or journal where the result has been presented or published" }, + "context" : { + "description" : 
"Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "code" : { + "type" : "string", + "description" : "Code identifying the RI/RC" + }, + "label" : { + "type" : "string", + "description" : "Label of the RI/RC" + }, + "provenance" : { + "description" : "Why this result is associated to the RI/RC.", + "type" : "array", + "items" : { + "allOf" : [ { + "$ref" : "#/definitions/Provenance" + }, { + "description" : "Why this result is associated to the RI/RC." + } ] + } + } + }, + "description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu" + } + }, "contributor" : { "description" : "Contributors for the result", "type" : "array", @@ -327,6 +380,20 @@ }, "description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." }, + "collectedfrom" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the source from which the record has been collected" + } ] + }, + "hostedby" : { + "allOf" : [ { + "$ref" : "#/definitions/CfHbKeyValue" + }, { + "description" : "Information about the source from which the instance can be viewed or downloaded." + } ] + }, "license" : { "type" : "string" }, @@ -422,6 +489,67 @@ "type" : "string", "description" : "Only for results with type 'software': the programming language" }, + "projects" : { + "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results", + "type" : "array", + "items" : { + "type" : "object", + "properties" : { + "acronym" : { + "type" : "string", + "description" : "The acronym of the project" + }, + "code" : { + "type" : "string", + "description" : "The grant agreement number" + }, + "funder" : { + "type" : "object", + "properties" : { + "fundingStream" : { + "type" : "string", + "description" : "Stream of funding (e.g. for European Commission can be H2020 or FP7)" + }, + "jurisdiction" : { + "type" : "string", + "description" : "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" + }, + "name" : { + "type" : "string", + "description" : "The name of the funder (European Commission)" + }, + "shortName" : { + "type" : "string", + "description" : "The short name of the funder (EC)" + } + }, + "description" : "Information about the funder funding the project" + }, + "id" : { + "type" : "string", + "description" : "The OpenAIRE id for the project" + }, + "provenance" : { + "$ref" : "#/definitions/Provenance" + }, + "title" : { + "type" : "string" + }, + "validated" : { + "type" : "object", + "properties" : { + "validatedByFunder" : { + "type" : "boolean" + }, + "validationDate" : { + "type" : "string" + } + } + } + }, + "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results" + } + }, "publicationdate" : { "type" : "string", "description" : "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. 
For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 8d4035b..097efc4 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -9,12 +9,8 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework; import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1; import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import eu.dnetlib.dhp.oa.model.*; @@ -157,17 +153,13 @@ public class ResultMapper implements Serializable { ((GraphResult) out) .setInstance( oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - ((EoscResult) out) - .setInstance( - oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( oInst .get() .stream() - .map(ResultMapper::getCommunityInstance) + .map(i -> getCommunityInstance(i, dumpType)) .collect(Collectors.toList())); } } @@ -229,14 +221,18 @@ public class ResultMapper implements Serializable { .ifPresent( value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); + if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { + List subjectList = new ArrayList<>(); + Optional + .ofNullable(input.getSubject()) + .ifPresent( + value -> value + .forEach(s -> subjectList.add(getSubject(s)))); - out.setSubjects(subjectList); + out.setSubjects(subjectList); + } else { + ((EoscResult) out).setSubject(createSubjectMap(input)); + } out.setType(input.getResulttype().getClassid()); @@ -259,15 +255,17 @@ public class ResultMapper implements Serializable { } } - } else if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - ((CommunityResult) out) - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - + } + if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { + if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { + ((CommunityResult) out) + .setCollectedfrom( + input + .getCollectedfrom() + .stream() + .map(cf -> 
CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) + .collect(Collectors.toList())); + } Set communities = communityMap.keySet(); List contextList = Optional .ofNullable( @@ -340,6 +338,28 @@ public class ResultMapper implements Serializable { } + private static Map> createSubjectMap( + eu.dnetlib.dhp.schema.oaf.Result input) { + Map> map = new HashMap<>(); + if (!Optional.ofNullable(input.getSubject()).isPresent()) + return map; + input.getSubject().stream().forEach(s -> { + String key = s.getQualifier().getClassid(); + if (!map.containsKey(key) && !(key.equals("fos") || key.equals("sdg"))) { + + map.put(key, new ArrayList<>()); + } + eu.dnetlib.dhp.eosc.model.Subject subject = new eu.dnetlib.dhp.eosc.model.Subject(); + subject.setValue(s.getValue()); + Provenance p = getProvenance(s); + if (p != null) { + subject.setProvenance(p); + } + map.get(key).add(subject); + }); + return map; + } + private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input, Optional ort) throws NoAvailableEntityTypeException { switch (ort.get().getClassid()) { @@ -451,15 +471,17 @@ public class ResultMapper implements Serializable { } - private static CommunityInstance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { + private static CommunityInstance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i, String dumpType) { CommunityInstance instance = new CommunityInstance(); setCommonValue(i, instance); - instance - .setCollectedfrom( - CfHbKeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); + if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { + instance + .setCollectedfrom( + CfHbKeyValue + .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); + } instance .setHostedby( @@ -608,14 +630,22 @@ public class ResultMapper implements Serializable { } - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); - subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); + private static Provenance getProvenance(StructuredProperty s) { Optional di = Optional.ofNullable(s.getDataInfo()); if (di.isPresent()) { Provenance p = new Provenance(); p.setProvenance(di.get().getProvenanceaction().getClassname()); p.setTrust(di.get().getTrust()); + return p; + } + return null; + } + + private static Subject getSubject(StructuredProperty s) { + Subject subject = new Subject(); + subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); + Provenance p = getProvenance(s); + if (p != null) { subject.setProvenance(p); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 706f513..e50d011 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -19,8 +19,12 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.EoscResult; +import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.model.Result; import eu.dnetlib.dhp.oa.model.community.CommunityResult; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import scala.Tuple2; public class 
SparkUpdateProjectInfo implements Serializable { @@ -53,33 +57,50 @@ public class SparkUpdateProjectInfo implements Serializable { final String preparedInfoPath = parser.get("preparedInfoPath"); log.info("preparedInfoPath: {}", preparedInfoPath); + final String dumpType = Optional + .ofNullable(parser.get("dumpType")) + .orElse(Constants.DUMPTYPE.COMMUNITY.getType()); + log.info("dumpType: {}", dumpType); + SparkConf conf = new SparkConf(); + Class clazz; + + if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { + clazz = (Class) Class.forName("eu.dnetlib.dhp.eosc.model.EoscResult"); + } else { + clazz = (Class) Class + .forName("eu.dnetlib.dhp.oa.model.community.CommunityResult"); + } + runWithSparkSession( conf, isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath); + extend(spark, inputPath, outputPath, preparedInfoPath, clazz); }); } - private static void extend( + private static void extend( SparkSession spark, String inputPath, String outputPath, - String preparedInfoPath) { - Dataset result = Utils.readPath(spark, inputPath, CommunityResult.class); + String preparedInfoPath, + Class clazz) { + + Dataset result = Utils.readPath(spark, inputPath, clazz); + Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); result .joinWith( resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") - .map((MapFunction, CommunityResult>) value -> { - CommunityResult r = value._1(); + .map((MapFunction, E>) value -> { + E r = value._1(); Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; - }, Encoders.bean(CommunityResult.class)) + }, Encoders.bean(clazz)) .write() .option("compression", "gzip") .mode(SaveMode.Append) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index 304d891..4df4b4f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -21,6 +21,7 @@ import eu.dnetlib.dhp.eosc.model.EoscResult; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.model.graph.GraphResult; import eu.dnetlib.dhp.schema.oaf.Result; @@ -53,6 +54,9 @@ public class SelectEoscResultsJobStep1 implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final String communityMapPath = parser.get("communityMapPath"); + log.info("communityMapPath: {}", communityMapPath); + final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); @@ -65,12 +69,13 @@ public class SelectEoscResultsJobStep1 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - selectEoscResults(spark, inputPath, outputPath, inputClazz); + selectEoscResults(spark, inputPath, outputPath, inputClazz, communityMapPath); }); } private static void selectEoscResults(SparkSession spark, String inputPath, String outputPath, - Class inputClazz) { + Class inputClazz, String communityMapPath) { + CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); Utils .readPath(spark, 
inputPath, inputClazz) .filter( @@ -78,7 +83,7 @@ public class SelectEoscResultsJobStep1 implements Serializable { && r.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) .map( (MapFunction) r -> (EoscResult) ResultMapper - .map(r, null, Constants.DUMPTYPE.EOSC.getType()), + .map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()), Encoders.bean(EoscResult.class)) .write() .mode(SaveMode.Overwrite) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json index a59a5ce..605db73 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json @@ -23,6 +23,12 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true + }, + { + "paramName":"cmp", + "paramLongName":"communityMapPath", + "paramDescription": "The path to the community map", + "paramRequired": true } ] diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml index de85e94..182c935 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml @@ -85,11 +85,24 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap + --outputPath${workingDir}/communityMap + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + --singleDeposition${singleDeposition} + --communityId${communityId} + + + + + @@ -116,8 +129,8 @@ --sourcePath${sourcePath}/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/tar/publication - + --outputPath${workingDir}/dump/publication + --communityMapPath${workingDir}/communityMap @@ -141,8 +154,8 @@ --sourcePath${sourcePath}/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/tar/dataset - + --outputPath${workingDir}/dump/dataset + --communityMapPath${workingDir}/communityMap @@ -166,8 +179,8 @@ --sourcePath${sourcePath}/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/tar/otherresearchproduct - + --outputPath${workingDir}/dump/otherresearchproduct + --communityMapPath${workingDir}/communityMap @@ -191,15 +204,151 @@ --sourcePath${sourcePath}/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/tar/software - + --outputPath${workingDir}/dump/software + --communityMapPath${workingDir}/communityMap - + + + + yarn + cluster + Prepare association result subset of project info + eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + 
--outputPath${workingDir}/preparedInfo + + + + + + + + + + + + + + + yarn + cluster + Extend dumped publications with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/publication + --outputPath${workingDir}/tar/publication + --preparedInfoPath${workingDir}/preparedInfo + --dumpTypeeosc + + + + + + + + yarn + cluster + Extend dumped dataset with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/dataset + --outputPath${workingDir}/tar/dataset + --preparedInfoPath${workingDir}/preparedInfo + --dumpTypeeosc + + + + + + + + yarn + cluster + Extend dumped ORP with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/otherresearchproduct + --outputPath${workingDir}/tar/orp + --preparedInfoPath${workingDir}/preparedInfo + --dumpTypeeosc + + + + + + + + yarn + cluster + Extend dumped software with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/software + --outputPath${workingDir}/tar/software + --preparedInfoPath${workingDir}/preparedInfo + --dumpTypeeosc + + + + + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json index f2dc02b..14bd4b4 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json +++ 
b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json @@ -23,6 +23,12 @@ "paramLongName": "preparedInfoPath", "paramDescription": "the path of the association result projectlist", "paramRequired": true + }, + { + "paramName": "dt", + "paramLongName": "dumpType", + "paramDescription": "the dump type", + "paramRequired": false } ] diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 34da999..c31f4ca 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -4,9 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; -import java.util.List; +import java.util.*; +import org.apache.commons.collections.map.HashedMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -26,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.eosc.model.EoscResult; +import eu.dnetlib.dhp.eosc.model.Subject; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1; import eu.dnetlib.dhp.oa.model.Instance; @@ -900,8 +901,10 @@ public class DumpJobTest { "-sourcePath", sourcePath, "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/working" - + "-outputPath", workingDir.toString() + "/working", + "-communityMapPath", getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath() }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -922,6 +925,8 @@ public class DumpJobTest { Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getUrl().equals("")).count()); Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getSemanticRelation().equals("compliesWith")).count()); + System.out.println(OBJECT_MAPPER.writeValueAsString(verificationDataset.first())); + } @Test @@ -1010,4 +1015,14 @@ public class DumpJobTest { .getString(2)); } + @Test + public void SerializeMap() throws JsonProcessingException { + Map> map = new HashMap<>(); + map.put("prova", new ArrayList<>()); + Subject s = new Subject(); + s.setProvenance(null); + s.setValue("codiv-19"); + map.get("prova").add(s); + System.out.println(OBJECT_MAPPER.writeValueAsString(map)); + } } diff --git a/pom.xml b/pom.xml index 3446ab3..ad534b2 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,7 @@ 5.6.1 3.5 11.0.2 - [2.12.2-SCHEMA-NO-DUMP] + [2.13.2-SNAPSHOT] \ No newline at end of file
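
Note (not part of the patch): a minimal, hypothetical sketch of how the new Map<String, List<Subject>> field introduced above is expected to serialize, assuming the dump-schema classes (eu.dnetlib.dhp.eosc.model.Subject, eu.dnetlib.dhp.oa.model.Provenance) and Jackson are on the classpath. The class name SubjectMapSerializationSketch, the "keyword" key, and the sample provenance/value strings are illustrative only; the map key in the real dump is the subject qualifier classid selected by ResultMapper.createSubjectMap.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.eosc.model.Subject;
import eu.dnetlib.dhp.oa.model.Provenance;

public class SubjectMapSerializationSketch {
	public static void main(String[] args) throws Exception {
		// Key: subject type (qualifier classid); value: the subjects of that type.
		Map<String, List<Subject>> subject = new HashMap<>();

		// Provenance of the subject assignment (illustrative values).
		Provenance provenance = new Provenance();
		provenance.setProvenance("sysimport:crosswalk:repository");
		provenance.setTrust("0.9");

		Subject s = new Subject();
		s.setValue("covid-19");
		s.setProvenance(provenance);

		subject.computeIfAbsent("keyword", k -> new ArrayList<>()).add(s);

		// Prints something like:
		// {"keyword":[{"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"},"value":"covid-19"}]}
		System.out.println(new ObjectMapper().writeValueAsString(subject));
	}
}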