From a623883b62861446ae4f76b8b1debbefaa17a2d0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Oct 2023 17:01:03 +0200 Subject: [PATCH 01/25] Added extention to dump Organizations and also relations of type resultOrganization --- .../dnetlib/dhp/eosc/model/Affiliation.java | 46 ++++++++ .../dnetlib/dhp/eosc/model/Organization.java | 99 +++++++++++----- .../dhp/eosc/model/OrganizationPid.java | 8 ++ .../eu/dnetlib/dhp/eosc/model/Relation.java | 12 ++ .../eu/dnetlib/dhp/eosc/model/Result.java | 6 +- .../ExtendEoscResultWithOrganization.java | 5 +- ...ExtendEoscResultWithOrganizationStep2.java | 111 ++++++++++++++++-- .../graph/dump/eosc/ResultOrganizations.java | 9 +- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 5 +- .../graph/dump/eosc/SelectEoscResultTest.java | 6 +- 10 files changed, 252 insertions(+), 55 deletions(-) create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Affiliation.java diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Affiliation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Affiliation.java new file mode 100644 index 0000000..0c0d3c4 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Affiliation.java @@ -0,0 +1,46 @@ + +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; +import java.util.List; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * @author miriam.baglioni + * @Date 13/09/22 + */ +public class Affiliation implements Serializable { + @JsonSchema(description = "the OpenAIRE id of the organizaiton") + private String id; + + @JsonSchema(description = "the name of the organization") + private String name; + + @JsonSchema(description = "the list of pids we have in OpenAIRE for the organization") + private List pid; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public List getPid() { + return pid; + } + + public void setPid(List pid) { + this.pid = pid; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java index 639a3c1..395ac24 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java @@ -1,4 +1,3 @@ - package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; @@ -7,40 +6,86 @@ import java.util.List; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** - * @author miriam.baglioni - * @Date 13/09/22 + * To represent the generic organizaiton. It has the following parameters: + * - private String legalshortname to store the legalshortname of the organizaiton + * - private String legalname to store the legal name of the organization + * - private String websiteurl to store the websiteurl of the organization + * - private List alternativenames to store the alternative names of the organization + * - private Country country to store the country of the organization + * - private String id to store the openaire id of the organization + * - private List pid to store the list of pids for the organization */ public class Organization implements Serializable { - @JsonSchema(description = "the OpenAIRE id of the organizaiton") - private String id; + private String legalshortname; + private String legalname; + private String websiteurl; - @JsonSchema(description = "the name of the organization") - private String name; + @JsonSchema(description = "Alternative names that identify the organisation") + private List alternativenames; - @JsonSchema(description = "the list of pids we have in OpenAIRE for the organization") - private List pid; + @JsonSchema(description = "The organisation country") + private Country country; - public String getId() { - return id; - } + @JsonSchema(description = "The OpenAIRE id for the organisation") + private String id; - public void setId(String id) { - this.id = id; - } + @JsonSchema(description = "Persistent identifiers for the organisation i.e. isni 0000000090326370") + private List pid; - public String getName() { - return name; - } + public String getLegalshortname() { + return legalshortname; + } - public void setName(String name) { - this.name = name; - } + public void setLegalshortname(String legalshortname) { + this.legalshortname = legalshortname; + } - public List getPid() { - return pid; - } + public String getLegalname() { + return legalname; + } + + public void setLegalname(String legalname) { + this.legalname = legalname; + } + + public String getWebsiteurl() { + return websiteurl; + } + + public void setWebsiteurl(String websiteurl) { + this.websiteurl = websiteurl; + } + + public List getAlternativenames() { + return alternativenames; + } + + public void setAlternativenames(List alternativenames) { + this.alternativenames = alternativenames; + } + + public Country getCountry() { + return country; + } + + public void setCountry(Country country) { + this.country = country; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getPid() { + return pid; + } + + public void setPid(List pid) { + this.pid = pid; + } - public void setPid(List pid) { - this.pid = pid; - } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java index 79ffdcf..4613d4d 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java @@ -32,4 +32,12 @@ public class OrganizationPid implements Serializable { public void setValue(String value) { this.value = value; } + + public static OrganizationPid newInstance(String type, String value){ + OrganizationPid op = new OrganizationPid(); + op.type = type; + op.value = value; + + return op; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java index 91ce474..93f4deb 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; import java.util.Objects; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** @@ -20,12 +21,15 @@ public class Relation implements Serializable { private String target; @JsonSchema(description = "To represent the semantics of a relation between two entities") + @JsonIgnoreProperties(ignoreUnknown = true) private RelType reltype; @JsonSchema(description = "The reason why OpenAIRE holds the relation ") + @JsonIgnoreProperties(ignoreUnknown = true) private Provenance provenance; @JsonSchema(description = "The result type of the target for this relation") + @JsonIgnoreProperties(ignoreUnknown = true) private String targetType; public String getTargetType() { @@ -82,4 +86,12 @@ public class Relation implements Serializable { relation.provenance = provenance; return relation; } + + public static Relation newInstance(String source, String target) { + Relation relation = new Relation(); + relation.source = source; + relation.target = target; + + return relation; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java index 61b8584..e3ef5b5 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java @@ -24,7 +24,7 @@ public class Result implements Serializable { private List keywords; @JsonSchema(description = "The list of organizations the result is affiliated to") - private List affiliation; + private List affiliation; @JsonSchema(description = "The indicators for this result") private Indicator indicator; @@ -465,11 +465,11 @@ public class Result implements Serializable { this.subject = subject; } - public List getAffiliation() { + public List getAffiliation() { return affiliation; } - public void setAffiliation(List affiliation) { + public void setAffiliation(List affiliation) { this.affiliation = affiliation; } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java index 09cc1c7..ff40538 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; +import eu.dnetlib.dhp.eosc.model.Affiliation; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -91,7 +92,7 @@ public class ExtendEoscResultWithOrganization implements Serializable { if (t2._2() != null) { ResultOrganizations rOrg = new ResultOrganizations(); rOrg.setResultId(t2._1().getTarget()); - eu.dnetlib.dhp.eosc.model.Organization org = new eu.dnetlib.dhp.eosc.model.Organization(); + Affiliation org = new Affiliation(); org.setId(t2._2().getId()); if (Optional.ofNullable(t2._2().getLegalname()).isPresent()) { org.setName(t2._2().getLegalname().getValue()); @@ -135,7 +136,7 @@ public class ExtendEoscResultWithOrganization implements Serializable { return first._1(); } Result ret = first._1(); - List affiliation = new ArrayList<>(); + List affiliation = new ArrayList<>(); Set alreadyInsertedAffiliations = new HashSet<>(); affiliation.add(first._2().getAffiliation()); alreadyInsertedAffiliations.add(first._2().getAffiliation().getId()); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 55bed6d..9c0f785 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -5,7 +5,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; +import java.util.stream.Collectors; +import eu.dnetlib.dhp.eosc.model.Affiliation; +import eu.dnetlib.dhp.eosc.model.Country; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -33,6 +36,8 @@ import scala.Tuple2; public class ExtendEoscResultWithOrganizationStep2 implements Serializable { private static final Logger log = LoggerFactory.getLogger(ExtendEoscResultWithOrganizationStep2.class); + private final static String UNKNOWN = "UNKNOWN"; + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -52,11 +57,11 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { final String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); - final String resultPath = parser.get("resultPath"); - log.info("resultPath: {}", resultPath); + final String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); +// final String outputPath = parser.get("outputPath"); +// log.info("outputPath: {}", outputPath); SparkConf conf = new SparkConf(); @@ -64,15 +69,15 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { conf, isSparkSessionManaged, spark -> { - Utils.removeOutputDir(spark, outputPath); - addOrganizations(spark, inputPath, outputPath, resultPath); + Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); + addOrganizations(spark, inputPath, workingPath ); }); } - private static void addOrganizations(SparkSession spark, String inputPath, String outputPath, - String resultPath) { + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath) { + Dataset results = Utils - .readPath(spark, resultPath, Result.class); + .readPath(spark, workingPath + "publication", Result.class); Dataset relations = Utils .readPath(spark, inputPath + "/relation", Relation.class) @@ -88,7 +93,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { if (t2._2() != null) { ResultOrganizations rOrg = new ResultOrganizations(); rOrg.setResultId(t2._1().getTarget()); - eu.dnetlib.dhp.eosc.model.Organization org = new eu.dnetlib.dhp.eosc.model.Organization(); + Affiliation org = new Affiliation(); org.setId(t2._2().getId()); if (Optional.ofNullable(t2._2().getLegalname()).isPresent()) { org.setName(t2._2().getLegalname().getValue()); @@ -131,7 +136,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { return first._1(); } Result ret = first._1(); - List affiliation = new ArrayList<>(); + List affiliation = new ArrayList<>(); Set alreadyInsertedAffiliations = new HashSet<>(); affiliation.add(first._2().getAffiliation()); alreadyInsertedAffiliations.add(first._2().getAffiliation().getId()); @@ -148,8 +153,88 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath); + .json(workingPath + "publicationextendedaffiliation"); + + relations + .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + .map((MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization(t2._2()),Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(workingPath + "organization"); + + relations + .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + .map((MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation.newInstance(t2._1().getSource(), t2._1().getTarget()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class) ) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(workingPath + "resultOrganization"); } -} + private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org){ + + if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) + return null; + if (!Optional.ofNullable(org.getLegalname()).isPresent() + && !Optional.ofNullable(org.getLegalshortname()).isPresent()) + return null; + + eu.dnetlib.dhp.eosc.model.Organization organization = new eu.dnetlib.dhp.eosc.model.Organization(); + + Optional + .ofNullable(org.getLegalshortname()) + .ifPresent(value -> organization.setLegalshortname(value.getValue())); + + Optional + .ofNullable(org.getLegalname()) + .ifPresent(value -> organization.setLegalname(value.getValue())); + + Optional + .ofNullable(org.getWebsiteurl()) + .ifPresent(value -> organization.setWebsiteurl(value.getValue())); + + Optional + .ofNullable(org.getAlternativeNames()) + .ifPresent( + value -> organization + .setAlternativenames( + value + .stream() + .map(v -> v.getValue()) + .collect(Collectors.toList()))); + + Optional + .ofNullable(org.getCountry()) + .ifPresent( + value -> { + if (!value.getClassid().equals(UNKNOWN)) { + organization + .setCountry( + Country.newInstance(value.getClassid(), value.getClassname())); + } + + }); + + Optional + .ofNullable(org.getId()) + .ifPresent(value -> organization.setId(value)); + + Optional + .ofNullable(org.getPid()) + .ifPresent( + value -> organization + .setPid( + value + .stream() + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + + return organization; + } + + } + + diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultOrganizations.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultOrganizations.java index c8a7e03..8bd6514 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultOrganizations.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultOrganizations.java @@ -2,9 +2,8 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.Serializable; -import java.util.List; -import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Affiliation; /** * @author miriam.baglioni @@ -12,7 +11,7 @@ import eu.dnetlib.dhp.eosc.model.Organization; */ public class ResultOrganizations implements Serializable { private String resultId; - private Organization affiliation; + private Affiliation affiliation; public String getResultId() { return resultId; @@ -22,11 +21,11 @@ public class ResultOrganizations implements Serializable { this.resultId = resultId; } - public Organization getAffiliation() { + public Affiliation getAffiliation() { return affiliation; } - public void setAffiliation(Organization affiliation) { + public void setAffiliation(Affiliation affiliation) { this.affiliation = affiliation; } } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index 796aacb..631986f 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -164,8 +164,9 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} - --resultPath${workingDir}/dump/publication - --outputPath${workingDir}/dump/publicationextendedaffiliation + --workingPath${workingDir}/dump/ + + diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java index 0b7fd5b..691cdab 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java @@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.eosc.model.Indicator; -import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Affiliation; import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.action.AtomicAction; import scala.Tuple2; @@ -174,7 +174,7 @@ public class SelectEoscResultTest { .getAffiliation() .size()); - List affiliations = tmp + List affiliations = tmp .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) .first() .getAffiliation(); @@ -184,7 +184,7 @@ public class SelectEoscResultTest { affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)"))); Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE"))); - Organization organization = affiliations + Affiliation organization = affiliations .stream() .filter(a -> a.getId().equalsIgnoreCase("20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff")) .findFirst() From 7fca920b5fa64474e9c9fb216a5ff540c3e0c013 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:14:46 +0200 Subject: [PATCH 02/25] Added extention to dump Projects and also relations of type resultProject --- .../eu/dnetlib/dhp/eosc/model/Funder.java | 64 ++--- .../dnetlib/dhp/eosc/model/FunderShort.java | 58 +++++ .../eu/dnetlib/dhp/eosc/model/Fundings.java | 44 ++++ .../eu/dnetlib/dhp/eosc/model/Granted.java | 66 +++++ .../eu/dnetlib/dhp/eosc/model/Programme.java | 46 ++++ .../eu/dnetlib/dhp/eosc/model/Project.java | 245 +++++++++++++----- .../dhp/eosc/model/ProjectSummary.java | 97 +++++++ .../eu/dnetlib/dhp/eosc/model/Result.java | 6 +- ...ExtendEoscResultWithOrganizationStep2.java | 12 +- .../dhp/oa/graph/dump/eosc/ResultProject.java | 8 +- .../dump/eosc/SparkPrepareResultProject.java | 18 +- .../dump/eosc/SparkUpdateProjectInfo.java | 213 ++++++++++++++- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 118 +-------- .../oa/graph/dump/UpdateProjectInfoTest.java | 12 +- 14 files changed, 749 insertions(+), 258 deletions(-) create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/FunderShort.java create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ProjectSummary.java diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java index cea8c3e..f2b198d 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -1,58 +1,28 @@ - package eu.dnetlib.dhp.eosc.model; -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - /** * @author miriam.baglioni - * @Date 26/01/23 + * @Date 25/10/23 */ -public class Funder implements Serializable { - @JsonSchema(description = "The short name of the funder (EC)") - private String shortName; - @JsonSchema(description = "The name of the funder (European Commission)") - private String name; + import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - @JsonSchema( - description = "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)") - private String jurisdiction; +/** + * To store information about the funder funding the project related to the result. It extends + * eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private + * eu.dnetdlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the fundingstream + */ + public class Funder extends FunderShort { - public String getJurisdiction() { - return jurisdiction; - } + @JsonSchema(description = "Description of the funding stream") + private Fundings funding_stream; - public void setJurisdiction(String jurisdiction) { - this.jurisdiction = jurisdiction; - } + public Fundings getFunding_stream() { + return funding_stream; + } - public String getShortName() { - return shortName; - } - - public void setShortName(String shortName) { - this.shortName = shortName; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - @JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)") - private String fundingStream; - - public String getFundingStream() { - return fundingStream; - } - - public void setFundingStream(String fundingStream) { - this.fundingStream = fundingStream; - } -} + public void setFunding_stream(Fundings funding_stream) { + this.funding_stream = funding_stream; + } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/FunderShort.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/FunderShort.java new file mode 100644 index 0000000..32711cf --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/FunderShort.java @@ -0,0 +1,58 @@ + +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * @author miriam.baglioni + * @Date 26/01/23 + */ +public class FunderShort implements Serializable { + + @JsonSchema(description = "The short name of the funder (EC)") + private String shortName; + + @JsonSchema(description = "The name of the funder (European Commission)") + private String name; + + @JsonSchema( + description = "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)") + private String jurisdiction; + + public String getJurisdiction() { + return jurisdiction; + } + + public void setJurisdiction(String jurisdiction) { + this.jurisdiction = jurisdiction; + } + + public String getShortName() { + return shortName; + } + + public void setShortName(String shortName) { + this.shortName = shortName; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)") + private String fundingStream; + + public String getFundingStream() { + return fundingStream; + } + + public void setFundingStream(String fundingStream) { + this.fundingStream = fundingStream; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java new file mode 100644 index 0000000..245a140 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.eosc.model; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + + + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * To store inforamtion about the funding stream. It has two parameters: - private String id to store the id of the + * fundings stream. The id is created by appending the shortname of the funder to the name of each level in the xml + * representing the fundng stream. For example: if the funder is the European Commission, the funding level 0 name is + * FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE + * - private String description to describe the funding stream. It is created by concatenating the description of each + * funding level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People - + * Marie-Curie Actions + */ +public class Fundings implements Serializable { + + @JsonSchema(description = "Id of the funding stream") + private String id; + private String description; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java new file mode 100644 index 0000000..4202ea2 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java @@ -0,0 +1,66 @@ +package eu.dnetlib.dhp.eosc.model; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * To describe the funded amount. It has the following parameters: - private String currency to store the currency of + * the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the + * funded amount by the funder + */ +public class Granted implements Serializable { + @JsonSchema(description = "The currency of the granted amount (e.g. EUR)") + private String currency; + + @JsonSchema(description = "The total cost of the project") + private float totalcost; + + @JsonSchema(description = "The funded amount") + private float fundedamount; + + public String getCurrency() { + return currency; + } + + public void setCurrency(String currency) { + this.currency = currency; + } + + public float getTotalcost() { + return totalcost; + } + + public void setTotalcost(float totalcost) { + this.totalcost = totalcost; + } + + public float getFundedamount() { + return fundedamount; + } + + public void setFundedamount(float fundedamount) { + this.fundedamount = fundedamount; + } + + public static Granted newInstance(String currency, float totalcost, float fundedamount) { + Granted granted = new Granted(); + granted.currency = currency; + granted.totalcost = totalcost; + granted.fundedamount = fundedamount; + return granted; + } + + public static Granted newInstance(String currency, float fundedamount) { + Granted granted = new Granted(); + granted.currency = currency; + granted.fundedamount = fundedamount; + return granted; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java new file mode 100644 index 0000000..76e9846 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java @@ -0,0 +1,46 @@ +package eu.dnetlib.dhp.eosc.model; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * To store information about the ec programme for the project. It has the following parameters: - private String code + * to store the code of the programme - private String description to store the description of the programme + */ +public class Programme implements Serializable { + @JsonSchema(description = "The code of the programme") + private String code; + + @JsonSchema(description = "The description of the programme") + private String description; + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public static Programme newInstance(String code, String description) { + Programme p = new Programme(); + p.code = code; + p.description = description; + return p; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index 810b657..e6a9d5b 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -1,97 +1,206 @@ - package eu.dnetlib.dhp.eosc.model; -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - /** * @author miriam.baglioni - * @Date 26/01/23 + * @Date 25/10/23 */ + + +import java.io.Serializable; +import java.util.List; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + + +/** + * This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump + * of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and + * Projects but we put the information about the Funder within the Project representation. We also removed the + * collected from element from the Project. No relation between the Project and the Datasource entity from which it is + * collected will be created. We will never create relations between Project and Datasource. In case some relation will + * be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project, + * project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to + * 0. It has the following parameters: + * - private String id to store the id of the project (OpenAIRE id) + * - private String websiteurl to store the websiteurl of the project + * - private String code to store the grant agreement of the project + * - private String acronym to store the acronym of the project + * - private String title to store the tile of the project + * - private String startdate to store the start date + * - private String enddate to store the end date + * - private String callidentifier to store the call indentifier + * - private String keywords to store the keywords + * - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate + * for publications. This value will be set to true if one of the field in the project represented in the internal model + * is set to true + * - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for + * dataset. It is set to the value in the corresponding filed of the project represented in the internal model + * - private List subject to store the list of subjects of the project + * - private List funding to store the list of funder of the project + * - private String summary to store the summary of the project + * - private Granted granted to store the granted amount + * - private List h2020programme to store the list of programmes the project is related to + */ + public class Project implements Serializable { - @JsonSchema(description = "The OpenAIRE id for the project") - protected String id;// OpenAIRE id + private String id; - @JsonSchema(description = "The grant agreement number") - protected String code; + private String websiteurl; + private String code; + private String acronym; + private String title; - @JsonSchema(description = "The acronym of the project") - protected String acronym; + private String startdate; - protected String title; + private String enddate; - @JsonSchema(description = "Information about the funder funding the project") - private Funder funder; + private String callidentifier; - private Provenance provenance; + private String keywords; - private Validated validated; + private boolean openaccessmandateforpublications; - public void setValidated(Validated validated) { - this.validated = validated; - } + private boolean openaccessmandatefordataset; + private List subject; - public Validated getValidated() { - return validated; - } + @JsonSchema(description = "Funding information for the project") + private List funding; - public Provenance getProvenance() { - return provenance; - } + private String summary; - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } + @JsonSchema(description = "The money granted to the project") + private Granted granted; - public Funder getFunder() { - return funder; - } + @JsonSchema(description = "The h2020 programme funding the project") + private List h2020programme; - public void setFunder(Funder funders) { - this.funder = funders; - } - public String getId() { - return id; - } - public void setId(String id) { - this.id = id; - } + public String getId() { + return id; + } - public String getCode() { - return code; - } + public void setId(String id) { + this.id = id; + } - public void setCode(String code) { - this.code = code; - } + public String getWebsiteurl() { + return websiteurl; + } - public String getAcronym() { - return acronym; - } + public void setWebsiteurl(String websiteurl) { + this.websiteurl = websiteurl; + } - public void setAcronym(String acronym) { - this.acronym = acronym; - } + public String getCode() { + return code; + } - public String getTitle() { - return title; - } + public void setCode(String code) { + this.code = code; + } - public void setTitle(String title) { - this.title = title; - } + public String getAcronym() { + return acronym; + } - public static Project newInstance(String id, String code, String acronym, String title, Funder funder) { - Project project = new Project(); - project.setAcronym(acronym); - project.setCode(code); - project.setFunder(funder); - project.setId(id); - project.setTitle(title); - return project; - } + public void setAcronym(String acronym) { + this.acronym = acronym; + } + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getStartdate() { + return startdate; + } + + public void setStartdate(String startdate) { + this.startdate = startdate; + } + + public String getEnddate() { + return enddate; + } + + public void setEnddate(String enddate) { + this.enddate = enddate; + } + + public String getCallidentifier() { + return callidentifier; + } + + public void setCallidentifier(String callidentifier) { + this.callidentifier = callidentifier; + } + + public String getKeywords() { + return keywords; + } + + public void setKeywords(String keywords) { + this.keywords = keywords; + } + + public boolean isOpenaccessmandateforpublications() { + return openaccessmandateforpublications; + } + + public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) { + this.openaccessmandateforpublications = openaccessmandateforpublications; + } + + public boolean isOpenaccessmandatefordataset() { + return openaccessmandatefordataset; + } + + public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) { + this.openaccessmandatefordataset = openaccessmandatefordataset; + } + + public List getSubject() { + return subject; + } + + public void setSubject(List subject) { + this.subject = subject; + } + + public List getFunding() { + return funding; + } + + public void setFunding(List funding) { + this.funding = funding; + } + + public String getSummary() { + return summary; + } + + public void setSummary(String summary) { + this.summary = summary; + } + + public Granted getGranted() { + return granted; + } + + public void setGranted(Granted granted) { + this.granted = granted; + } + + public List getH2020programme() { + return h2020programme; + } + + public void setH2020programme(List h2020programme) { + this.h2020programme = h2020programme; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ProjectSummary.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ProjectSummary.java new file mode 100644 index 0000000..d5011a0 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ProjectSummary.java @@ -0,0 +1,97 @@ + +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * @author miriam.baglioni + * @Date 26/01/23 + */ +public class ProjectSummary implements Serializable { + @JsonSchema(description = "The OpenAIRE id for the project") + protected String id;// OpenAIRE id + + @JsonSchema(description = "The grant agreement number") + protected String code; + + @JsonSchema(description = "The acronym of the project") + protected String acronym; + + protected String title; + + @JsonSchema(description = "Information about the funder funding the project") + private FunderShort funder; + + private Provenance provenance; + + private Validated validated; + + public void setValidated(Validated validated) { + this.validated = validated; + } + + public Validated getValidated() { + return validated; + } + + public Provenance getProvenance() { + return provenance; + } + + public void setProvenance(Provenance provenance) { + this.provenance = provenance; + } + + public FunderShort getFunder() { + return funder; + } + + public void setFunder(FunderShort funders) { + this.funder = funders; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getAcronym() { + return acronym; + } + + public void setAcronym(String acronym) { + this.acronym = acronym; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public static ProjectSummary newInstance(String id, String code, String acronym, String title, FunderShort funder) { + ProjectSummary project = new ProjectSummary(); + project.setAcronym(acronym); + project.setCode(code); + project.setFunder(funder); + project.setId(id); + project.setTitle(title); + return project; + } + +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java index e3ef5b5..df9325e 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java @@ -30,7 +30,7 @@ public class Result implements Serializable { private Indicator indicator; @JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production ofn the research results") - private List projects; + private List projects; @JsonSchema( description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu") @@ -409,11 +409,11 @@ public class Result implements Serializable { this.collectedfrom = collectedfrom; } - public List getProjects() { + public List getProjects() { return projects; } - public void setProjects(List projects) { + public void setProjects(List projects) { this.projects = projects; } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 9c0f785..14f4836 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -60,8 +60,8 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { final String workingPath = parser.get("workingPath"); log.info("workingPath: {}", workingPath); -// final String outputPath = parser.get("outputPath"); -// log.info("outputPath: {}", outputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); SparkConf conf = new SparkConf(); @@ -70,11 +70,11 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); - addOrganizations(spark, inputPath, workingPath ); + addOrganizations(spark, inputPath, workingPath , outputPath); }); } - private static void addOrganizations(SparkSession spark, String inputPath, String workingPath) { + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath) { Dataset results = Utils .readPath(spark, workingPath + "publication", Result.class); @@ -162,7 +162,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression","gzip") - .json(workingPath + "organization"); + .json(outputPath + "organization"); relations .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) @@ -170,7 +170,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression","gzip") - .json(workingPath + "resultOrganization"); + .json(outputPath + "resultOrganization"); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java index 4035d17..def635e 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java @@ -4,11 +4,11 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.Serializable; import java.util.List; -import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.eosc.model.ProjectSummary; public class ResultProject implements Serializable { private String resultId; - private List projectsList; + private List projectsList; public String getResultId() { return resultId; @@ -18,11 +18,11 @@ public class ResultProject implements Serializable { this.resultId = resultId; } - public List getProjectsList() { + public List getProjectsList() { return projectsList; } - public void setProjectsList(List projectsList) { + public void setProjectsList(List projectsList) { this.projectsList = projectsList; } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java index 03630b6..3a0770f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java @@ -26,8 +26,8 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.Funder; -import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.eosc.model.FunderShort; +import eu.dnetlib.dhp.eosc.model.ProjectSummary; import eu.dnetlib.dhp.eosc.model.Provenance; import eu.dnetlib.dhp.eosc.model.Validated; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -103,9 +103,9 @@ public class SparkPrepareResultProject implements Serializable { rp.setResultId(s); eu.dnetlib.dhp.schema.oaf.Project p = first._1(); projectSet.add(p.getId()); - Project ps = getProject(p, first._2); + ProjectSummary ps = getProject(p, first._2); - List projList = new ArrayList<>(); + List projList = new ArrayList<>(); projList.add(ps); rp.setProjectsList(projList); it.forEachRemaining(c -> { @@ -132,8 +132,8 @@ public class SparkPrepareResultProject implements Serializable { .json(outputPath); } - private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) { - Project p = Project + private static ProjectSummary getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) { + ProjectSummary p = ProjectSummary .newInstance( op.getId(), op.getCode().getValue(), @@ -148,7 +148,7 @@ public class SparkPrepareResultProject implements Serializable { Optional .ofNullable(op.getFundingtree()) .map(value -> { - List tmp = value + List tmp = value .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); @@ -174,8 +174,8 @@ public class SparkPrepareResultProject implements Serializable { } - private static Funder getFunder(String fundingtree) { - final Funder f = new Funder(); + private static FunderShort getFunder(String fundingtree) { + final FunderShort f = new FunderShort(); final Document doc; try { final SAXReader reader = new SAXReader(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 376a677..2bd2978 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -4,23 +4,35 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; +import eu.dnetlib.dhp.eosc.model.*; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Project; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Node; +import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.graph.dump.Constants; +import scala.Array; import scala.Tuple2; public class SparkUpdateProjectInfo implements Serializable { @@ -47,12 +59,15 @@ public class SparkUpdateProjectInfo implements Serializable { final String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); + final String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); final String preparedInfoPath = parser.get("preparedInfoPath"); log.info("preparedInfoPath: {}", preparedInfoPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + final String dumpType = Optional .ofNullable(parser.get("dumpType")) .orElse(Constants.DUMPTYPE.COMMUNITY.getType()); @@ -64,18 +79,19 @@ public class SparkUpdateProjectInfo implements Serializable { conf, isSparkSessionManaged, spark -> { - Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath); + Utils.removeOutputDir(spark, workingPath + "publicationextendedproject"); + extend(spark, inputPath, workingPath, preparedInfoPath, outputPath); }); } private static void extend( SparkSession spark, String inputPath, - String outputPath, - String preparedInfoPath) { + String workingPath, + String preparedInfoPath, + String outputPath) { - Dataset result = Utils.readPath(spark, inputPath, Result.class); + Dataset result = Utils.readPath(spark, workingPath + "publicationextendedaffiliation", Result.class); Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); result @@ -90,7 +106,186 @@ public class SparkUpdateProjectInfo implements Serializable { .write() .option("compression", "gzip") .mode(SaveMode.Append) - .json(outputPath); + .json(workingPath + "publicationextendedproject"); + + Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); + + Dataset projectIds = result.joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId"))) + .flatMap((FlatMapFunction, String>) t2 -> t2._2().getProjectsList() + .stream().map(p -> p.getId()).collect(Collectors.toList()).iterator(), Encoders.STRING()) + .distinct(); + + projectIds.joinWith(project, projectIds.col("value").equalTo(project.col("id"))) + .map((MapFunction, eu.dnetlib.dhp.eosc.model.Project>)t2->mapProject(t2._2()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Project.class) ) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath + "project"); + + resultProject.flatMap((FlatMapFunction) rp -> + rp.getProjectsList().stream().map(p -> Relation.newInstance(rp.getResultId(), p.getId())) + .collect(Collectors.toList()).iterator(), Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath + "resultProject"); + } + + private static eu.dnetlib.dhp.eosc.model.Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException { + if (Boolean.TRUE.equals(p.getDataInfo().getDeletedbyinference())) + return null; + + eu.dnetlib.dhp.eosc.model.Project project = new eu.dnetlib.dhp.eosc.model.Project(); + + Optional + .ofNullable(p.getId()) + .ifPresent(id -> project.setId(id)); + + Optional + .ofNullable(p.getWebsiteurl()) + .ifPresent(w -> project.setWebsiteurl(w.getValue())); + + Optional + .ofNullable(p.getCode()) + .ifPresent(code -> project.setCode(code.getValue())); + + Optional + .ofNullable(p.getAcronym()) + .ifPresent(acronynim -> project.setAcronym(acronynim.getValue())); + + Optional + .ofNullable(p.getTitle()) + .ifPresent(title -> project.setTitle(title.getValue())); + + Optional + .ofNullable(p.getStartdate()) + .ifPresent(sdate -> project.setStartdate(sdate.getValue())); + + Optional + .ofNullable(p.getEnddate()) + .ifPresent(edate -> project.setEnddate(edate.getValue())); + + Optional + .ofNullable(p.getCallidentifier()) + .ifPresent(cide -> project.setCallidentifier(cide.getValue())); + + Optional + .ofNullable(p.getKeywords()) + .ifPresent(key -> project.setKeywords(key.getValue())); + + Optional> omandate = Optional.ofNullable(p.getOamandatepublications()); + Optional> oecsc39 = Optional.ofNullable(p.getEcsc39()); + boolean mandate = false; + if (omandate.isPresent()) { + if (omandate.get().getValue().equals("true")) { + mandate = true; + } + } + if (oecsc39.isPresent()) { + if (oecsc39.get().getValue().equals("true")) { + mandate = true; + } + } + + project.setOpenaccessmandateforpublications(mandate); + project.setOpenaccessmandatefordataset(false); + + Optional + .ofNullable(p.getEcarticle29_3()) + .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true"))); + + project + .setSubject( + Optional + .ofNullable(p.getSubjects()) + .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList())) + .orElse(new ArrayList<>())); + + Optional + .ofNullable(p.getSummary()) + .ifPresent(summary -> project.setSummary(summary.getValue())); + + Optional ofundedamount = Optional.ofNullable(p.getFundedamount()); + Optional> ocurrency = Optional.ofNullable(p.getCurrency()); + Optional ototalcost = Optional.ofNullable(p.getTotalcost()); + + if (ocurrency.isPresent()) { + if (ofundedamount.isPresent()) { + if (ototalcost.isPresent()) { + project + .setGranted( + Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get())); + } else { + project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get())); + } + } + } + + project + .setH2020programme( + Optional + .ofNullable(p.getH2020classification()) + .map( + classification -> classification + .stream() + .map( + c -> Programme + .newInstance( + c.getH2020Programme().getCode(), c.getH2020Programme().getDescription())) + .collect(Collectors.toList())) + .orElse(new ArrayList<>())); + + Optional>> ofundTree = Optional + .ofNullable(p.getFundingtree()); + List funList = new ArrayList<>(); + if (ofundTree.isPresent()) { + for (Field fundingtree : ofundTree.get()) { + funList.add(getFunder(fundingtree.getValue())); + } + } + project.setFunding(funList); + + return project; + } + + public static Funder getFunder(String fundingtree) throws DocumentException { + Funder f = new Funder(); + final Document doc; + + doc = new SAXReader().read(new StringReader(fundingtree)); + f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); + f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText()); + f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); + + String id = ""; + + StringBuilder bld = new StringBuilder(); + + int level = 0; + List nodes = doc.selectNodes("//funding_level_" + level); + while (!nodes.isEmpty()) { + for (org.dom4j.Node n : nodes) { + + List node = n.selectNodes("./id"); + id = ((org.dom4j.Node) node.get(0)).getText(); + id = id.substring(id.indexOf("::") + 2); + + node = n.selectNodes("./description"); + bld.append(((Node) node.get(0)).getText() + " - "); + + } + level += 1; + nodes = doc.selectNodes("//funding_level_" + level); + } + String description = bld.toString(); + if (!id.equals("")) { + Fundings fundings = new Fundings(); + fundings.setId(id); + fundings.setDescription(description.substring(0, description.length() - 3).trim()); + f.setFunding_stream(fundings); + } + + return f; } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index 631986f..def9f0c 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -165,32 +165,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/dump/ - - - - - - - - - yarn - cluster - Extend Dump Publication with indicators - eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --actionSetPath${actionSetPath} - --resultPath${workingDir}/dump/publicationextendedaffiliation - --outputPath${workingDir}/dump/publicationextended + --outputPath${outputPath}/dump/ @@ -241,38 +216,15 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} - --resultPath${workingDir}/dump/dataset - --outputPath${workingDir}/dump/datasetextendedaffiliation - - - - - - - yarn - cluster - Extend Dump Dataset with indicators - eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --actionSetPath${actionSetPath} - --resultPath${workingDir}/dump/datasetextendedaffiliation - --outputPath${workingDir}/dump/datasetextended + --workingPath${workingDir}/dump/ + --outputPath${outputPath}/dump/ + yarn @@ -317,32 +269,8 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} - --resultPath${workingDir}/dump/otherresearchproduct - --outputPath${workingDir}/dump/otherresearchproductextendedaffiliation - - - - - - - yarn - cluster - Extend Dump ORP with indicators - eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --actionSetPath${actionSetPath} - --resultPath${workingDir}/dump/otherresearchproductextendedaffiliation - --outputPath${workingDir}/dump/otherresearchproductextended + --workingPath${workingDir}/dump/ + --outputPath${outputPath}/dump/ @@ -393,37 +321,14 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath} - --resultPath${workingDir}/dump/software - --outputPath${workingDir}/dump/softwareextendedaffiliation - - - - - - - yarn - cluster - Extend Dump ORP with indicators - eu.dnetlib.dhp.oa.graph.dump.eosc.ExtendWithUsageCounts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --actionSetPath${actionSetPath} - --resultPath${workingDir}/dump/softwareextendedaffiliation - --outputPath${workingDir}/dump/softwareextended + --workingPath${workingDir}/dump/ + --outputPath${outputPath}/dump/ + @@ -474,10 +379,11 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/publicationextendedaffiliation - --outputPath${workingDir}/dump/publicationextendedproject + --sourcePath${sourcePath} + --workingPath${workingDir}/dump/ --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc + --outputPath${outputPath}/dump/ diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 33c1963..e75d2ca 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -24,7 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.eosc.model.ProjectSummary; import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo; @@ -194,15 +194,15 @@ public class UpdateProjectInfoTest { .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'") .count()); - Project project = verificationDataset + ProjectSummary project = verificationDataset .map( - (MapFunction) cr -> cr + (MapFunction) cr -> cr .getProjects() .stream() .filter(p -> p.getValidated() != null) .collect(Collectors.toList()) .get(0), - Encoders.bean(Project.class)) + Encoders.bean(ProjectSummary.class)) .first(); Assertions.assertTrue(project.getFunder().getName().equals("Academy of Finland")); @@ -213,13 +213,13 @@ public class UpdateProjectInfoTest { project = verificationDataset .map( - (MapFunction) cr -> cr + (MapFunction) cr -> cr .getProjects() .stream() .filter(p -> p.getValidated() == null) .collect(Collectors.toList()) .get(0), - Encoders.bean(Project.class)) + Encoders.bean(ProjectSummary.class)) .first(); Assertions.assertTrue(project.getFunder().getName().equals("European Commission")); From da19f117d80b3d37fef033c7d58a1ccf1873f2a1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:17:06 +0200 Subject: [PATCH 03/25] added parameter to parameter file for the mapping of projects and relations --- .../dhp/oa/graph/dump/eosc_project_input_parameters.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json index 14bd4b4..7b587aa 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json @@ -29,6 +29,12 @@ "paramLongName": "dumpType", "paramDescription": "the dump type", "paramRequired": false + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the working path", + "paramRequired": false } ] From a821371af2ffa3bec53c47f949eb1a90fad3363f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:46:10 +0200 Subject: [PATCH 04/25] added code to dump the relations between organizaiton and projects in the subset of entities relevant for EOSC --- .../eu/dnetlib/dhp/eosc/model/Funder.java | 25 +- .../eu/dnetlib/dhp/eosc/model/Fundings.java | 34 ++- .../eu/dnetlib/dhp/eosc/model/Granted.java | 77 +++--- .../dnetlib/dhp/eosc/model/Organization.java | 107 ++++---- .../dhp/eosc/model/OrganizationPid.java | 2 +- .../eu/dnetlib/dhp/eosc/model/Programme.java | 47 ++-- .../eu/dnetlib/dhp/eosc/model/Project.java | 236 +++++++++--------- .../ExtendEoscResultWithOrganization.java | 2 +- ...ExtendEoscResultWithOrganizationStep2.java | 136 +++++----- ...java => SparkDumpOrganizationProject.java} | 63 ++--- .../dump/eosc/SparkUpdateProjectInfo.java | 144 ++++++----- ...osc_dump_organizationprojectrelations.json | 26 ++ .../graph/dump/eosc/SelectEoscResultTest.java | 2 +- 13 files changed, 459 insertions(+), 442 deletions(-) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/{SparkDumpRelation.java => SparkDumpOrganizationProject.java} (56%) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java index f2b198d..eb8df14 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -1,28 +1,27 @@ + package eu.dnetlib.dhp.eosc.model; /** * @author miriam.baglioni * @Date 25/10/23 */ - - - import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** * To store information about the funder funding the project related to the result. It extends * eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private * eu.dnetdlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the fundingstream */ - public class Funder extends FunderShort { +public class Funder extends FunderShort { - @JsonSchema(description = "Description of the funding stream") - private Fundings funding_stream; + @JsonSchema(description = "Description of the funding stream") + private Fundings funding_stream; - public Fundings getFunding_stream() { - return funding_stream; - } + public Fundings getFunding_stream() { + return funding_stream; + } - public void setFunding_stream(Fundings funding_stream) { - this.funding_stream = funding_stream; - } - } + public void setFunding_stream(Fundings funding_stream) { + this.funding_stream = funding_stream; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java index 245a140..0440be3 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java @@ -1,12 +1,10 @@ + package eu.dnetlib.dhp.eosc.model; /** * @author miriam.baglioni * @Date 25/10/23 */ - - - import java.io.Serializable; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; @@ -22,23 +20,23 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; */ public class Fundings implements Serializable { - @JsonSchema(description = "Id of the funding stream") - private String id; - private String description; + @JsonSchema(description = "Id of the funding stream") + private String id; + private String description; - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public String getDescription() { - return description; - } + public String getDescription() { + return description; + } - public void setDescription(String description) { - this.description = description; - } + public void setDescription(String description) { + this.description = description; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java index 4202ea2..26b28ef 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java @@ -1,11 +1,10 @@ + package eu.dnetlib.dhp.eosc.model; /** * @author miriam.baglioni * @Date 25/10/23 */ - - import java.io.Serializable; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; @@ -16,51 +15,51 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; * funded amount by the funder */ public class Granted implements Serializable { - @JsonSchema(description = "The currency of the granted amount (e.g. EUR)") - private String currency; + @JsonSchema(description = "The currency of the granted amount (e.g. EUR)") + private String currency; - @JsonSchema(description = "The total cost of the project") - private float totalcost; + @JsonSchema(description = "The total cost of the project") + private float totalcost; - @JsonSchema(description = "The funded amount") - private float fundedamount; + @JsonSchema(description = "The funded amount") + private float fundedamount; - public String getCurrency() { - return currency; - } + public String getCurrency() { + return currency; + } - public void setCurrency(String currency) { - this.currency = currency; - } + public void setCurrency(String currency) { + this.currency = currency; + } - public float getTotalcost() { - return totalcost; - } + public float getTotalcost() { + return totalcost; + } - public void setTotalcost(float totalcost) { - this.totalcost = totalcost; - } + public void setTotalcost(float totalcost) { + this.totalcost = totalcost; + } - public float getFundedamount() { - return fundedamount; - } + public float getFundedamount() { + return fundedamount; + } - public void setFundedamount(float fundedamount) { - this.fundedamount = fundedamount; - } + public void setFundedamount(float fundedamount) { + this.fundedamount = fundedamount; + } - public static Granted newInstance(String currency, float totalcost, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.totalcost = totalcost; - granted.fundedamount = fundedamount; - return granted; - } + public static Granted newInstance(String currency, float totalcost, float fundedamount) { + Granted granted = new Granted(); + granted.currency = currency; + granted.totalcost = totalcost; + granted.fundedamount = fundedamount; + return granted; + } - public static Granted newInstance(String currency, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.fundedamount = fundedamount; - return granted; - } + public static Granted newInstance(String currency, float fundedamount) { + Granted granted = new Granted(); + granted.currency = currency; + granted.fundedamount = fundedamount; + return granted; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java index 395ac24..0c0a270 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Organization.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; @@ -16,76 +17,76 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; * - private List pid to store the list of pids for the organization */ public class Organization implements Serializable { - private String legalshortname; - private String legalname; - private String websiteurl; + private String legalshortname; + private String legalname; + private String websiteurl; - @JsonSchema(description = "Alternative names that identify the organisation") - private List alternativenames; + @JsonSchema(description = "Alternative names that identify the organisation") + private List alternativenames; - @JsonSchema(description = "The organisation country") - private Country country; + @JsonSchema(description = "The organisation country") + private Country country; - @JsonSchema(description = "The OpenAIRE id for the organisation") - private String id; + @JsonSchema(description = "The OpenAIRE id for the organisation") + private String id; - @JsonSchema(description = "Persistent identifiers for the organisation i.e. isni 0000000090326370") - private List pid; + @JsonSchema(description = "Persistent identifiers for the organisation i.e. isni 0000000090326370") + private List pid; - public String getLegalshortname() { - return legalshortname; - } + public String getLegalshortname() { + return legalshortname; + } - public void setLegalshortname(String legalshortname) { - this.legalshortname = legalshortname; - } + public void setLegalshortname(String legalshortname) { + this.legalshortname = legalshortname; + } - public String getLegalname() { - return legalname; - } + public String getLegalname() { + return legalname; + } - public void setLegalname(String legalname) { - this.legalname = legalname; - } + public void setLegalname(String legalname) { + this.legalname = legalname; + } - public String getWebsiteurl() { - return websiteurl; - } + public String getWebsiteurl() { + return websiteurl; + } - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } + public void setWebsiteurl(String websiteurl) { + this.websiteurl = websiteurl; + } - public List getAlternativenames() { - return alternativenames; - } + public List getAlternativenames() { + return alternativenames; + } - public void setAlternativenames(List alternativenames) { - this.alternativenames = alternativenames; - } + public void setAlternativenames(List alternativenames) { + this.alternativenames = alternativenames; + } - public Country getCountry() { - return country; - } + public Country getCountry() { + return country; + } - public void setCountry(Country country) { - this.country = country; - } + public void setCountry(Country country) { + this.country = country; + } - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public List getPid() { - return pid; - } + public List getPid() { + return pid; + } - public void setPid(List pid) { - this.pid = pid; - } + public void setPid(List pid) { + this.pid = pid; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java index 4613d4d..fb99d97 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OrganizationPid.java @@ -33,7 +33,7 @@ public class OrganizationPid implements Serializable { this.value = value; } - public static OrganizationPid newInstance(String type, String value){ + public static OrganizationPid newInstance(String type, String value) { OrganizationPid op = new OrganizationPid(); op.type = type; op.value = value; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java index 76e9846..0d90081 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java @@ -1,11 +1,10 @@ + package eu.dnetlib.dhp.eosc.model; /** * @author miriam.baglioni * @Date 25/10/23 */ - - import java.io.Serializable; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; @@ -15,32 +14,32 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; * to store the code of the programme - private String description to store the description of the programme */ public class Programme implements Serializable { - @JsonSchema(description = "The code of the programme") - private String code; + @JsonSchema(description = "The code of the programme") + private String code; - @JsonSchema(description = "The description of the programme") - private String description; + @JsonSchema(description = "The description of the programme") + private String description; - public String getCode() { - return code; - } + public String getCode() { + return code; + } - public void setCode(String code) { - this.code = code; - } + public void setCode(String code) { + this.code = code; + } - public String getDescription() { - return description; - } + public String getDescription() { + return description; + } - public void setDescription(String description) { - this.description = description; - } + public void setDescription(String description) { + this.description = description; + } - public static Programme newInstance(String code, String description) { - Programme p = new Programme(); - p.code = code; - p.description = description; - return p; - } + public static Programme newInstance(String code, String description) { + Programme p = new Programme(); + p.code = code; + p.description = description; + return p; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index e6a9d5b..6e71764 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -1,17 +1,15 @@ + package eu.dnetlib.dhp.eosc.model; /** * @author miriam.baglioni * @Date 25/10/23 */ - - import java.io.Serializable; import java.util.List; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - /** * This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump * of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and @@ -43,164 +41,162 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; */ public class Project implements Serializable { - private String id; + private String id; - private String websiteurl; - private String code; - private String acronym; - private String title; + private String websiteurl; + private String code; + private String acronym; + private String title; - private String startdate; + private String startdate; - private String enddate; + private String enddate; - private String callidentifier; + private String callidentifier; - private String keywords; + private String keywords; - private boolean openaccessmandateforpublications; + private boolean openaccessmandateforpublications; - private boolean openaccessmandatefordataset; - private List subject; + private boolean openaccessmandatefordataset; + private List subject; - @JsonSchema(description = "Funding information for the project") - private List funding; + @JsonSchema(description = "Funding information for the project") + private List funding; - private String summary; + private String summary; - @JsonSchema(description = "The money granted to the project") - private Granted granted; + @JsonSchema(description = "The money granted to the project") + private Granted granted; - @JsonSchema(description = "The h2020 programme funding the project") - private List h2020programme; + @JsonSchema(description = "The h2020 programme funding the project") + private List h2020programme; + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } - public String getId() { - return id; - } + public String getWebsiteurl() { + return websiteurl; + } - public void setId(String id) { - this.id = id; - } + public void setWebsiteurl(String websiteurl) { + this.websiteurl = websiteurl; + } - public String getWebsiteurl() { - return websiteurl; - } + public String getCode() { + return code; + } - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } + public void setCode(String code) { + this.code = code; + } - public String getCode() { - return code; - } + public String getAcronym() { + return acronym; + } - public void setCode(String code) { - this.code = code; - } + public void setAcronym(String acronym) { + this.acronym = acronym; + } - public String getAcronym() { - return acronym; - } + public String getTitle() { + return title; + } - public void setAcronym(String acronym) { - this.acronym = acronym; - } + public void setTitle(String title) { + this.title = title; + } - public String getTitle() { - return title; - } + public String getStartdate() { + return startdate; + } - public void setTitle(String title) { - this.title = title; - } + public void setStartdate(String startdate) { + this.startdate = startdate; + } - public String getStartdate() { - return startdate; - } + public String getEnddate() { + return enddate; + } - public void setStartdate(String startdate) { - this.startdate = startdate; - } + public void setEnddate(String enddate) { + this.enddate = enddate; + } - public String getEnddate() { - return enddate; - } + public String getCallidentifier() { + return callidentifier; + } - public void setEnddate(String enddate) { - this.enddate = enddate; - } + public void setCallidentifier(String callidentifier) { + this.callidentifier = callidentifier; + } - public String getCallidentifier() { - return callidentifier; - } + public String getKeywords() { + return keywords; + } - public void setCallidentifier(String callidentifier) { - this.callidentifier = callidentifier; - } + public void setKeywords(String keywords) { + this.keywords = keywords; + } - public String getKeywords() { - return keywords; - } + public boolean isOpenaccessmandateforpublications() { + return openaccessmandateforpublications; + } - public void setKeywords(String keywords) { - this.keywords = keywords; - } + public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) { + this.openaccessmandateforpublications = openaccessmandateforpublications; + } - public boolean isOpenaccessmandateforpublications() { - return openaccessmandateforpublications; - } + public boolean isOpenaccessmandatefordataset() { + return openaccessmandatefordataset; + } - public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) { - this.openaccessmandateforpublications = openaccessmandateforpublications; - } + public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) { + this.openaccessmandatefordataset = openaccessmandatefordataset; + } - public boolean isOpenaccessmandatefordataset() { - return openaccessmandatefordataset; - } + public List getSubject() { + return subject; + } - public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) { - this.openaccessmandatefordataset = openaccessmandatefordataset; - } + public void setSubject(List subject) { + this.subject = subject; + } - public List getSubject() { - return subject; - } + public List getFunding() { + return funding; + } - public void setSubject(List subject) { - this.subject = subject; - } + public void setFunding(List funding) { + this.funding = funding; + } - public List getFunding() { - return funding; - } + public String getSummary() { + return summary; + } - public void setFunding(List funding) { - this.funding = funding; - } + public void setSummary(String summary) { + this.summary = summary; + } - public String getSummary() { - return summary; - } + public Granted getGranted() { + return granted; + } - public void setSummary(String summary) { - this.summary = summary; - } + public void setGranted(Granted granted) { + this.granted = granted; + } - public Granted getGranted() { - return granted; - } + public List getH2020programme() { + return h2020programme; + } - public void setGranted(Granted granted) { - this.granted = granted; - } - - public List getH2020programme() { - return h2020programme; - } - - public void setH2020programme(List h2020programme) { - this.h2020programme = h2020programme; - } + public void setH2020programme(List h2020programme) { + this.h2020programme = h2020programme; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java index ff40538..7aa692a 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganization.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.eosc.model.Affiliation; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -20,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.Affiliation; import eu.dnetlib.dhp.eosc.model.OrganizationPid; import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.common.ModelConstants; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 14f4836..1ca6f20 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -7,8 +7,6 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.eosc.model.Affiliation; -import eu.dnetlib.dhp.eosc.model.Country; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -22,6 +20,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.Affiliation; +import eu.dnetlib.dhp.eosc.model.Country; import eu.dnetlib.dhp.eosc.model.OrganizationPid; import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -70,7 +70,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); - addOrganizations(spark, inputPath, workingPath , outputPath); + addOrganizations(spark, inputPath, workingPath, outputPath); }); } @@ -156,85 +156,89 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .json(workingPath + "publicationextendedaffiliation"); relations - .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization(t2._2()),Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "organization"); + .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( + t2._2()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organization"); relations - .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation.newInstance(t2._1().getSource(), t2._1().getTarget()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class) ) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "resultOrganization"); + .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "resultOrganization"); } - private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org){ + private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org) { - if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) - return null; - if (!Optional.ofNullable(org.getLegalname()).isPresent() - && !Optional.ofNullable(org.getLegalshortname()).isPresent()) - return null; + if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) + return null; + if (!Optional.ofNullable(org.getLegalname()).isPresent() + && !Optional.ofNullable(org.getLegalshortname()).isPresent()) + return null; eu.dnetlib.dhp.eosc.model.Organization organization = new eu.dnetlib.dhp.eosc.model.Organization(); - Optional - .ofNullable(org.getLegalshortname()) - .ifPresent(value -> organization.setLegalshortname(value.getValue())); + Optional + .ofNullable(org.getLegalshortname()) + .ifPresent(value -> organization.setLegalshortname(value.getValue())); - Optional - .ofNullable(org.getLegalname()) - .ifPresent(value -> organization.setLegalname(value.getValue())); + Optional + .ofNullable(org.getLegalname()) + .ifPresent(value -> organization.setLegalname(value.getValue())); - Optional - .ofNullable(org.getWebsiteurl()) - .ifPresent(value -> organization.setWebsiteurl(value.getValue())); + Optional + .ofNullable(org.getWebsiteurl()) + .ifPresent(value -> organization.setWebsiteurl(value.getValue())); - Optional - .ofNullable(org.getAlternativeNames()) - .ifPresent( - value -> organization - .setAlternativenames( - value - .stream() - .map(v -> v.getValue()) - .collect(Collectors.toList()))); + Optional + .ofNullable(org.getAlternativeNames()) + .ifPresent( + value -> organization + .setAlternativenames( + value + .stream() + .map(v -> v.getValue()) + .collect(Collectors.toList()))); - Optional - .ofNullable(org.getCountry()) - .ifPresent( - value -> { - if (!value.getClassid().equals(UNKNOWN)) { - organization - .setCountry( - Country.newInstance(value.getClassid(), value.getClassname())); - } + Optional + .ofNullable(org.getCountry()) + .ifPresent( + value -> { + if (!value.getClassid().equals(UNKNOWN)) { + organization + .setCountry( + Country.newInstance(value.getClassid(), value.getClassname())); + } - }); + }); - Optional - .ofNullable(org.getId()) - .ifPresent(value -> organization.setId(value)); + Optional + .ofNullable(org.getId()) + .ifPresent(value -> organization.setId(value)); - Optional - .ofNullable(org.getPid()) - .ifPresent( - value -> organization - .setPid( - value - .stream() - .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); - - return organization; - } + Optional + .ofNullable(org.getPid()) + .ifPresent( + value -> organization + .setPid( + value + .stream() + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + return organization; } - +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java similarity index 56% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 607b014..8fcbce8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -6,8 +6,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; +import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -21,19 +25,20 @@ import eu.dnetlib.dhp.eosc.model.Provenance; import eu.dnetlib.dhp.eosc.model.RelType; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; /** * @author miriam.baglioni * @Date 12/01/23 */ -public class SparkDumpRelation implements Serializable { +public class SparkDumpOrganizationProject implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkDumpRelation.class); + private static final Logger log = LoggerFactory.getLogger(SparkDumpOrganizationProject.class); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - SparkDumpRelation.class + SparkDumpOrganizationProject.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); @@ -66,49 +71,21 @@ public class SparkDumpRelation implements Serializable { } private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { - Dataset relations = Utils.readPath(spark, inputPath, Relation.class); - relations + Dataset organization = Utils.readPath(spark, outputPath + "organization", Organization.class); + Dataset project = Utils.readPath(spark, outputPath + "project", Project.class); - .map((MapFunction) relation -> { - eu.dnetlib.dhp.eosc.model.Relation relNew = new eu.dnetlib.dhp.eosc.model.Relation(); - relNew - .setSource( + Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class) + .filter((FilterFunction) r-> !r.getDataInfo().getDeletedbyinference() && r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PARTICIPANT)); - relation.getSource()); + Dataset eoscOrgs = relation.joinWith(organization, relation.col("source").equalTo(organization.col("id"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - relNew - .setTarget( - - relation.getTarget()); - - relNew - .setReltype( - RelType - .newInstance( - relation.getRelClass(), - relation.getSubRelType())); - - Optional odInfo = Optional.ofNullable(relation.getDataInfo()); - if (odInfo.isPresent()) { - DataInfo dInfo = odInfo.get(); - if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && - Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { - relNew - .setProvenance( - Provenance - .newInstance( - dInfo.getProvenanceaction().getClassname(), - dInfo.getTrust())); - } - } - - return relNew; - - }, Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Append) - .json(outputPath); + eoscOrgs.joinWith(project, eoscOrgs.col("target").equalTo(project.col("id"))) + .map((MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2-> eu.dnetlib.dhp.eosc.model.Relation.newInstance(t2._1().getSource(), t2._1().getTarget()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .json(outputPath + "organizationProject"); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 2bd2978..cb9eaaf 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -10,9 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.eosc.model.*; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Project; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -31,7 +28,10 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.*; import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Project; import scala.Array; import scala.Tuple2; @@ -110,68 +110,86 @@ public class SparkUpdateProjectInfo implements Serializable { Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); - Dataset projectIds = result.joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId"))) - .flatMap((FlatMapFunction, String>) t2 -> t2._2().getProjectsList() - .stream().map(p -> p.getId()).collect(Collectors.toList()).iterator(), Encoders.STRING()) - .distinct(); + Dataset projectIds = result + .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId"))) + .flatMap( + (FlatMapFunction, String>) t2 -> t2 + ._2() + .getProjectsList() + .stream() + .map(p -> p.getId()) + .collect(Collectors.toList()) + .iterator(), + Encoders.STRING()) + .distinct(); - projectIds.joinWith(project, projectIds.col("value").equalTo(project.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Project>)t2->mapProject(t2._2()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Project.class) ) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "project"); + projectIds + .joinWith(project, projectIds.col("value").equalTo(project.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Project>) t2 -> mapProject(t2._2()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Project.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "project"); - resultProject.flatMap((FlatMapFunction) rp -> - rp.getProjectsList().stream().map(p -> Relation.newInstance(rp.getResultId(), p.getId())) - .collect(Collectors.toList()).iterator(), Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "resultProject"); + resultProject + .flatMap( + (FlatMapFunction) rp -> rp + .getProjectsList() + .stream() + .map(p -> Relation.newInstance(rp.getResultId(), p.getId())) + .collect(Collectors.toList()) + .iterator(), + Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "resultProject"); } - private static eu.dnetlib.dhp.eosc.model.Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException { + private static eu.dnetlib.dhp.eosc.model.Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) + throws DocumentException { if (Boolean.TRUE.equals(p.getDataInfo().getDeletedbyinference())) return null; eu.dnetlib.dhp.eosc.model.Project project = new eu.dnetlib.dhp.eosc.model.Project(); Optional - .ofNullable(p.getId()) - .ifPresent(id -> project.setId(id)); + .ofNullable(p.getId()) + .ifPresent(id -> project.setId(id)); Optional - .ofNullable(p.getWebsiteurl()) - .ifPresent(w -> project.setWebsiteurl(w.getValue())); + .ofNullable(p.getWebsiteurl()) + .ifPresent(w -> project.setWebsiteurl(w.getValue())); Optional - .ofNullable(p.getCode()) - .ifPresent(code -> project.setCode(code.getValue())); + .ofNullable(p.getCode()) + .ifPresent(code -> project.setCode(code.getValue())); Optional - .ofNullable(p.getAcronym()) - .ifPresent(acronynim -> project.setAcronym(acronynim.getValue())); + .ofNullable(p.getAcronym()) + .ifPresent(acronynim -> project.setAcronym(acronynim.getValue())); Optional - .ofNullable(p.getTitle()) - .ifPresent(title -> project.setTitle(title.getValue())); + .ofNullable(p.getTitle()) + .ifPresent(title -> project.setTitle(title.getValue())); Optional - .ofNullable(p.getStartdate()) - .ifPresent(sdate -> project.setStartdate(sdate.getValue())); + .ofNullable(p.getStartdate()) + .ifPresent(sdate -> project.setStartdate(sdate.getValue())); Optional - .ofNullable(p.getEnddate()) - .ifPresent(edate -> project.setEnddate(edate.getValue())); + .ofNullable(p.getEnddate()) + .ifPresent(edate -> project.setEnddate(edate.getValue())); Optional - .ofNullable(p.getCallidentifier()) - .ifPresent(cide -> project.setCallidentifier(cide.getValue())); + .ofNullable(p.getCallidentifier()) + .ifPresent(cide -> project.setCallidentifier(cide.getValue())); Optional - .ofNullable(p.getKeywords()) - .ifPresent(key -> project.setKeywords(key.getValue())); + .ofNullable(p.getKeywords()) + .ifPresent(key -> project.setKeywords(key.getValue())); Optional> omandate = Optional.ofNullable(p.getOamandatepublications()); Optional> oecsc39 = Optional.ofNullable(p.getEcsc39()); @@ -191,19 +209,19 @@ public class SparkUpdateProjectInfo implements Serializable { project.setOpenaccessmandatefordataset(false); Optional - .ofNullable(p.getEcarticle29_3()) - .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true"))); + .ofNullable(p.getEcarticle29_3()) + .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true"))); project - .setSubject( - Optional - .ofNullable(p.getSubjects()) - .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList())) - .orElse(new ArrayList<>())); + .setSubject( + Optional + .ofNullable(p.getSubjects()) + .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList())) + .orElse(new ArrayList<>())); Optional - .ofNullable(p.getSummary()) - .ifPresent(summary -> project.setSummary(summary.getValue())); + .ofNullable(p.getSummary()) + .ifPresent(summary -> project.setSummary(summary.getValue())); Optional ofundedamount = Optional.ofNullable(p.getFundedamount()); Optional> ocurrency = Optional.ofNullable(p.getCurrency()); @@ -213,8 +231,8 @@ public class SparkUpdateProjectInfo implements Serializable { if (ofundedamount.isPresent()) { if (ototalcost.isPresent()) { project - .setGranted( - Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get())); + .setGranted( + Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get())); } else { project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get())); } @@ -222,21 +240,21 @@ public class SparkUpdateProjectInfo implements Serializable { } project - .setH2020programme( - Optional - .ofNullable(p.getH2020classification()) - .map( - classification -> classification - .stream() - .map( - c -> Programme - .newInstance( - c.getH2020Programme().getCode(), c.getH2020Programme().getDescription())) - .collect(Collectors.toList())) - .orElse(new ArrayList<>())); + .setH2020programme( + Optional + .ofNullable(p.getH2020classification()) + .map( + classification -> classification + .stream() + .map( + c -> Programme + .newInstance( + c.getH2020Programme().getCode(), c.getH2020Programme().getDescription())) + .collect(Collectors.toList())) + .orElse(new ArrayList<>())); Optional>> ofundTree = Optional - .ofNullable(p.getFundingtree()); + .ofNullable(p.getFundingtree()); List funList = new ArrayList<>(); if (ofundTree.isPresent()) { for (Field fundingtree : ofundTree.get()) { diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json new file mode 100644 index 0000000..763e0df --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json @@ -0,0 +1,26 @@ + +[ + + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the name node", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the path used to store temporary output files", + "paramRequired": false + } +] + + + diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java index 691cdab..13eb61c 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java @@ -24,8 +24,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.eosc.model.Indicator; import eu.dnetlib.dhp.eosc.model.Affiliation; +import eu.dnetlib.dhp.eosc.model.Indicator; import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.action.AtomicAction; import scala.Tuple2; From 25267c16898f0452789f1613b54b3366469c38ec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:48:42 +0200 Subject: [PATCH 05/25] extended the workflow to add the dump for the relations --- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index def9f0c..077bb4f 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -603,7 +603,31 @@ - + + + + + yarn + cluster + Dump for the relations between organization and projects in the subset of entities relevant for EOSC + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkDumpOrganizationProject + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${outputPath}/dump/ + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar From 8d83b5173ca0d7c436d805f507ad3ea18f155e66 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:59:16 +0200 Subject: [PATCH 06/25] extended the model to accomodate the new entities and relation to be dumped --- .../eu/dnetlib/dhp/eosc/model/Funder.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Fundings.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Granted.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Programme.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Project.java | 5 +++ .../eosc/SparkDumpOrganizationProject.java | 34 ++++++++++++------- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java index eb8df14..6af9bb8 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java index 0440be3..739bf69 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java index 26b28ef..178bd48 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java index 0d90081..d1190f7 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index 6e71764..53bfe6b 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 8fcbce8..62aef46 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -6,9 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; -import eu.dnetlib.dhp.eosc.model.Organization; -import eu.dnetlib.dhp.eosc.model.Project; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -21,8 +18,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Project; import eu.dnetlib.dhp.eosc.model.Provenance; import eu.dnetlib.dhp.eosc.model.RelType; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -74,18 +74,26 @@ public class SparkDumpOrganizationProject implements Serializable { Dataset organization = Utils.readPath(spark, outputPath + "organization", Organization.class); Dataset project = Utils.readPath(spark, outputPath + "project", Project.class); - Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class) - .filter((FilterFunction) r-> !r.getDataInfo().getDeletedbyinference() && r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PARTICIPANT)); + Dataset relation = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() + && r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PARTICIPANT)); - Dataset eoscOrgs = relation.joinWith(organization, relation.col("source").equalTo(organization.col("id"))) - .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); + Dataset eoscOrgs = relation + .joinWith(organization, relation.col("source").equalTo(organization.col("id"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - eoscOrgs.joinWith(project, eoscOrgs.col("target").equalTo(project.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2-> eu.dnetlib.dhp.eosc.model.Relation.newInstance(t2._1().getSource(), t2._1().getTarget()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "organizationProject"); + eoscOrgs + .joinWith(project, eoscOrgs.col("target").equalTo(project.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organizationProject"); } From aa48d5270779fcdb3dfdc73cd7a9afc9cae47920 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 15:18:44 +0200 Subject: [PATCH 07/25] fixing issues --- ...ExtendEoscResultWithOrganizationStep2.java | 36 ++++++++++--------- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 4 +++ ...d_result_with_organization_parameters.json | 7 +++- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 1ca6f20..af4bb19 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -63,6 +63,9 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final String resultType = parser.get("resultType"); + log.info("resultType: {}", resultType); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -70,14 +73,14 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); - addOrganizations(spark, inputPath, workingPath, outputPath); + addOrganizations(spark, inputPath, workingPath, outputPath, resultType); }); } - private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath) { + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath, String resultType) { Dataset results = Utils - .readPath(spark, workingPath + "publication", Result.class); + .readPath(spark, workingPath + "resultType", Result.class); Dataset relations = Utils .readPath(spark, inputPath + "/relation", Relation.class) @@ -155,20 +158,21 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .option("compression", "gzip") .json(workingPath + "publicationextendedaffiliation"); - relations - .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) - .map( - (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( - t2._2()), - Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "organization"); + Dataset organizationWithAffiliation = relations + .joinWith(results, relations.col("target").equalTo(results.col("id"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - relations - .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + organizationWithAffiliation.joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) + .map((MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( + t2._2()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organization"); + + organizationWithAffiliation.joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) .map( (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation .newInstance(t2._1().getSource(), t2._1().getTarget()), diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index 077bb4f..fd1bcdc 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -166,6 +166,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/dump/ --outputPath${outputPath}/dump/ + --resultTypepublication @@ -218,6 +219,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/dump/ --outputPath${outputPath}/dump/ + --resultTypedataset @@ -271,6 +273,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/dump/ --outputPath${outputPath}/dump/ + --resultTypeotherresearchproduct @@ -323,6 +326,7 @@ --sourcePath${sourcePath} --workingPath${workingDir}/dump/ --outputPath${outputPath}/dump/ + --resultTypesoftware diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_extend_result_with_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_extend_result_with_organization_parameters.json index 3a448b6..3bd3108 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_extend_result_with_organization_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_extend_result_with_organization_parameters.json @@ -28,7 +28,12 @@ "paramLongName":"workingPath", "paramDescription": "The path to the community map", "paramRequired": false -} +}, + { + "paramName":"rt", + "paramLongName":"resultType", + "paramDescription": "The path to the community map", + "paramRequired": false} ] From c946f5c5b8eb1c74e2b4d1f5020af6f13749a606 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 15:38:46 +0200 Subject: [PATCH 08/25] - --- .../eu/dnetlib/dhp/eosc/model/Funder.java | 1 - .../eu/dnetlib/dhp/eosc/model/Fundings.java | 1 - .../eu/dnetlib/dhp/eosc/model/Granted.java | 1 - .../eu/dnetlib/dhp/eosc/model/Programme.java | 1 - .../eu/dnetlib/dhp/eosc/model/Project.java | 1 - ...ExtendEoscResultWithOrganizationStep2.java | 32 +++++++++++-------- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java index 6af9bb8..99d8366 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.eosc.model; * @author miriam.baglioni * @Date 25/10/23 */ - /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java index 739bf69..bbafadd 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.eosc.model; * @author miriam.baglioni * @Date 25/10/23 */ - /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java index 178bd48..768d687 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.eosc.model; * @author miriam.baglioni * @Date 25/10/23 */ - /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java index d1190f7..1d35957 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.eosc.model; * @author miriam.baglioni * @Date 25/10/23 */ - /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index 53bfe6b..e30f063 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.eosc.model; * @author miriam.baglioni * @Date 25/10/23 */ - /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index af4bb19..3958bdd 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -77,10 +77,11 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { }); } - private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath, String resultType) { + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath, + String resultType) { Dataset results = Utils - .readPath(spark, workingPath + "resultType", Result.class); + .readPath(spark, workingPath + resultType, Result.class); Dataset relations = Utils .readPath(spark, inputPath + "/relation", Relation.class) @@ -159,20 +160,23 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .json(workingPath + "publicationextendedaffiliation"); Dataset organizationWithAffiliation = relations - .joinWith(results, relations.col("target").equalTo(results.col("id"))) - .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); + .joinWith(results, relations.col("target").equalTo(results.col("id"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - organizationWithAffiliation.joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( - t2._2()), - Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "organization"); + organizationWithAffiliation + .joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( + t2._2()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organization"); - organizationWithAffiliation.joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) + organizationWithAffiliation + .joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) .map( (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation .newInstance(t2._1().getSource(), t2._1().getTarget()), From 5a3f0d949cf49af360f8276c5109d5a22e023c4f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 26 Oct 2023 08:48:52 +0200 Subject: [PATCH 09/25] - --- ...ExtendEoscResultWithOrganizationStep2.java | 76 +++++++----- .../dump/eosc/ExtendAffiliationTest.java | 116 ++++++++++++++++++ 2 files changed, 164 insertions(+), 28 deletions(-) create mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 3958bdd..48de341 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -74,9 +74,56 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { spark -> { Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); addOrganizations(spark, inputPath, workingPath, outputPath, resultType); + dumpOrganizationAndRelations(spark, inputPath, workingPath, outputPath, resultType); }); } + private static void dumpOrganizationAndRelations(SparkSession spark, String inputPath, String workingPath, + String outputPath, String resultType) { + Dataset relation = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && + r.getSubRelType().equalsIgnoreCase(ModelConstants.AFFILIATION)); + + Dataset organization = Utils + .readPath(spark, inputPath + "/organization", Organization.class) + .filter((FilterFunction) o -> !o.getDataInfo().getDeletedbyinference()); + + Dataset result = Utils.readPath(spark, workingPath + resultType, Result.class); + + // result -> organization takes the relation of type affiliation having the source in the results related to + // EOSC + Dataset eoscRelation = result + .joinWith(relation, result.col("id").equalTo(relation.col("source"))) + .map((MapFunction, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class)); + + System.out.println(eoscRelation.count()); + // from eoscRelation select the organization + eoscRelation + .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( + t2._2()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organization"); + + eoscRelation + .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "resultOrganization"); + + } + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath, String resultType) { @@ -157,34 +204,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(workingPath + "publicationextendedaffiliation"); - - Dataset organizationWithAffiliation = relations - .joinWith(results, relations.col("target").equalTo(results.col("id"))) - .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - - organizationWithAffiliation - .joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) - .map( - (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( - t2._2()), - Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "organization"); - - organizationWithAffiliation - .joinWith(organizations, organizationWithAffiliation.col("source").equalTo(organizations.col("id"))) - .map( - (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation - .newInstance(t2._1().getSource(), t2._1().getTarget()), - Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "resultOrganization"); + .json(workingPath + resultType + "extendedaffiliation"); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java new file mode 100644 index 0000000..3c9034d --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java @@ -0,0 +1,116 @@ + +package eu.dnetlib.dhp.oa.graph.dump.eosc; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Result; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ +public class ExtendAffiliationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(ExtendAffiliationTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(ExtendAffiliationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ExtendAffiliationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ExtendAffiliationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void selectEoscResults() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") + .getPath(); + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") + .getPath(); + final String mdp = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/masterduplicate") + .getPath(); + + ExtendEoscResultWithOrganizationStep2.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/", + "-sourcePath", sourcePath, + "-resultType", "publication", + "-workingPath", workingPath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/organization") + .map(item -> OBJECT_MAPPER.readValue(item, Organization.class)); + + System.out.println(tmp.count()); + +// Assertions.assertEquals(3, tmp.count()); +// +// Assertions +// .assertEquals( +// 0, +// tmp +// .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0) +// .count()); +// +// tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + } + +} From 94656b6530e43e90a2f06922928412592ce53abc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 27 Oct 2023 09:29:02 +0200 Subject: [PATCH 10/25] Added parameter file. Fixed issues in path name --- ...ExtendEoscResultWithOrganizationStep2.java | 18 +- .../dump/eosc/SparkUpdateProjectInfo.java | 14 +- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 20 ++- .../dump/eosc_project_input_parameters.json | 6 + .../eosc_relationorgproject_parameter.json | 23 +++ .../dump/eosc/ExtendAffiliationTest.java | 159 ++++++++++++++++-- .../graph/dump/eosc/SelectEoscResultTest.java | 130 -------------- .../dump/eosc/input/organization/organization | 4 +- .../graph/dump/eosc/input/relation/relation | 6 +- 9 files changed, 219 insertions(+), 161 deletions(-) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_relationorgproject_parameter.json diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 48de341..57a3e24 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -10,6 +10,7 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; @@ -19,6 +20,8 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.eosc.model.Affiliation; import eu.dnetlib.dhp.eosc.model.Country; @@ -72,7 +75,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { conf, isSparkSessionManaged, spark -> { - Utils.removeOutputDir(spark, workingPath + "publicationextendedaffiliation"); + Utils.removeOutputDir(spark, workingPath + resultType + "extendedaffiliation"); addOrganizations(spark, inputPath, workingPath, outputPath, resultType); dumpOrganizationAndRelations(spark, inputPath, workingPath, outputPath, resultType); }); @@ -98,7 +101,9 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .joinWith(relation, result.col("id").equalTo(relation.col("source"))) .map((MapFunction, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class)); - System.out.println(eoscRelation.count()); + eoscRelation + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + // from eoscRelation select the organization eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) @@ -106,6 +111,11 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( t2._2()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + .groupByKey((MapFunction) o -> o.getId(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) ( + k, v) -> v.next(), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -136,7 +146,9 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible() && r.getSubRelType().equalsIgnoreCase(ModelConstants.AFFILIATION)); - Dataset organizations = Utils.readPath(spark, inputPath + "/organization", Organization.class); + Dataset organizations = Utils + .readPath(spark, inputPath + "/organization", Organization.class) + .filter((FilterFunction) o -> !o.getDataInfo().getDeletedbyinference()); Dataset resultOrganization = relations .joinWith(organizations, relations.col("source").equalTo(organizations.col("id")), "left") diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index cb9eaaf..38b886f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -68,6 +68,9 @@ public class SparkUpdateProjectInfo implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final String resultType = parser.get("resultType"); + log.info("resultType: {}", resultType); + final String dumpType = Optional .ofNullable(parser.get("dumpType")) .orElse(Constants.DUMPTYPE.COMMUNITY.getType()); @@ -79,8 +82,8 @@ public class SparkUpdateProjectInfo implements Serializable { conf, isSparkSessionManaged, spark -> { - Utils.removeOutputDir(spark, workingPath + "publicationextendedproject"); - extend(spark, inputPath, workingPath, preparedInfoPath, outputPath); + Utils.removeOutputDir(spark, workingPath + resultType + "extendedproject"); + extend(spark, inputPath, workingPath, preparedInfoPath, outputPath, resultType); }); } @@ -89,9 +92,10 @@ public class SparkUpdateProjectInfo implements Serializable { String inputPath, String workingPath, String preparedInfoPath, - String outputPath) { + String outputPath, + String resultType) { - Dataset result = Utils.readPath(spark, workingPath + "publicationextendedaffiliation", Result.class); + Dataset result = Utils.readPath(spark, workingPath + resultType + "extendedaffiliation", Result.class); Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); result @@ -106,7 +110,7 @@ public class SparkUpdateProjectInfo implements Serializable { .write() .option("compression", "gzip") .mode(SaveMode.Append) - .json(workingPath + "publicationextendedproject"); + .json(workingPath + resultType + "extendedproject"); Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index fd1bcdc..c435fae 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -388,6 +388,7 @@ --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc --outputPath${outputPath}/dump/ + --resultTypepublication @@ -410,10 +411,12 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/datasetextendedaffiliation - --outputPath${workingDir}/dump/datasetextendedproject + --sourcePath${sourcePath} + --workingPath${workingDir}/dump/ --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc + --outputPath${outputPath}/dump/ + --resultTypedataset @@ -436,10 +439,12 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/otherresearchproductextendedaffiliation - --outputPath${workingDir}/dump/otherresearchproductextendedproject + --sourcePath${sourcePath} + --workingPath${workingDir}/dump/ --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc + --outputPath${outputPath}/dump/ + --resultTypeotherresearchproduct @@ -462,11 +467,12 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/softwareextendedaffiliation - - --outputPath${workingDir}/dump/softwareextendedproject + --sourcePath${sourcePath} + --workingPath${workingDir}/dump/ --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc + --outputPath${outputPath}/dump/ + --resultTypesoftware diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json index 7b587aa..d8f1201 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json @@ -35,6 +35,12 @@ "paramLongName": "workingPath", "paramDescription": "the working path", "paramRequired": false + }, + { + "paramName": "rt", + "paramLongName": "resultType", + "paramDescription": "the working path", + "paramRequired": false } ] diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_relationorgproject_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_relationorgproject_parameter.json new file mode 100644 index 0000000..b3bce9a --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_relationorgproject_parameter.json @@ -0,0 +1,23 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + } +] + + diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java index 3c9034d..cf7d016 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.List; import java.util.Optional; import org.apache.commons.io.FileUtils; @@ -21,7 +22,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.eosc.model.Affiliation; import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Relation; import eu.dnetlib.dhp.eosc.model.Result; /** @@ -70,6 +73,139 @@ public class ExtendAffiliationTest { spark.stop(); } + @Test + public void ExtendEoscResultWithOrganizationTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") + .getPath(); + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") + .getPath(); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + spark + .read() + .textFile("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/publication") + .write() + .text(workingDir.toString() + "/working/publication"); + + ExtendEoscResultWithOrganizationStep2.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/", + "-sourcePath", sourcePath, + "-resultType", "publication", + "-workingPath", workingDir.toString() + "/working" + }); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/working/publicationextendedaffiliation") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(3, tmp.count()); + + Assertions + .assertEquals( + 2, + tmp + .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0) + .count()); + + Assertions + .assertEquals( + 2, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) + .first() + .getAffiliation() + .size()); + + List affiliations = tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) + .first() + .getAffiliation(); + + Assertions + .assertTrue( + affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)"))); + Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE"))); + + Affiliation organization = affiliations + .stream() + .filter(a -> a.getId().equalsIgnoreCase("20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff")) + .findFirst() + .get(); + Assertions.assertEquals("Doris Engineering (France)", organization.getName()); + Assertions + .assertTrue( + organization + .getPid() + .stream() + .anyMatch( + p -> p.getValue().equalsIgnoreCase("grid.432986.2") && p.getType().equalsIgnoreCase("grid"))); + Assertions + .assertTrue( + organization + .getPid() + .stream() + .anyMatch( + p -> p.getValue().equalsIgnoreCase("https://ror.org/03nd0ms94") + && p.getType().equalsIgnoreCase("ror"))); + Assertions.assertEquals(2, organization.getPid().size()); + + organization = affiliations + .stream() + .filter(a -> a.getId().equalsIgnoreCase("20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017")) + .findFirst() + .get(); + Assertions.assertEquals("RENNES METROPOLE", organization.getName()); + Assertions.assertEquals(1, organization.getPid().size()); + Assertions + .assertTrue( + organization.getPid().get(0).getValue().equalsIgnoreCase("892062829") + && organization.getPid().get(0).getType().equalsIgnoreCase("pic")); + + Assertions + .assertEquals( + 1, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) + .first() + .getAffiliation() + .size()); + Assertions + .assertEquals( + "MIKARE RESEARCH", + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) + .first() + .getAffiliation() + .get(0) + .getName()); + Assertions + .assertEquals( + 0, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) + .first() + .getAffiliation() + .get(0) + .getPid() + .size()); + + Assertions + .assertFalse( + Optional + .ofNullable( + tmp + .filter( + r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f")) + .first() + .getAffiliation()) + .isPresent()); + } + @Test public void selectEoscResults() throws Exception { @@ -80,9 +216,6 @@ public class ExtendAffiliationTest { final String workingPath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") .getPath(); - final String mdp = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/masterduplicate") - .getPath(); ExtendEoscResultWithOrganizationStep2.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -98,18 +231,18 @@ public class ExtendAffiliationTest { .textFile(workingDir.toString() + "/organization") .map(item -> OBJECT_MAPPER.readValue(item, Organization.class)); + JavaRDD rels = sc + .textFile(workingDir.toString() + "/resultOrganization") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + System.out.println(tmp.count()); -// Assertions.assertEquals(3, tmp.count()); -// -// Assertions -// .assertEquals( -// 0, -// tmp -// .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0) -// .count()); -// -// tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + Assertions.assertEquals(2, tmp.count()); + + Assertions.assertEquals(2, rels.count()); + + rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); + rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|"))); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java index 13eb61c..0b7ffbf 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java @@ -128,136 +128,6 @@ public class SelectEoscResultTest { // legalname = MIKARE RESEARCH // pid = [] // for 50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f no affiliation relation is provided - @Test - public void ExtendEoscResultWithOrganizationTest() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") - .getPath(); - - final String cmp = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - String resultPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/publication") - .getPath(); - - ExtendEoscResultWithOrganizationStep2.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/publication", - "-sourcePath", sourcePath, -// "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-resultPath", resultPath - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/publication") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - - Assertions.assertEquals(3, tmp.count()); - - Assertions - .assertEquals( - 2, - tmp - .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0) - .count()); - - Assertions - .assertEquals( - 2, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) - .first() - .getAffiliation() - .size()); - - List affiliations = tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) - .first() - .getAffiliation(); - - Assertions - .assertTrue( - affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)"))); - Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE"))); - - Affiliation organization = affiliations - .stream() - .filter(a -> a.getId().equalsIgnoreCase("20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff")) - .findFirst() - .get(); - Assertions.assertEquals("Doris Engineering (France)", organization.getName()); - Assertions - .assertTrue( - organization - .getPid() - .stream() - .anyMatch( - p -> p.getValue().equalsIgnoreCase("grid.432986.2") && p.getType().equalsIgnoreCase("grid"))); - Assertions - .assertTrue( - organization - .getPid() - .stream() - .anyMatch( - p -> p.getValue().equalsIgnoreCase("https://ror.org/03nd0ms94") - && p.getType().equalsIgnoreCase("ror"))); - Assertions.assertEquals(2, organization.getPid().size()); - - organization = affiliations - .stream() - .filter(a -> a.getId().equalsIgnoreCase("20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017")) - .findFirst() - .get(); - Assertions.assertEquals("RENNES METROPOLE", organization.getName()); - Assertions.assertEquals(1, organization.getPid().size()); - Assertions - .assertTrue( - organization.getPid().get(0).getValue().equalsIgnoreCase("892062829") - && organization.getPid().get(0).getType().equalsIgnoreCase("pic")); - - Assertions - .assertEquals( - 1, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .size()); - Assertions - .assertEquals( - "MIKARE RESEARCH", - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .get(0) - .getName()); - Assertions - .assertEquals( - 0, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .get(0) - .getPid() - .size()); - - Assertions - .assertFalse( - Optional - .ofNullable( - tmp - .filter( - r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f")) - .first() - .getAffiliation()) - .isPresent()); - } @Test public void verifyIndicatorsTest() throws Exception { diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/organization/organization b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/organization/organization index 7ae63f4..958208e 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/organization/organization +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/organization/organization @@ -1,6 +1,6 @@ -{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::14998bc495ea97a2388f193a6a8ed4c1","value":"CHIST-ERA"}],"country":{"classid":"GB","classname":"United Kingdom","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2022-02-08","dateoftransformation":"2022-04-27","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"extraInfo":[],"id":"20|chistera____::9146e9ef10640675f361d674e77bd254","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"School of Computer Science, University of Birmingham"},"originalId":["chistera____::b232e58bac079622eda0536cd5832e39"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::14998bc495ea97a2388f193a6a8ed4c1","value":"CHIST-ERA"}],"country":{"classid":"GB","classname":"United Kingdom","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2022-02-08","dateoftransformation":"2022-04-27","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"extraInfo":[],"id":"20|chistera____::9146e9ef10640675f361d674e77bd254","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"School of Computer Science, University of Birmingham"},"originalId":["chistera____::b232e58bac079622eda0536cd5832e39"],"pid":[]} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"FR","classname":"France","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-03-29","dateoftransformation":"2022-08-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"RENNES METROPOLE"},"originalId":["corda__h2020::892062829"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"PIC","classname":"PIC","schemeid":"","schemename":""},"value":"892062829"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"http://metropole.rennes.fr"}} -{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"SE","classname":"Sweden","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2018-03-12","dateoftransformation":"2022-07-30","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"EVOTHINGS AB"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"EVOTHINGS"},"originalId":["corda__h2020::922724335"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"PIC","classname":"PIC","schemeid":"","schemename":""},"value":"922724335"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"https://evothings.com"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"SE","classname":"Sweden","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2018-03-12","dateoftransformation":"2022-07-30","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"EVOTHINGS AB"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"EVOTHINGS"},"originalId":["corda__h2020::922724335"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"PIC","classname":"PIC","schemeid":"","schemename":""},"value":"922724335"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"https://evothings.com"}} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2016-07-20","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"extraInfo":[],"id":"20|____________::d1b0ee22411434cf905692d0fac25749","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"MIKARE RESEARCH"},"originalId":["nih_________::MIKARE_RESEARCH"],"pid":[]} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"dateofcollection":"2016-07-15","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"extraInfo":[],"id":"20|nih_________::7523ba08be91b521952082f0c25daf5f","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"XENOTECH, LLC"},"originalId":["nih_________::XENOTECH__LLC"],"pid":[]} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8","value":"NSF - National Science Foundation"}],"country":{"classid":"CA","classname":"Canada","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"dateofcollection":"2019-09-12","dateoftransformation":"2019-09-12","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"extraInfo":[],"id":"20|nsf_________::47d4993b8a0c212e9f7c5db3ffec3111","lastupdatetimestamp":1663596903288,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"Johnson Benjamin W"},"originalId":["nsf_________::Johnson_________________Benjamin_______W"],"pid":[]} diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation index f00c8ec..7226c93 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation @@ -9,4 +9,8 @@ {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|____________::d1b0ee22411434cf905692d0fac25749","subRelType":"affiliation","target":"50|pmid________::3a5bb2b50c18e755cbe67b9ca7d821ee","validated":false} {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","target":"50|nora_uio__no::01152f3e683765695bbad68fc692b85e","validated":false} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::0838366fa1df3c1599ddefc2168ada5d","subRelType":"affiliation","target":"50|arXiv_______::abe2b16af6067994dda4beab6410b35d","validated":false} \ No newline at end of file +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::0838366fa1df3c1599ddefc2168ada5d","subRelType":"affiliation","target":"50|arXiv_______::abe2b16af6067994dda4beab6410b35d","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} \ No newline at end of file From 52a6e2f7ffa54793cad7299cbbb70e5ce57c868d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 27 Oct 2023 10:51:58 +0200 Subject: [PATCH 11/25] adding testing classes --- .../eosc/SparkDumpOrganizationProject.java | 2 +- .../dump/eosc/SparkUpdateProjectInfo.java | 7 +- ...anizationprojectrelations_parameters.json} | 0 .../oa/graph/dump/eosc/ExtendProjectTest.java | 218 ++++++++++++++++++ .../oa/graph/dump/eosc/input/project/project | 15 ++ .../eosc/working/preparedInfo/preparedInfo | 2 + 6 files changed, 238 insertions(+), 6 deletions(-) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{eosc_dump_organizationprojectrelations.json => eosc_organizationprojectrelations_parameters.json} (100%) create mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/project/project create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/preparedInfo/preparedInfo diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 62aef46..4aa9dba 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -40,7 +40,7 @@ public class SparkDumpOrganizationProject implements Serializable { .toString( SparkDumpOrganizationProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 38b886f..e9fdb87 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -13,11 +13,9 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.*; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; @@ -96,7 +94,6 @@ public class SparkUpdateProjectInfo implements Serializable { String resultType) { Dataset result = Utils.readPath(spark, workingPath + resultType + "extendedaffiliation", Result.class); - Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); result .joinWith( diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_dump_organizationprojectrelations.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java new file mode 100644 index 0000000..d463b57 --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java @@ -0,0 +1,218 @@ + +package eu.dnetlib.dhp.oa.graph.dump.eosc; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.eosc.model.*; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ +public class ExtendProjectTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(ExtendProjectTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(ExtendProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(ExtendProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(ExtendProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void ExtendEoscResultWithProjectTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") + .getPath(); + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") + .getPath(); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + spark + .read() + .textFile(workingPath + "publication") + .write() + .text(workingDir.toString() + "/working/publicationextendedaffiliation"); + + + + SparkUpdateProjectInfo.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/", + "-sourcePath", sourcePath, + "-resultType", "publication", + "-workingPath", workingDir.toString() + "/working/", + "-preparedInfoPath", workingPath + "preparedInfo" + }); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/working/publicationextendedproject") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(3, tmp.count()); + + Assertions + .assertEquals( + 2, + tmp + .filter(r -> Optional.ofNullable(r.getProjects()).isPresent() && r.getProjects().size() > 0) + .count()); + + Assertions + .assertEquals( + 2, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) + .first() + .getProjects() + .size()); + + Assertions + .assertEquals( + 3, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) + .first() + .getProjects() + .size()); + + List projectSummaries = tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) + .first() + .getProjects(); + + Assertions + .assertTrue( + projectSummaries.stream().anyMatch(p -> p.getFunder().getShortName().equals("NSF"))); + + Assertions + .assertTrue( + projectSummaries.stream().anyMatch(p -> p.getFunder().getShortName().equals("UKRI"))); + + + + JavaRDD projects = sc + .textFile(workingDir.toString() + "/project") + .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + + JavaRDD rels = sc + .textFile(workingDir.toString() + "/resultProject") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + System.out.println(projects.count()); + + Assertions.assertEquals(5, projects.count()); + + Assertions.assertEquals(5, rels.count()); + + rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); + rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("40|"))); + + } + + @Test + public void selectEoscResults() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") + .getPath(); + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") + .getPath(); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + spark + .read() + .textFile(workingPath + "publication") + .write() + .text(workingDir.toString() + "/working/publicationextendedaffiliation"); + + spark + .read() + .textFile(workingPath + "preparedInfo") + .write() + .text(workingDir.toString() + "/working/preparedInfo"); + + ExtendEoscResultWithOrganizationStep2.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/", + "-sourcePath", sourcePath, + "-resultType", "publication", + "-workingPath", workingDir.toString() + "/working/" + }); + + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/organization") + .map(item -> OBJECT_MAPPER.readValue(item, Organization.class)); + + JavaRDD rels = sc + .textFile(workingDir.toString() + "/resultProject") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + System.out.println(tmp.count()); + + Assertions.assertEquals(2, tmp.count()); + + Assertions.assertEquals(2, rels.count()); + + rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); + rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|"))); + + } + +} diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/project/project b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/project/project new file mode 100644 index 0000000..f225efd --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/project/project @@ -0,0 +1,15 @@ +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Researcher exchange to Finland LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"200618"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-12-31"},"extraInfo":[],"fundedamount":2760.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|ukri________::081b09db1211a7b89eb3610d3160e9ba","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2,760 €"},"originalId":["aka_________::200618"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-04-24"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Atomic Emission."},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Molecular Regulatory Networks of Life (R’Life), call for Academy-funded researchers BTY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"328474"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2020-10-01","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2022-01-31"},"extraInfo":[],"fundedamount":250000.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"h2020classification":[],"id":"40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"measures":[{"id":"downloads","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"5"}]},{"id":"views","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"3"}]},{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"1"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"1"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"15"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"15"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"originalId":["aka_________::328474"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2020-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"A novel family-based sequencing approach and dissection of regulatory networks underlying a colour polymorphism"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Academy Project LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"287027"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2019-08-31"},"extraInfo":[],"fundedamount":644270.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"h2020classification":[],"id":"40|corda__h2020::5e49c0ee515f36e416a00cc292dfb310","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"measures":[{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"17"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"17"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"40"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"45"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"644,270 €"},"originalId":["aka_________::287027"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2015-09-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Advanced numerical computation methods for massive parabolic problems"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Apurahat tutkijainvaihtoon ja muuhun kahdenvälisiin sopimuksiin perustuvaan yhteistoimintaan LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"107987"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-12-31"},"extraInfo":[],"fundedamount":700.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|nserc_______::1e5e62235d094afd01cd56e65112fc63","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"700 €"},"originalId":["aka_________::107987"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-02-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Structure of the upper mantle beneath Central Fennoscandian Shield from seismic anisotropy studies"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Appropriations for development studies BY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"201608"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-12-31"},"extraInfo":[],"fundedamount":300010.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|corda_______::175629cbea2038ed02c85e7132fc4be2","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"300,010 €"},"originalId":["aka_________::201608"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2003-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Symbiotic Legumes For Sustainable Food Production and Prevention of Land Deglaration in China"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tutkijanvaihto Suomeen LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"79768"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2001-12-31"},"extraInfo":[],"fundedamount":2339.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::05dcdacd497a6bd12059752ca28be296","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2,339 €"},"originalId":["aka_________::79768"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2001-10-03"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Optical properties of photonic nano sized sol-gel materials undoped and doped EU3+ in different tempera- tures"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Work of foreign researcher´s in Finland TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"203246"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2003-12-31"},"extraInfo":[],"fundedamount":12390.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::089508cbd9a41cb209843ccfc6ce4353","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"12,390 €"},"originalId":["aka_________::203246"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2003-02-13"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Venäläisen tutkijan Ksenia Gurevichin kutsuminen Suomeen"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Academy Research Fellow LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"317726"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2023-08-31"},"extraInfo":[],"fundedamount":438874.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"h2020classification":[],"id":"40|aka_________::08eb927e62b8f4909fd84fb18f7fdeb6","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"measures":[{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"11"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"11"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"55"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"57"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"438,874 €"},"originalId":["aka_________::317726"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2018-09-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Co-design of control and communication systems for wireless networked control systems"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Special funding for system-level research into climate change mitigation and adaptation KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"340408"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2022-02-08","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2024-11-30"},"extraInfo":[],"fundedamount":311063.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::0cf103938ab0c02ff2552da5bed7d449","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"originalId":["aka_________::340408"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2021-02-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"FoodStep - a sustainable model for food services and early childhood education and care / Consortium: FoodStep"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Ulkomaisten tutkijoiden työskentely Suomessa KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"79137"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-12-31"},"extraInfo":[],"fundedamount":1510.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::0debb0996b15425e12308be88149031e","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"1,510 €"},"originalId":["aka_________::79137"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-02-20"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Venäläisen tutkijan Lysakovan kutsu minen"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Researcher mobility to Finland (inviting foreign researchers to Finland) BY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"128411"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2008-12-31"},"extraInfo":[],"fundedamount":3370.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::10f4268d2ac9a7a85ba3aa21580b1a61","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"3,370 €"},"originalId":["aka_________::128411"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2008-04-04"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"ROLE OF HEMOGLOBIN OVEREXPERSSION ON NITRIC OXIDE AND REACTIVE OXYGEN SPECIES SIGNALING IN ABIOTIC STRESSES IN ARABIDOPSIS THALIANA"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Academy Project funding LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"298298"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2020-08-31"},"extraInfo":[],"fundedamount":277129.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"h2020classification":[],"id":"40|aka_________::11fdb2f8600c0bd84c4f8c901e04713d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"measures":[{"id":"downloads","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"217"}]},{"id":"views","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"102"}]},{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"21"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"21"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"105"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"116"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"277,129 €"},"originalId":["aka_________::298298"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2016-09-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2D Layered Materials for Photonics / Consortium: 2D-LAMP"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Ympäristöntutkimus Egyptin Academy of Science Research and Technologyn kanssa TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"122859"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2010-12-31"},"extraInfo":[],"fundedamount":240000.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"h2020classification":[],"id":"40|aka_________::19466121d8747bd79d1ec4d109b63c52","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"measures":[{"id":"downloads","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"3"}]},{"id":"views","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"0"}]},{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"2"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"15"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"36"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"240,000 €"},"originalId":["aka_________::122859"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2008-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Environmental exposure to aflatoxins: effects on human placental expression of metabolism-associated genes and proteins putatively important for the well-being of foetus"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Subsidy for organising of international conferences KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"200429"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-12-31"},"extraInfo":[],"fundedamount":3430.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::19be64f15fff19133e4f02671af5d1f5","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"3,430 €"},"originalId":["aka_________::200429"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-03-08"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"A seminar on After Microhistory at the University of Helsinki"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tutkijainvaihto Suomeen KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"213903"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-12-31"},"extraInfo":[],"fundedamount":1050.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::1b43f4ce3394bedb97b6050677cf6dca","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1694215841116,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"1,050 €"},"originalId":["aka_________::213903"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-02-24"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Worldview and cultural identity of the Nenets of the Yamal Peninsula"},"totalcost":0.0} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/preparedInfo/preparedInfo b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/preparedInfo/preparedInfo new file mode 100644 index 0000000..5150792 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/preparedInfo/preparedInfo @@ -0,0 +1,2 @@ +{"projectsList":[{"code":"0430175","funder":{"fundingStream":"Directorate for Computer & Information Science & Engineering","jurisdiction":"US","name":"National Science Foundation","shortName":"NSF"},"id":"40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621","provenance":{"provenance":"Harvested","trust":"0.900"},"title":"Collaborative Research: Temporal Aspects"},{"code":"EP/F01161X/1","funder":{"fundingStream":"EPSRC","jurisdiction":"GB","name":"UK Research and Innovation","shortName":"UKRI"},"id":"40|ukri________::081b09db1211a7b89eb3610d3160e9ba","provenance":{"provenance":"Harvested","trust":"0.900"},"title":"The complexity of valued constraints"}],"resultId":"50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba"} +{"projectsList":[{"acronym":"METIS","code":"317669","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::175629cbea2038ed02c85e7132fc4be2","provenance":{"provenance":"Harvested","trust":"0.900"},"title":"Mobile and wireless communications Enablers for Twenty-twenty (2020) Information Society"},{"code":"unidentified","funder":{"jurisdiction":"CA","name":"Natural Sciences and Engineering Research Council of Canada","shortName":"NSERC"},"id":"40|nserc_______::1e5e62235d094afd01cd56e65112fc63","provenance":{"provenance":"Harvested","trust":"0.900"},"title":"unidentified"},{"acronym":"MiLC","code":"753431","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::5e49c0ee515f36e416a00cc292dfb310","provenance":{"provenance":"Harvested","trust":"0.900"},"title":"Monotonicity in Logic and Complexity"}],"resultId":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98"} \ No newline at end of file From 2fe2f0aa9e850d0f33c3e92d473f6c2b0d717091 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 27 Oct 2023 10:52:23 +0200 Subject: [PATCH 12/25] removing not needed method --- .../oa/graph/dump/eosc/ExtendProjectTest.java | 53 +------------------ 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java index d463b57..c3ef919 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java @@ -162,57 +162,6 @@ public class ExtendProjectTest { } - @Test - public void selectEoscResults() throws Exception { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") - .getPath(); - - final String workingPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") - .getPath(); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - spark - .read() - .textFile(workingPath + "publication") - .write() - .text(workingDir.toString() + "/working/publicationextendedaffiliation"); - - spark - .read() - .textFile(workingPath + "preparedInfo") - .write() - .text(workingDir.toString() + "/working/preparedInfo"); - - ExtendEoscResultWithOrganizationStep2.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/", - "-sourcePath", sourcePath, - "-resultType", "publication", - "-workingPath", workingDir.toString() + "/working/" - }); - - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/organization") - .map(item -> OBJECT_MAPPER.readValue(item, Organization.class)); - - JavaRDD rels = sc - .textFile(workingDir.toString() + "/resultProject") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - System.out.println(tmp.count()); - - Assertions.assertEquals(2, tmp.count()); - - Assertions.assertEquals(2, rels.count()); - - rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); - rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|"))); - - } + } From 33eaacdd58538d9372691109e0a78cf3494a36de Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 27 Oct 2023 17:46:00 +0200 Subject: [PATCH 13/25] adding test classes --- .../eosc/SparkDumpOrganizationProject.java | 3 +- .../dump/eosc/ExtendAffiliationTest.java | 67 ++-------- .../oa/graph/dump/eosc/ExtendProjectTest.java | 4 +- .../eosc/OrganizationProjectRelationTest.java | 118 ++++++++++++++++++ .../graph/dump/eosc/input/relation/relation | 6 +- .../eosc/working/organization/organization | 2 + .../graph/dump/eosc/working/project/project | 5 + 7 files changed, 148 insertions(+), 57 deletions(-) create mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/organization/organization create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/project/project diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 4aa9dba..9f94535 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -63,7 +63,7 @@ public class SparkDumpOrganizationProject implements Serializable { conf, isSparkSessionManaged, spark -> { - Utils.removeOutputDir(spark, outputPath); + Utils.removeOutputDir(spark, outputPath + "/organizationProject"); dumpRelation(spark, inputPath, outputPath); }); @@ -72,6 +72,7 @@ public class SparkDumpOrganizationProject implements Serializable { private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { Dataset organization = Utils.readPath(spark, outputPath + "organization", Organization.class); + Dataset project = Utils.readPath(spark, outputPath + "project", Project.class); Dataset relation = Utils diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java index cf7d016..d73e954 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java @@ -87,7 +87,7 @@ public class ExtendAffiliationTest { spark .read() - .textFile("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/publication") + .textFile(workingPath + "publication") .write() .text(workingDir.toString() + "/working/publication"); @@ -96,25 +96,33 @@ public class ExtendAffiliationTest { "-outputPath", workingDir.toString() + "/", "-sourcePath", sourcePath, "-resultType", "publication", - "-workingPath", workingDir.toString() + "/working" + "-workingPath", workingDir.toString() + "/working/" }); + /* + affiliation relationships + 20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff -> 50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba + + */ + JavaRDD tmp = sc .textFile(workingDir.toString() + "/working/publicationextendedaffiliation") .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); Assertions.assertEquals(3, tmp.count()); + tmp.foreach(p->System.out.println(OBJECT_MAPPER.writeValueAsString(p))); + Assertions .assertEquals( - 2, + 1, tmp .filter(r -> Optional.ofNullable(r.getAffiliation()).isPresent() && r.getAffiliation().size() > 0) .count()); Assertions .assertEquals( - 2, + 1, tmp .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) .first() @@ -129,7 +137,6 @@ public class ExtendAffiliationTest { Assertions .assertTrue( affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("Doris Engineering (France)"))); - Assertions.assertTrue(affiliations.stream().anyMatch(a -> a.getName().equalsIgnoreCase("RENNES METROPOLE"))); Affiliation organization = affiliations .stream() @@ -154,56 +161,7 @@ public class ExtendAffiliationTest { && p.getType().equalsIgnoreCase("ror"))); Assertions.assertEquals(2, organization.getPid().size()); - organization = affiliations - .stream() - .filter(a -> a.getId().equalsIgnoreCase("20|MetisRadboud::b58bdbe8ae5acead04fc76777d2f8017")) - .findFirst() - .get(); - Assertions.assertEquals("RENNES METROPOLE", organization.getName()); - Assertions.assertEquals(1, organization.getPid().size()); - Assertions - .assertTrue( - organization.getPid().get(0).getValue().equalsIgnoreCase("892062829") - && organization.getPid().get(0).getType().equalsIgnoreCase("pic")); - Assertions - .assertEquals( - 1, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .size()); - Assertions - .assertEquals( - "MIKARE RESEARCH", - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .get(0) - .getName()); - Assertions - .assertEquals( - 0, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getAffiliation() - .get(0) - .getPid() - .size()); - - Assertions - .assertFalse( - Optional - .ofNullable( - tmp - .filter( - r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::ff21e3c55d527fa7db171137c5fd1f1f")) - .first() - .getAffiliation()) - .isPresent()); } @Test @@ -244,6 +202,7 @@ public class ExtendAffiliationTest { rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|"))); + } } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java index c3ef919..aec45af 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java @@ -160,8 +160,10 @@ public class ExtendProjectTest { rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("40|"))); + + } - + } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java new file mode 100644 index 0000000..22ed7bf --- /dev/null +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java @@ -0,0 +1,118 @@ + +package eu.dnetlib.dhp.oa.graph.dump.eosc; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.eosc.model.Relation; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ +public class OrganizationProjectRelationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(OrganizationProjectRelationTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(OrganizationProjectRelationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(OrganizationProjectRelationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(OrganizationProjectRelationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void ExtendEoscResultWithProjectTest() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input") + .getPath(); + + final String workingPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/working/") + .getPath(); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + spark + .read() + .textFile(workingPath + "organization") + .write() + .text(workingDir.toString() + "/working/organization"); + + spark + .read() + .textFile(workingPath + "project") + .write() + .text(workingDir.toString() + "/working/project"); + + + + SparkDumpOrganizationProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/working/", + "-sourcePath", sourcePath + + }); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/working/organizationProject") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + Assertions.assertEquals(3, tmp.count()); + + Assertions.assertEquals(1,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254")).count()); + Assertions.assertEquals(2,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64")).count()); + + Assertions.assertEquals(1,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254") && + r.getTarget().equalsIgnoreCase("40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621")).count()); + + } + + + +} diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation index 7226c93..42a9ea2 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/relation/relation @@ -13,4 +13,8 @@ {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false} {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|chistera____::9146e9ef10640675f361d674e77bd254","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|aka_________::04ab269cfcf6bd571b6285151ec554b5","subRelType":"affiliation","source":"50|355e65625b88::38d0ab3b2212878dee7072170f1561ee","validated":false} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} \ No newline at end of file +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","target":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","subRelType":"affiliation","source":"50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466741040,"properties":[],"relClass":"isParticipant","relType":"resultOrganization","target":"40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621","subRelType":"participation","source":"20|chistera____::9146e9ef10640675f361d674e77bd254","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8847"},"lastupdatetimestamp":1658466737372,"properties":[],"relClass":"isParticipant","relType":"resultOrganization","target":"40|ukri________::081b09db1211a7b89eb3610d3160e9ba","subRelType":"participation","source":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466717565,"properties":[],"relClass":"isParticipant","relType":"resultOrganization","target":"40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621","subRelType":"participation","source":"20|nih_________::7523ba08be91b521952082f0c25daf5f","validated":false} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8998"},"lastupdatetimestamp":1658466733174,"properties":[],"relClass":"isParticipant","relType":"resultOrganization","target":"40|corda__h2020::5e49c0ee515f36e416a00cc292dfb310","subRelType":"participation","source":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","validated":false} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/organization/organization b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/organization/organization new file mode 100644 index 0000000..f773198 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/organization/organization @@ -0,0 +1,2 @@ +{"legalshortname":null,"legalname":"School of Computer Science, University of Birmingham","websiteurl":null,"alternativenames":[],"country":{"code":"GB","label":"United Kingdom"},"id":"20|chistera____::9146e9ef10640675f361d674e77bd254","pid":[]} +{"legalshortname":"EVOTHINGS","legalname":"EVOTHINGS AB","websiteurl":"https://evothings.com","alternativenames":[],"country":{"code":"SE","label":"Sweden"},"id":"20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64","pid":[{"type":"PIC","value":"922724335"}]} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/project/project b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/project/project new file mode 100644 index 0000000..f6e7689 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/working/project/project @@ -0,0 +1,5 @@ +{"id":"40|corda__h2020::5e49c0ee515f36e416a00cc292dfb310","websiteurl":null,"code":"287027","acronym":null,"title":"Advanced numerical computation methods for massive parabolic problems","startdate":"2015-09-01","enddate":"2019-08-31","callidentifier":"Academy Project LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null,"funding_stream":null}],"summary":null,"granted":{"currency":"EUR","totalcost":0.0,"fundedamount":644270.0},"h2020programme":[]} +{"id":"40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621","websiteurl":null,"code":"328474","acronym":null,"title":"A novel family-based sequencing approach and dissection of regulatory networks underlying a colour polymorphism","startdate":"2020-01-01","enddate":"2022-01-31","callidentifier":"Molecular Regulatory Networks of Life (R’Life), call for Academy-funded researchers BTY","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null,"funding_stream":null}],"summary":null,"granted":{"currency":"EUR","totalcost":0.0,"fundedamount":250000.0},"h2020programme":[]} +{"id":"40|nserc_______::1e5e62235d094afd01cd56e65112fc63","websiteurl":null,"code":"107987","acronym":null,"title":"Structure of the upper mantle beneath Central Fennoscandian Shield from seismic anisotropy studies","startdate":"2005-02-15","enddate":"2005-12-31","callidentifier":"Apurahat tutkijainvaihtoon ja muuhun kahdenvälisiin sopimuksiin perustuvaan yhteistoimintaan LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null,"funding_stream":null}],"summary":null,"granted":{"currency":"EUR","totalcost":0.0,"fundedamount":700.0},"h2020programme":[]} +{"id":"40|corda_______::175629cbea2038ed02c85e7132fc4be2","websiteurl":null,"code":"201608","acronym":null,"title":"Symbiotic Legumes For Sustainable Food Production and Prevention of Land Deglaration in China","startdate":"2003-01-01","enddate":"2005-12-31","callidentifier":"Appropriations for development studies BY","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null,"funding_stream":null}],"summary":null,"granted":{"currency":"EUR","totalcost":0.0,"fundedamount":300010.0},"h2020programme":[]} +{"id":"40|ukri________::081b09db1211a7b89eb3610d3160e9ba","websiteurl":null,"code":"200618","acronym":null,"title":"Atomic Emission.","startdate":"2002-04-24","enddate":"2002-12-31","callidentifier":"Researcher exchange to Finland LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null,"funding_stream":null}],"summary":null,"granted":{"currency":"EUR","totalcost":0.0,"fundedamount":2760.0},"h2020programme":[]} \ No newline at end of file From 094d8b996e158c1e5d83986dae94cb062e5999e1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 7 Nov 2023 11:46:31 +0100 Subject: [PATCH 14/25] added test classes --- .../dump/eosc/ExtendAffiliationTest.java | 9 ++-- .../oa/graph/dump/eosc/ExtendProjectTest.java | 54 +++++++++---------- .../eosc/OrganizationProjectRelationTest.java | 50 ++++++++++------- 3 files changed, 59 insertions(+), 54 deletions(-) diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java index d73e954..23d807c 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendAffiliationTest.java @@ -100,9 +100,8 @@ public class ExtendAffiliationTest { }); /* - affiliation relationships - 20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff -> 50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba - + * affiliation relationships 20|13811704aa70::51a6ade52065e3b371d1ae822e07f1ff -> + * 50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba */ JavaRDD tmp = sc @@ -111,7 +110,7 @@ public class ExtendAffiliationTest { Assertions.assertEquals(3, tmp.count()); - tmp.foreach(p->System.out.println(OBJECT_MAPPER.writeValueAsString(p))); + tmp.foreach(p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p))); Assertions .assertEquals( @@ -161,7 +160,6 @@ public class ExtendAffiliationTest { && p.getType().equalsIgnoreCase("ror"))); Assertions.assertEquals(2, organization.getPid().size()); - } @Test @@ -202,7 +200,6 @@ public class ExtendAffiliationTest { rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("20|"))); - } } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java index aec45af..55b4641 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendProjectTest.java @@ -1,8 +1,13 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.eosc.model.*; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -15,12 +20,9 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.eosc.model.*; /** * @author miriam.baglioni @@ -86,15 +88,13 @@ public class ExtendProjectTest { .write() .text(workingDir.toString() + "/working/publicationextendedaffiliation"); - - SparkUpdateProjectInfo.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-outputPath", workingDir.toString() + "/", "-sourcePath", sourcePath, "-resultType", "publication", "-workingPath", workingDir.toString() + "/working/", - "-preparedInfoPath", workingPath + "preparedInfo" + "-preparedInfoPath", workingPath + "preparedInfo" }); JavaRDD tmp = sc @@ -120,13 +120,13 @@ public class ExtendProjectTest { .size()); Assertions - .assertEquals( - 3, - tmp - .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) - .first() - .getProjects() - .size()); + .assertEquals( + 3, + tmp + .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::cd7711c65d518859f1d87056e2c45d98")) + .first() + .getProjects() + .size()); List projectSummaries = tmp .filter(r -> r.getId().equalsIgnoreCase("50|06cdd3ff4700::93859bd27121c3ee7c6ee4bfb1790cba")) @@ -138,18 +138,16 @@ public class ExtendProjectTest { projectSummaries.stream().anyMatch(p -> p.getFunder().getShortName().equals("NSF"))); Assertions - .assertTrue( - projectSummaries.stream().anyMatch(p -> p.getFunder().getShortName().equals("UKRI"))); - - + .assertTrue( + projectSummaries.stream().anyMatch(p -> p.getFunder().getShortName().equals("UKRI"))); JavaRDD projects = sc - .textFile(workingDir.toString() + "/project") - .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + .textFile(workingDir.toString() + "/project") + .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); JavaRDD rels = sc - .textFile(workingDir.toString() + "/resultProject") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/resultProject") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); System.out.println(projects.count()); @@ -160,10 +158,6 @@ public class ExtendProjectTest { rels.foreach(r -> Assertions.assertTrue(r.getSource().startsWith("50|"))); rels.foreach(r -> Assertions.assertTrue(r.getTarget().startsWith("40|"))); - - } - - } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java index 22ed7bf..a96f5d1 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/OrganizationProjectRelationTest.java @@ -1,8 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.eosc.model.Relation; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -15,10 +18,9 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.eosc.model.Relation; /** * @author miriam.baglioni @@ -85,12 +87,10 @@ public class OrganizationProjectRelationTest { .text(workingDir.toString() + "/working/organization"); spark - .read() - .textFile(workingPath + "project") - .write() - .text(workingDir.toString() + "/working/project"); - - + .read() + .textFile(workingPath + "project") + .write() + .text(workingDir.toString() + "/working/project"); SparkDumpOrganizationProject.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -105,14 +105,28 @@ public class OrganizationProjectRelationTest { Assertions.assertEquals(3, tmp.count()); - Assertions.assertEquals(1,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254")).count()); - Assertions.assertEquals(2,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64")).count()); + Assertions + .assertEquals( + 1, + tmp + .filter(r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254")) + .count()); + Assertions + .assertEquals( + 2, + tmp + .filter(r -> r.getSource().equalsIgnoreCase("20|corda__h2020::dfe84ab5cad50d4dcfaf5bd0c86e1b64")) + .count()); - Assertions.assertEquals(1,tmp.filter(r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254") && - r.getTarget().equalsIgnoreCase("40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621")).count()); + Assertions + .assertEquals( + 1, + tmp + .filter( + r -> r.getSource().equalsIgnoreCase("20|chistera____::9146e9ef10640675f361d674e77bd254") && + r.getTarget().equalsIgnoreCase("40|nsf_________::d1c070f4252c32e23ccc3f4211c9c621")) + .count()); } - - } From d32b0861a2f0f93e0ac54bbc79d7ae3c8327e7ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 10 Nov 2023 14:55:09 +0100 Subject: [PATCH 15/25] - --- .../eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java index 6f02871..5c2fe14 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java @@ -80,14 +80,12 @@ public class SparkSelectRelation implements Serializable { Dataset> resultIds = Utils .readPath(spark, outputPath + "/publication", Result.class) - .map( (MapFunction>) p -> new Tuple2<>(p.getId(), p.getType()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .union( Utils .readPath(spark, outputPath + "/dataset", Result.class) - .map( (MapFunction>) d -> new Tuple2<>(d.getId(), d.getType()), Encoders.tuple(Encoders.STRING(), Encoders.STRING()))) From dbfd744f9c5a762257e897305464d996778f31d8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 13 Nov 2023 11:30:02 +0100 Subject: [PATCH 16/25] fixed issue --- .../ExtendEoscResultWithOrganizationStep2.java | 3 --- .../dump/eosc/SelectEoscResultsJobStep1.java | 11 ----------- .../graph/dump/eosc/SparkUpdateProjectInfo.java | 10 ++++++++-- .../dnetlib/dhp/oa/graph/dump/eosc/job.properties | 15 +++++++++++++++ .../dhp/oa/graph/dump/eosc/oozie_app/workflow.xml | 2 -- 5 files changed, 23 insertions(+), 18 deletions(-) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 57a3e24..8d106ef 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -101,9 +101,6 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .joinWith(relation, result.col("id").equalTo(relation.col("source"))) .map((MapFunction, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class)); - eoscRelation - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); - // from eoscRelation select the organization eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index 631058a..65fcd54 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -77,17 +77,6 @@ public class SelectEoscResultsJobStep1 implements Serializable { String inputPath, String outputPath, Class inputClazz, String communityMapPath, String eoscDatasourceIdsPath) { -// final StructType structureSchema = new StructType() -// .add("eoscId", DataTypes.StringType) -// .add("graphId", DataTypes.StringType) -// .add("graphName", DataTypes.StringType); -// -// // .fromDDL("`graphId`: STRING, `eoscId`:STRING"); -// org.apache.spark.sql.Dataset df = spark -// .read() -// .schema(structureSchema) -// .json(eoscDatasourceIdsPath); - List df = Utils .readPath(spark, eoscDatasourceIdsPath, MasterDuplicate.class) .collectAsList(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index e9fdb87..0f0859e 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -112,7 +112,7 @@ public class SparkUpdateProjectInfo implements Serializable { Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); Dataset projectIds = result - .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId"))) + .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") .flatMap( (FlatMapFunction, String>) t2 -> t2 ._2() @@ -134,7 +134,13 @@ public class SparkUpdateProjectInfo implements Serializable { .option("compression", "gzip") .json(outputPath + "project"); - resultProject + result + .joinWith( + resultProject, result.col("id").equalTo(resultProject.col("resultId")), + "left") + .map( + (MapFunction, ResultProject>) t2 -> t2._2(), + Encoders.bean(ResultProject.class)) .flatMap( (FlatMapFunction) rp -> rp .getProjectsList() diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties new file mode 100644 index 0000000..d22cf14 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties @@ -0,0 +1,15 @@ +#PROPERTIES FOR EOSC DUMP +sourcePath=/tmp/prod_provision/graph/18_graph_blacklisted/ +outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended +#accessToken for the openaire sandbox following +accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4 +connectionUrl=https://sandbox.zenodo.org/api/deposit/depositions +singleDeposition=false +conceptRecordId=1094304 +depositionType=version +metadata="" +depositionId=6616871 +removeSet=merges;isMergedIn +postgresURL=jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus +postgresUser=dnet +postgresPassword=dnetPwd \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index c435fae..1513c70 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -225,8 +225,6 @@ - - yarn From fd242c1c870885fb1b9f74492e92555c4828e891 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 13 Nov 2023 18:16:53 +0100 Subject: [PATCH 17/25] - --- .../resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties index d22cf14..10c14b2 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties @@ -1,5 +1,5 @@ #PROPERTIES FOR EOSC DUMP -sourcePath=/tmp/prod_provision/graph/18_graph_blacklisted/ +sourcePath=/tmp/miriam/graphCopy outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended #accessToken for the openaire sandbox following accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4 From 0f602bae9d97b2ea7f53d49d966084cd1df96959 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 14 Nov 2023 11:33:50 +0100 Subject: [PATCH 18/25] fixed issue. Need to extend wf --- ...ExtendEoscResultWithOrganizationStep2.java | 54 ++++++++++++------- .../dhp/oa/graph/dump/eosc/job.properties | 3 +- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 8d106ef..be78482 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -76,13 +76,13 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, workingPath + resultType + "extendedaffiliation"); - addOrganizations(spark, inputPath, workingPath, outputPath, resultType); - dumpOrganizationAndRelations(spark, inputPath, workingPath, outputPath, resultType); + addOrganizations(spark, inputPath, workingPath, resultType); + dumpOrganizationAndRelations(spark, inputPath, workingPath, resultType); }); } private static void dumpOrganizationAndRelations(SparkSession spark, String inputPath, String workingPath, - String outputPath, String resultType) { + String resultType) { Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) .filter( @@ -102,36 +102,46 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .map((MapFunction, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class)); // from eoscRelation select the organization - eoscRelation + Dataset organizationIds = eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) .map( - (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( + (MapFunction, String>) t2 -> t2._2().getId(), + Encoders.STRING()) + .distinct(); + + organizationIds + .joinWith(organization, organizationIds.col("value").equalTo(organization.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Organization>) t2 -> mapOrganization( t2._2()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) - .groupByKey((MapFunction) o -> o.getId(), Encoders.STRING()) - .mapGroups( - (MapGroupsFunction) ( - k, v) -> v.next(), - Encoders.bean(eu.dnetlib.dhp.eosc.model.Organization.class)) + + .filter(Objects::nonNull) + .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "organization"); + .json(workingPath + resultType + "/organization"); eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) .map( - (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation - .newInstance(t2._1().getSource(), t2._1().getTarget()), + (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> { + if (isToBeDumpedOrg(t2._2())) + return eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()); + return null; + }, Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "resultOrganization"); + .json(workingPath + resultType + "/resultOrganization"); } - private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String outputPath, + private static void addOrganizations(SparkSession spark, String inputPath, String workingPath, String resultType) { Dataset results = Utils @@ -219,10 +229,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org) { - if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) - return null; - if (!Optional.ofNullable(org.getLegalname()).isPresent() - && !Optional.ofNullable(org.getLegalshortname()).isPresent()) + if (isToBeDumpedOrg(org)) return null; eu.dnetlib.dhp.eosc.model.Organization organization = new eu.dnetlib.dhp.eosc.model.Organization(); @@ -278,4 +285,13 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { return organization; } + private static boolean isToBeDumpedOrg(Organization org) { + if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) + return true; + if (!Optional.ofNullable(org.getLegalname()).isPresent() + && !Optional.ofNullable(org.getLegalshortname()).isPresent()) + return true; + return false; + } + } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties index 10c14b2..612a16e 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties @@ -12,4 +12,5 @@ depositionId=6616871 removeSet=merges;isMergedIn postgresURL=jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus postgresUser=dnet -postgresPassword=dnetPwd \ No newline at end of file +postgresPassword=dnetPwd +isLookUpUrl=http://services.openaire.eu:8280/is/services/isLookUp?wsdl \ No newline at end of file From db388ebc21c2dddec06d907845ccce94385248ec Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 16 Nov 2023 12:17:42 +0100 Subject: [PATCH 19/25] - --- ...ExtendEoscResultWithOrganizationStep2.java | 127 ++++++++++-------- .../dump/eosc/SelectEoscResultsJobStep1.java | 2 + .../eosc/SparkDumpOrganizationProject.java | 74 +++++++++- .../dump/eosc/SparkUpdateProjectInfo.java | 4 +- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 1 + 5 files changed, 145 insertions(+), 63 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index be78482..422ba5c 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -17,6 +17,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,8 +77,10 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, workingPath + resultType + "extendedaffiliation"); + Utils.removeOutputDir(spark, workingPath + resultType + "organization"); + Utils.removeOutputDir(spark, workingPath + resultType + "resultOrganization"); addOrganizations(spark, inputPath, workingPath, resultType); - dumpOrganizationAndRelations(spark, inputPath, workingPath, resultType); + dumpOrganizationAndRelations(spark, inputPath, workingPath, resultType); }); } @@ -101,6 +104,8 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .joinWith(relation, result.col("id").equalTo(relation.col("source"))) .map((MapFunction, Relation>) t2 -> t2._2(), Encoders.bean(Relation.class)); + log.info("Number of affiliation relation for " + resultType + " = " + eoscRelation.count()); + // from eoscRelation select the organization Dataset organizationIds = eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) @@ -121,7 +126,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(workingPath + resultType + "/organization"); + .json(workingPath + resultType + "organization"); eoscRelation .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) @@ -137,7 +142,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(workingPath + resultType + "/resultOrganization"); + .json(workingPath + resultType + "resultOrganization"); } @@ -159,67 +164,18 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { Dataset resultOrganization = relations .joinWith(organizations, relations.col("source").equalTo(organizations.col("id")), "left") - .map((MapFunction, ResultOrganizations>) t2 -> { - if (t2._2() != null) { - ResultOrganizations rOrg = new ResultOrganizations(); - rOrg.setResultId(t2._1().getTarget()); - Affiliation org = new Affiliation(); - org.setId(t2._2().getId()); - if (Optional.ofNullable(t2._2().getLegalname()).isPresent()) { - org.setName(t2._2().getLegalname().getValue()); - } else { - org.setName(""); - } - HashMap> organizationPids = new HashMap<>(); - if (Optional.ofNullable(t2._2().getPid()).isPresent()) - t2._2().getPid().forEach(p -> { - if (!organizationPids.containsKey(p.getQualifier().getClassid())) - organizationPids.put(p.getQualifier().getClassid(), new HashSet<>()); - organizationPids.get(p.getQualifier().getClassid()).add(p.getValue()); - }); - List pids = new ArrayList<>(); - for (String key : organizationPids.keySet()) { - for (String value : organizationPids.get(key)) { - OrganizationPid pid = new OrganizationPid(); - pid.setValue(value); - pid.setType(key); - pids.add(pid); - } - } - org.setPid(pids); - rOrg.setAffiliation(org); - return rOrg; - } - return null; - - }, Encoders.bean(ResultOrganizations.class)) + .map((MapFunction, ResultOrganizations>) t2 -> getResultOrganizations(t2), Encoders.bean(ResultOrganizations.class)) .filter(Objects::nonNull); + System.out.println(resultOrganization.count()); + results .joinWith(resultOrganization, results.col("id").equalTo(resultOrganization.col("resultId")), "left") .groupByKey( (MapFunction, String>) t2 -> t2._1().getId(), Encoders.STRING()) .mapGroups( - (MapGroupsFunction, Result>) (s, it) -> { - Tuple2 first = it.next(); - if (first._2() == null) { - return first._1(); - } - Result ret = first._1(); - List affiliation = new ArrayList<>(); - Set alreadyInsertedAffiliations = new HashSet<>(); - affiliation.add(first._2().getAffiliation()); - alreadyInsertedAffiliations.add(first._2().getAffiliation().getId()); - it.forEachRemaining(res -> { - if (!alreadyInsertedAffiliations.contains(res._2().getAffiliation().getId())) { - affiliation.add(res._2().getAffiliation()); - alreadyInsertedAffiliations.add(res._2().getAffiliation().getId()); - } - - }); - ret.setAffiliation(affiliation); - return ret; - }, Encoders.bean(Result.class)) + (MapGroupsFunction, Result>) (s, it) -> addAffiliation(it) + , Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -227,6 +183,63 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { } + @Nullable + private static ResultOrganizations getResultOrganizations(Tuple2 t2) { + if (t2._2() != null) { + Organization organization = t2._2(); + ResultOrganizations rOrg = new ResultOrganizations(); + rOrg.setResultId(t2._1().getTarget()); + Affiliation org = new Affiliation(); + org.setId(organization.getId()); + if (Optional.ofNullable(organization.getLegalname()).isPresent()) { + org.setName(organization.getLegalname().getValue()); + } else { + org.setName(""); + } + HashMap> organizationPids = new HashMap<>(); + if (Optional.ofNullable(organization.getPid()).isPresent()) + organization.getPid().forEach(p -> { + if (!organizationPids.containsKey(p.getQualifier().getClassid())) + organizationPids.put(p.getQualifier().getClassid(), new HashSet<>()); + organizationPids.get(p.getQualifier().getClassid()).add(p.getValue()); + }); + List pids = new ArrayList<>(); + for (String key : organizationPids.keySet()) { + for (String value : organizationPids.get(key)) { + OrganizationPid pid = new OrganizationPid(); + pid.setValue(value); + pid.setType(key); + pids.add(pid); + } + } + org.setPid(pids); + rOrg.setAffiliation(org); + return rOrg; + } + return null; + } + + private static Result addAffiliation(Iterator> it) { + Tuple2 first = it.next(); + if (first._2() == null) { + return first._1(); + } + Result ret = first._1(); + List affiliation = new ArrayList<>(); + Set alreadyInsertedAffiliations = new HashSet<>(); + affiliation.add(first._2().getAffiliation()); + alreadyInsertedAffiliations.add(first._2().getAffiliation().getId()); + it.forEachRemaining(res -> { + if (!alreadyInsertedAffiliations.contains(res._2().getAffiliation().getId())) { + affiliation.add(res._2().getAffiliation()); + alreadyInsertedAffiliations.add(res._2().getAffiliation().getId()); + } + + }); + ret.setAffiliation(affiliation); + return ret; + } + private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org) { if (isToBeDumpedOrg(org)) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index 65fcd54..d7f0ece 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -95,6 +96,7 @@ public class SelectEoscResultsJobStep1 implements Serializable { (MapFunction) r -> (Result) ResultMapper .map(r, communityMap, df), Encoders.bean(Result.class)) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 9f94535..57211e3 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -10,6 +10,7 @@ import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -57,6 +58,9 @@ public class SparkDumpOrganizationProject implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final String workingPath = parser.get("workingPath"); + log.info("workingPath: {}", workingPath); + SparkConf conf = new SparkConf(); runWithSparkSession( @@ -64,16 +68,42 @@ public class SparkDumpOrganizationProject implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath + "/organizationProject"); - dumpRelation(spark, inputPath, outputPath); + dumpRelation(spark, inputPath, outputPath, workingPath); }); } - private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) { - Dataset organization = Utils.readPath(spark, outputPath + "organization", Organization.class); + private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, String workingPath) { + Dataset organization = Utils + .readPath(spark, workingPath + "publicationorganization", Organization.class) + .union(Utils.readPath(spark, workingPath + "datasetorganization", Organization.class)) + .union(Utils.readPath(spark, workingPath + "softwareorganization", Organization.class)) + .union(Utils.readPath(spark, workingPath + "otherresearchproductorganization", Organization.class)) + .groupByKey((MapFunction) o -> o.getId(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, v) -> v.next(), + Encoders.bean(Organization.class)); - Dataset project = Utils.readPath(spark, outputPath + "project", Project.class); + Dataset project = Utils + .readPath(spark, workingPath + "publicationproject", Project.class) + .union(Utils.readPath(spark, workingPath + "datasetproject", Project.class)) + .union(Utils.readPath(spark, workingPath + "softwareproject", Project.class)) + .union(Utils.readPath(spark, workingPath + "otherresearchproductproject", Project.class)) + .groupByKey((MapFunction) o -> o.getId(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, v) -> v.next(), Encoders.bean(Project.class)); + + organization + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organization"); + + project + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "project"); Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) @@ -96,6 +126,42 @@ public class SparkDumpOrganizationProject implements Serializable { .option("compression", "gzip") .json(outputPath + "organizationProject"); + Utils + .readPath(spark, workingPath + "publicationresultOrganization", eu.dnetlib.dhp.eosc.model.Relation.class) + .union( + Utils + .readPath( + spark, workingPath + "datasetresultOrganization", eu.dnetlib.dhp.eosc.model.Relation.class)) + .union( + Utils + .readPath( + spark, workingPath + "softwareresultOrganization", eu.dnetlib.dhp.eosc.model.Relation.class)) + .union( + Utils + .readPath( + spark, workingPath + "otherresearchproductresultOrganization", + eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "resultOrganization"); + + Utils + .readPath(spark, workingPath + "publicationresultProject", eu.dnetlib.dhp.eosc.model.Relation.class) + .union( + Utils.readPath(spark, workingPath + "datasetresultProject", eu.dnetlib.dhp.eosc.model.Relation.class)) + .union( + Utils.readPath(spark, workingPath + "softwareresultProject", eu.dnetlib.dhp.eosc.model.Relation.class)) + .union( + Utils + .readPath( + spark, workingPath + "otherresearchproductresultProject", + eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "resultProject"); + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 0f0859e..6127a88 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -132,7 +132,7 @@ public class SparkUpdateProjectInfo implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "project"); + .json(workingPath + resultType + "project"); result .joinWith( @@ -152,7 +152,7 @@ public class SparkUpdateProjectInfo implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "resultProject"); + .json(workingPath + resultType + "resultProject"); } private static eu.dnetlib.dhp.eosc.model.Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index 1513c70..9b34569 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -632,6 +632,7 @@ --sourcePath${sourcePath} --outputPath${outputPath}/dump/ + --workingPath${workingDir}/dump/ From 9a06a552c4d285cd6603f82e7a9cc62156ad772e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 17 Nov 2023 16:49:06 +0100 Subject: [PATCH 20/25] fixed last issues --- ...ExtendEoscResultWithOrganizationStep2.java | 66 ++++++++++++++----- .../dump/eosc/SparkUpdateProjectInfo.java | 5 +- ...ganizationprojectrelations_parameters.json | 6 ++ 3 files changed, 58 insertions(+), 19 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 422ba5c..ab8b368 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -10,7 +10,6 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; @@ -20,8 +19,7 @@ import org.apache.spark.sql.SparkSession; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; +import org.spark_project.jetty.util.StringUtil; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.eosc.model.Affiliation; @@ -132,13 +130,14 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .joinWith(organization, eoscRelation.col("target").equalTo(organization.col("id"))) .map( (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> { - if (isToBeDumpedOrg(t2._2())) - return eu.dnetlib.dhp.eosc.model.Relation - .newInstance(t2._1().getSource(), t2._1().getTarget()); - return null; + if (isToBeRemovedOrg(t2._2())) + return new eu.dnetlib.dhp.eosc.model.Relation(); + return eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()); + }, Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) - .filter(Objects::nonNull) + .filter((FilterFunction) r -> StringUtil.isNotBlank(r.getSource())) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -163,19 +162,54 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { .filter((FilterFunction) o -> !o.getDataInfo().getDeletedbyinference()); Dataset resultOrganization = relations - .joinWith(organizations, relations.col("source").equalTo(organizations.col("id")), "left") - .map((MapFunction, ResultOrganizations>) t2 -> getResultOrganizations(t2), Encoders.bean(ResultOrganizations.class)) - .filter(Objects::nonNull); + .joinWith(organizations, relations.col("source").equalTo(organizations.col("id"))) + .map((MapFunction, ResultOrganizations>) t2 -> { + ResultOrganizations rOrg = new ResultOrganizations(); - System.out.println(resultOrganization.count()); + if (t2._2() != null) { + + rOrg.setResultId(t2._1().getTarget()); + Affiliation org = new Affiliation(); + org.setId(t2._2().getId()); + if (Optional.ofNullable(t2._2().getLegalname()).isPresent()) { + org.setName(t2._2().getLegalname().getValue()); + } else { + org.setName(""); + } + HashMap> organizationPids = new HashMap<>(); + if (Optional.ofNullable(t2._2().getPid()).isPresent()) + t2._2().getPid().forEach(p -> { + if (!organizationPids.containsKey(p.getQualifier().getClassid())) + organizationPids.put(p.getQualifier().getClassid(), new HashSet<>()); + organizationPids.get(p.getQualifier().getClassid()).add(p.getValue()); + }); + List pids = new ArrayList<>(); + for (String key : organizationPids.keySet()) { + for (String value : organizationPids.get(key)) { + OrganizationPid pid = new OrganizationPid(); + pid.setValue(value); + pid.setType(key); + pids.add(pid); + } + } + org.setPid(pids); + rOrg.setAffiliation(org); + return rOrg; + } + return rOrg; + + }, Encoders.bean(ResultOrganizations.class)) + .filter((FilterFunction) ro -> ro.getResultId() != null); + + // resultOrganization.count(); results .joinWith(resultOrganization, results.col("id").equalTo(resultOrganization.col("resultId")), "left") .groupByKey( (MapFunction, String>) t2 -> t2._1().getId(), Encoders.STRING()) .mapGroups( - (MapGroupsFunction, Result>) (s, it) -> addAffiliation(it) - , Encoders.bean(Result.class)) + (MapGroupsFunction, Result>) (s, it) -> addAffiliation(it), + Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -242,7 +276,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { private static eu.dnetlib.dhp.eosc.model.Organization mapOrganization(Organization org) { - if (isToBeDumpedOrg(org)) + if (isToBeRemovedOrg(org)) return null; eu.dnetlib.dhp.eosc.model.Organization organization = new eu.dnetlib.dhp.eosc.model.Organization(); @@ -298,7 +332,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { return organization; } - private static boolean isToBeDumpedOrg(Organization org) { + private static boolean isToBeRemovedOrg(Organization org) { if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) return true; if (!Optional.ofNullable(org.getLegalname()).isPresent() diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 6127a88..340c1b6 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -112,7 +112,7 @@ public class SparkUpdateProjectInfo implements Serializable { Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); Dataset projectIds = result - .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") + .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId"))) .flatMap( (FlatMapFunction, String>) t2 -> t2 ._2() @@ -136,8 +136,7 @@ public class SparkUpdateProjectInfo implements Serializable { result .joinWith( - resultProject, result.col("id").equalTo(resultProject.col("resultId")), - "left") + resultProject, result.col("id").equalTo(resultProject.col("resultId"))) .map( (MapFunction, ResultProject>) t2 -> t2._2(), Encoders.bean(ResultProject.class)) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json index 763e0df..81d594b 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_organizationprojectrelations_parameters.json @@ -19,6 +19,12 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "the path used to store temporary output files", "paramRequired": false + }, + { + "paramName": "wp", + "paramLongName": "workingPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true } ] From 1a196d03f9b3d2a5a945680306babe8bc09187ca Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Jan 2024 11:26:04 +0100 Subject: [PATCH 21/25] changed dependency to the dhp-schema model to 4.17.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 00b5e97..20eee8d 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,7 @@ 5.6.1 3.5 11.0.2 - [3.17.1] + [4.17.2] \ No newline at end of file From f6f734bf5e7ac1b0f8765b4fd1ee05d490e5317e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Jan 2024 12:10:36 +0100 Subject: [PATCH 22/25] adding use of communityAPIs instead of ISLOOKUPRUL --- api/pom.xml | 49 ++++++ .../dhp/communityapi/QueryCommunityAPI.java | 75 +++++++++ .../model/CommunityContentprovider.java | 30 ++++ .../model/CommunityEntityMap.java | 21 +++ .../communityapi/model/CommunityModel.java | 82 ++++++++++ .../communityapi/model/CommunitySummary.java | 15 ++ .../dhp/communityapi/model/ContentModel.java | 51 +++++++ .../communityapi/model/DatasourceList.java | 11 ++ .../communityapi/model/OrganizationList.java | 16 ++ .../dhp/communityapi/model/ProjectModel.java | 44 ++++++ dump/pom.xml | 12 +- .../dhp/oa/graph/dump/UtilCommunityAPI.java | 144 ++++++++++++++++++ .../oa/graph/dump/eosc/SaveCommunityMap.java | 44 +++--- .../dhp/oa/graph/dump/eosc/job.properties | 2 +- pom.xml | 1 + 15 files changed, 574 insertions(+), 23 deletions(-) create mode 100644 api/pom.xml create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java create mode 100644 api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java diff --git a/api/pom.xml b/api/pom.xml new file mode 100644 index 0000000..c719582 --- /dev/null +++ b/api/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-graph-dump + 1.2.5-SNAPSHOT + + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + + + 8 + 8 + + + + + + + dom4j + dom4j + + + + jaxen + jaxen + + + + eu.dnetlib.dhp + dhp-common + ${project.version} + + + com.fasterxml.jackson.core + jackson-annotations + compile + + + + + + \ No newline at end of file diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java new file mode 100644 index 0000000..fca6406 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.communityapi; + + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class QueryCommunityAPI { + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); + + return body; + } + + public static String communities() throws IOException { + + return get(PRODUCTION_BASE_URL + "community/communities"); + } + + public static String community(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id); + + } + + public static String communityDatasource(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java new file mode 100644 index 0000000..a1de823 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonAutoDetect +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityContentprovider { + private String openaireId; + + private String enabled; + + public String getEnabled() { + return enabled; + } + + public void setEnabled(String enabled) { + this.enabled = enabled; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java new file mode 100644 index 0000000..efc0399 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class CommunityEntityMap extends HashMap> { + + public CommunityEntityMap() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java new file mode 100644 index 0000000..144dfd7 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java @@ -0,0 +1,82 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityModel implements Serializable { + private String id; + private String name; + private String description; + + private String status; + + private String type; + + private List subjects; + + private String zenodoCommunity; + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public String getZenodoCommunity() { + return zenodoCommunity; + } + + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java new file mode 100644 index 0000000..93bbe83 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class CommunitySummary extends ArrayList implements Serializable { + public CommunitySummary() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java new file mode 100644 index 0000000..ea0ed33 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ContentModel implements Serializable { + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public Integer getTotalPages() { + return totalPages; + } + + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } + + public Boolean getLast() { + return last; + } + + public void setLast(Boolean last) { + this.last = last; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java new file mode 100644 index 0000000..9a2f44a --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java @@ -0,0 +1,11 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +public class DatasourceList extends ArrayList implements Serializable { + public DatasourceList() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java new file mode 100644 index 0000000..96305ff --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class OrganizationList extends ArrayList implements Serializable { + + public OrganizationList() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java new file mode 100644 index 0000000..94b6114 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ProjectModel implements Serializable { + + private String openaireId; + + private String funder; + + private String gratId; + + public String getFunder() { + return funder; + } + + public void setFunder(String funder) { + this.funder = funder; + } + + public String getGratId() { + return gratId; + } + + public void setGratId(String gratId) { + this.gratId = gratId; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } +} diff --git a/dump/pom.xml b/dump/pom.xml index 60bc4b4..e9a8100 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -54,7 +54,17 @@ dump-schema 1.2.5-SNAPSHOT - + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + compile + diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java new file mode 100644 index 0000000..22eca32 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java @@ -0,0 +1,144 @@ + +package eu.dnetlib.dhp.oa.graph.dump; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.communityapi.model.*; + +import eu.dnetlib.dhp.utils.DHPUtils; + +public class UtilCommunityAPI { + + private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); + + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); + + } + + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } + + + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public")) + && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + + } + + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + + } + + + + private List getDatasourceList(String id) { + List datasourceList = new ArrayList<>(); + try { + + new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), + DatasourceList.class) + .stream() + .forEach(ds -> { + if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) { + + datasourceList.add(ds.getOpenaireId()); + } + + }); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return datasourceList; + } + + private List getProjectList(String id) { + int page = -1; + int size = 100; + ContentModel cm = null; + ; + ArrayList projectList = new ArrayList<>(); + do { + page++; + try { + cm = new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI + .communityProjects( + id, String.valueOf(page), String.valueOf(size)), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (Optional.ofNullable(p.getOpenaireId()).isPresent()) + projectList.add(p.getOpenaireId()); + + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + + return projectList; + } + + /** + * it returns for each organization the list of associated communities + */ + public CommunityEntityMap getCommunityOrganization() throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), + OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains(o)) + organizationMap.put(o, new ArrayList<>()); + organizationMap.get(o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java index 414214f..5caedf4 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java @@ -8,21 +8,19 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Optional; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** - * This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the + * This class connects with the community APIs for production. It saves the information about the * context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community * - research infrastructure/initiative , the value is the label of the research community - research * infrastructure/initiative. @@ -31,11 +29,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final transient QueryInformationSystem queryInformationSystem; + private final transient UtilCommunityAPI queryInformationSystem; private final transient BufferedWriter writer; - public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { + public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException { final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); @@ -45,8 +43,7 @@ public class SaveCommunityMap implements Serializable { fileSystem.delete(hdfsWritePath, true); } - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryInformationSystem = new UtilCommunityAPI(); FSDataOutputStream fos = fileSystem.create(hdfsWritePath); writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); @@ -54,10 +51,10 @@ public class SaveCommunityMap implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils - .toString( - SaveCommunityMap.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json")); + .toString( + SaveCommunityMap.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -68,24 +65,29 @@ public class SaveCommunityMap implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); + final Boolean singleCommunity = Optional + .ofNullable(parser.get("singleDeposition")) + .map(Boolean::valueOf) + .orElse(false); - final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); + final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); - scm.saveCommunityMap(); + final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode); + + scm.saveCommunityMap(singleCommunity, community_id); } - private void saveCommunityMap() - throws ISLookUpException, IOException, DocumentException, SAXException { + private void saveCommunityMap(boolean singleCommunity, String communityId) + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap()); + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); writer - .write( - communityMapString); + .write( + communityMapString); writer.close(); } } + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties index 612a16e..1250d9e 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties @@ -1,5 +1,5 @@ #PROPERTIES FOR EOSC DUMP -sourcePath=/tmp/miriam/graphCopy +sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended #accessToken for the openaire sandbox following accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4 diff --git a/pom.xml b/pom.xml index 20eee8d..6641dbf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,6 +6,7 @@ dump-schema dump + api From 9435c6756eaf013e383e2d8aff909f0ea6dd6011 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 4 Jan 2024 12:17:28 +0100 Subject: [PATCH 23/25] refactoring --- .../dhp/communityapi/QueryCommunityAPI.java | 84 +++++----- .../model/CommunityContentprovider.java | 28 ++-- .../model/CommunityEntityMap.java | 18 +-- .../communityapi/model/CommunityModel.java | 98 ++++++------ .../communityapi/model/CommunitySummary.java | 6 +- .../dhp/communityapi/model/ContentModel.java | 56 +++---- .../communityapi/model/DatasourceList.java | 6 +- .../communityapi/model/OrganizationList.java | 6 +- .../dhp/communityapi/model/ProjectModel.java | 42 ++--- .../dhp/oa/graph/dump/UtilCommunityAPI.java | 147 ++++-------------- .../oa/graph/dump/eosc/SaveCommunityMap.java | 25 ++- .../oa/graph/dump/eosc/oozie_app/workflow.xml | 1 - .../dhp/oa/graph/dump/eosc_cm_parameters.json | 7 +- 13 files changed, 214 insertions(+), 310 deletions(-) diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java index fca6406..0b7cdaf 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.communityapi; +package eu.dnetlib.dhp.communityapi; import java.io.BufferedReader; import java.io.IOException; @@ -12,64 +12,64 @@ import java.net.URL; * @Date 06/10/23 */ public class QueryCommunityAPI { - private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; - private static String get(String geturl) throws IOException { - URL url = new URL(geturl); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); - int responseCode = conn.getResponseCode(); - String body = getBody(conn); - conn.disconnect(); - if (responseCode != HttpURLConnection.HTTP_OK) - throw new IOException("Unexpected code " + responseCode + body); + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); - return body; - } + return body; + } - public static String communities() throws IOException { + public static String communities() throws IOException { - return get(PRODUCTION_BASE_URL + "community/communities"); - } + return get(PRODUCTION_BASE_URL + "community/communities"); + } - public static String community(String id) throws IOException { + public static String community(String id) throws IOException { - return get(PRODUCTION_BASE_URL + "community/" + id); + return get(PRODUCTION_BASE_URL + "community/" + id); - } + } - public static String communityDatasource(String id) throws IOException { + public static String communityDatasource(String id) throws IOException { - return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); - } + } - public static String communityPropagationOrganization(String id) throws IOException { + public static String communityPropagationOrganization(String id) throws IOException { - return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); - } + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + } - public static String communityProjects(String id, String page, String size) throws IOException { + public static String communityProjects(String id, String page, String size) throws IOException { - return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); - } + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } - private static String getBody(HttpURLConnection conn) throws IOException { - String body = "{}"; - try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { - StringBuilder response = new StringBuilder(); - String responseLine = null; - while ((responseLine = br.readLine()) != null) { - response.append(responseLine.trim()); - } + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } - body = response.toString(); + body = response.toString(); - } - return body; - } + } + return body; + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java index a1de823..52c65b1 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java @@ -7,24 +7,24 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonAutoDetect @JsonIgnoreProperties(ignoreUnknown = true) public class CommunityContentprovider { - private String openaireId; + private String openaireId; - private String enabled; + private String enabled; - public String getEnabled() { - return enabled; - } + public String getEnabled() { + return enabled; + } - public void setEnabled(String enabled) { - this.enabled = enabled; - } + public void setEnabled(String enabled) { + this.enabled = enabled; + } - public String getOpenaireId() { - return openaireId; - } + public String getOpenaireId() { + return openaireId; + } - public void setOpenaireId(final String openaireId) { - this.openaireId = openaireId; - } + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java index efc0399..91e17e5 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java @@ -7,15 +7,15 @@ import java.util.List; public class CommunityEntityMap extends HashMap> { - public CommunityEntityMap() { - super(); - } + public CommunityEntityMap() { + super(); + } - public List get(String key) { + public List get(String key) { - if (super.get(key) == null) { - return new ArrayList<>(); - } - return super.get(key); - } + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java index 144dfd7..f942eea 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java @@ -12,71 +12,71 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; */ @JsonIgnoreProperties(ignoreUnknown = true) public class CommunityModel implements Serializable { - private String id; - private String name; - private String description; + private String id; + private String name; + private String description; - private String status; + private String status; - private String type; + private String type; - private List subjects; + private List subjects; - private String zenodoCommunity; + private String zenodoCommunity; - public List getSubjects() { - return subjects; - } + public List getSubjects() { + return subjects; + } - public void setSubjects(List subjects) { - this.subjects = subjects; - } + public void setSubjects(List subjects) { + this.subjects = subjects; + } - public String getZenodoCommunity() { - return zenodoCommunity; - } + public String getZenodoCommunity() { + return zenodoCommunity; + } - public void setZenodoCommunity(String zenodoCommunity) { - this.zenodoCommunity = zenodoCommunity; - } + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } - public String getType() { - return type; - } + public String getType() { + return type; + } - public void setType(String type) { - this.type = type; - } + public void setType(String type) { + this.type = type; + } - public String getStatus() { - return status; - } + public String getStatus() { + return status; + } - public void setStatus(String status) { - this.status = status; - } + public void setStatus(String status) { + this.status = status; + } - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public String getName() { - return name; - } + public String getName() { + return name; + } - public void setName(String name) { - this.name = name; - } + public void setName(String name) { + this.name = name; + } - public String getDescription() { - return description; - } + public String getDescription() { + return description; + } - public void setDescription(String description) { - this.description = description; - } + public void setDescription(String description) { + this.description = description; + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java index 93bbe83..47d4d1b 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java @@ -9,7 +9,7 @@ import java.util.ArrayList; * @Date 06/10/23 */ public class CommunitySummary extends ArrayList implements Serializable { - public CommunitySummary() { - super(); - } + public CommunitySummary() { + super(); + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java index ea0ed33..9d7245d 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java @@ -12,40 +12,40 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; */ @JsonIgnoreProperties(ignoreUnknown = true) public class ContentModel implements Serializable { - private List content; - private Integer totalPages; - private Boolean last; - private Integer number; + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; - public List getContent() { - return content; - } + public List getContent() { + return content; + } - public void setContent(List content) { - this.content = content; - } + public void setContent(List content) { + this.content = content; + } - public Integer getTotalPages() { - return totalPages; - } + public Integer getTotalPages() { + return totalPages; + } - public void setTotalPages(Integer totalPages) { - this.totalPages = totalPages; - } + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } - public Boolean getLast() { - return last; - } + public Boolean getLast() { + return last; + } - public void setLast(Boolean last) { - this.last = last; - } + public void setLast(Boolean last) { + this.last = last; + } - public Integer getNumber() { - return number; - } + public Integer getNumber() { + return number; + } - public void setNumber(Integer number) { - this.number = number; - } + public void setNumber(Integer number) { + this.number = number; + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java index 9a2f44a..0cd98a7 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java @@ -5,7 +5,7 @@ import java.io.Serializable; import java.util.ArrayList; public class DatasourceList extends ArrayList implements Serializable { - public DatasourceList() { - super(); - } + public DatasourceList() { + super(); + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java index 96305ff..33ccfd5 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java @@ -10,7 +10,7 @@ import java.util.ArrayList; */ public class OrganizationList extends ArrayList implements Serializable { - public OrganizationList() { - super(); - } + public OrganizationList() { + super(); + } } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java index 94b6114..72429ea 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java @@ -12,33 +12,33 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown = true) public class ProjectModel implements Serializable { - private String openaireId; + private String openaireId; - private String funder; + private String funder; - private String gratId; + private String gratId; - public String getFunder() { - return funder; - } + public String getFunder() { + return funder; + } - public void setFunder(String funder) { - this.funder = funder; - } + public void setFunder(String funder) { + this.funder = funder; + } - public String getGratId() { - return gratId; - } + public String getGratId() { + return gratId; + } - public void setGratId(String gratId) { - this.gratId = gratId; - } + public void setGratId(String gratId) { + this.gratId = gratId; + } - public String getOpenaireId() { - return openaireId; - } + public String getOpenaireId() { + return openaireId; + } - public void setOpenaireId(String openaireId) { - this.openaireId = openaireId; - } + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java index 22eca32..97d4bea 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java @@ -1,13 +1,9 @@ package eu.dnetlib.dhp.oa.graph.dump; -import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; - import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Optional; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -16,129 +12,44 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.communityapi.model.*; - -import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap; public class UtilCommunityAPI { - private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); + private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); - public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) - throws IOException { - if (singleCommunity) - return getMap(Arrays.asList(getCommunity(communityId))); - return getMap(getValidCommunities()); + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); - } + } - private CommunityMap getMap(List communities) { - final CommunityMap map = new CommunityMap(); - communities.forEach(c -> map.put(c.getId(), c.getName())); - return map; - } - + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } - private List getValidCommunities() throws IOException { - ObjectMapper mapper = new ObjectMapper(); - return mapper - .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) - .stream() - .filter( - community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public")) - && - (community.getType().equals("ri") || community.getType().equals("community"))) - .collect(Collectors.toList()); + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public")) + && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); - } + } - private CommunityModel getCommunity(String id) throws IOException { - ObjectMapper mapper = new ObjectMapper(); - return mapper - .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); - } - - - - private List getDatasourceList(String id) { - List datasourceList = new ArrayList<>(); - try { - - new ObjectMapper() - .readValue( - eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), - DatasourceList.class) - .stream() - .forEach(ds -> { - if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) { - - datasourceList.add(ds.getOpenaireId()); - } - - }); - - } catch (IOException e) { - throw new RuntimeException(e); - } - return datasourceList; - } - - private List getProjectList(String id) { - int page = -1; - int size = 100; - ContentModel cm = null; - ; - ArrayList projectList = new ArrayList<>(); - do { - page++; - try { - cm = new ObjectMapper() - .readValue( - eu.dnetlib.dhp.communityapi.QueryCommunityAPI - .communityProjects( - id, String.valueOf(page), String.valueOf(size)), - ContentModel.class); - if (cm.getContent().size() > 0) { - cm.getContent().forEach(p -> { - if (Optional.ofNullable(p.getOpenaireId()).isPresent()) - projectList.add(p.getOpenaireId()); - - }); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } while (!cm.getLast()); - - return projectList; - } - - /** - * it returns for each organization the list of associated communities - */ - public CommunityEntityMap getCommunityOrganization() throws IOException { - CommunityEntityMap organizationMap = new CommunityEntityMap(); - getValidCommunities() - .forEach(community -> { - String id = community.getId(); - try { - List associatedOrgs = MAPPER - .readValue( - eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), - OrganizationList.class); - associatedOrgs.forEach(o -> { - if (!organizationMap - .keySet() - .contains(o)) - organizationMap.put(o, new ArrayList<>()); - organizationMap.get(o).add(community.getId()); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - return organizationMap; - } + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java index 5caedf4..ce07028 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java @@ -8,7 +8,6 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Optional; -import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -18,6 +17,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; /** * This class connects with the community APIs for production. It saves the information about the @@ -51,10 +51,10 @@ public class SaveCommunityMap implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils - .toString( - SaveCommunityMap.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); + .toString( + SaveCommunityMap.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -66,9 +66,9 @@ public class SaveCommunityMap implements Serializable { log.info("outputPath: {}", outputPath); final Boolean singleCommunity = Optional - .ofNullable(parser.get("singleDeposition")) - .map(Boolean::valueOf) - .orElse(false); + .ofNullable(parser.get("singleDeposition")) + .map(Boolean::valueOf) + .orElse(false); final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); @@ -79,15 +79,14 @@ public class SaveCommunityMap implements Serializable { } private void saveCommunityMap(boolean singleCommunity, String communityId) - throws IOException { + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); writer - .write( - communityMapString); + .write( + communityMapString); writer.close(); } } - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml index 9b34569..843cffc 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/oozie_app/workflow.xml @@ -95,7 +95,6 @@ eu.dnetlib.dhp.oa.graph.dump.eosc.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json index 6e42bfa..a33ec9d 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json @@ -1,12 +1,7 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, + { "paramName":"nn", "paramLongName":"nameNode", From 231ed85aa119d9e57a3eab4b6046cb055c59177e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Jan 2024 11:59:36 +0100 Subject: [PATCH 24/25] - --- .../oa/graph/dump/eosc/SelectEoscResultsJobStep1.java | 11 +++++++++++ .../dhp/oa/graph/dump/countryresults/job.properties | 3 +++ 2 files changed, 14 insertions(+) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index d7f0ece..be34abd 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -78,6 +78,17 @@ public class SelectEoscResultsJobStep1 implements Serializable { String inputPath, String outputPath, Class inputClazz, String communityMapPath, String eoscDatasourceIdsPath) { +// final StructType structureSchema = new StructType() +// .add("eoscId", DataTypes.StringType) +// .add("graphId", DataTypes.StringType) +// .add("graphName", DataTypes.StringType); +// +// // .fromDDL("`graphId`: STRING, `eoscId`:STRING"); +// org.apache.spark.sql.Dataset df = spark +// .read() +// .schema(structureSchema) +// .json(eoscDatasourceIdsPath); + List df = Utils .readPath(spark, eoscDatasourceIdsPath, MasterDuplicate.class) .collectAsList(); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties new file mode 100644 index 0000000..b2eab7a --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties @@ -0,0 +1,3 @@ +sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted +outputPath=/tmp/miriam/graph_dumps/country_PT +country=PT \ No newline at end of file From ec30ec0d29637f403ced3cd89410af5a3a5bafa3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Jan 2024 12:04:21 +0100 Subject: [PATCH 25/25] - --- .../dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index be34abd..890eedc 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -107,7 +107,7 @@ public class SelectEoscResultsJobStep1 implements Serializable { (MapFunction) r -> (Result) ResultMapper .map(r, communityMap, df), Encoders.bean(Result.class)) - .filter(Objects::nonNull) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip")