From 8d83b5173ca0d7c436d805f507ad3ea18f155e66 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Oct 2023 11:59:16 +0200 Subject: [PATCH] extended the model to accomodate the new entities and relation to be dumped --- .../eu/dnetlib/dhp/eosc/model/Funder.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Fundings.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Granted.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Programme.java | 5 +++ .../eu/dnetlib/dhp/eosc/model/Project.java | 5 +++ .../eosc/SparkDumpOrganizationProject.java | 34 ++++++++++++------- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java index eb8df14..6af9bb8 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java index 0440be3..739bf69 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Fundings.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java index 26b28ef..178bd48 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Granted.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java index 0d90081..d1190f7 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Programme.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index 6e71764..53bfe6b 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -1,6 +1,11 @@ package eu.dnetlib.dhp.eosc.model; +/** + * @author miriam.baglioni + * @Date 25/10/23 + */ + /** * @author miriam.baglioni * @Date 25/10/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java index 8fcbce8..62aef46 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpOrganizationProject.java @@ -6,9 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; -import eu.dnetlib.dhp.eosc.model.Organization; -import eu.dnetlib.dhp.eosc.model.Project; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -21,8 +18,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.Organization; +import eu.dnetlib.dhp.eosc.model.Project; import eu.dnetlib.dhp.eosc.model.Provenance; import eu.dnetlib.dhp.eosc.model.RelType; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -74,18 +74,26 @@ public class SparkDumpOrganizationProject implements Serializable { Dataset organization = Utils.readPath(spark, outputPath + "organization", Organization.class); Dataset project = Utils.readPath(spark, outputPath + "project", Project.class); - Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class) - .filter((FilterFunction) r-> !r.getDataInfo().getDeletedbyinference() && r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PARTICIPANT)); + Dataset relation = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter( + (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() + && r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PARTICIPANT)); - Dataset eoscOrgs = relation.joinWith(organization, relation.col("source").equalTo(organization.col("id"))) - .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); + Dataset eoscOrgs = relation + .joinWith(organization, relation.col("source").equalTo(organization.col("id"))) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); - eoscOrgs.joinWith(project, eoscOrgs.col("target").equalTo(project.col("id"))) - .map((MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2-> eu.dnetlib.dhp.eosc.model.Relation.newInstance(t2._1().getSource(), t2._1().getTarget()), Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath + "organizationProject"); + eoscOrgs + .joinWith(project, eoscOrgs.col("target").equalTo(project.col("id"))) + .map( + (MapFunction, eu.dnetlib.dhp.eosc.model.Relation>) t2 -> eu.dnetlib.dhp.eosc.model.Relation + .newInstance(t2._1().getSource(), t2._1().getTarget()), + Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "organizationProject"); }