From 227e84be9924d1a3b172cce239df9bb8944b64d6 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Wed, 6 Nov 2024 16:36:34 +0100
Subject: [PATCH] [orcidenrichment] fix path handling: add missing '/'
 separators when composing graph/target paths, and drop the now-redundant
 trailing '/' from the workflow parameters

---
NOTE(review): reconstructed from a whitespace-mangled copy (newlines, the
author e-mail, and the workflow.xml <arg> tags were stripped). Context-line
wrapping and hunk offsets are best-effort; re-validate with
`git apply --check` before merging.

 .../SparkPropagateOrcidAuthor.java                 | 10 +++++-----
 .../orcidtoresultfromsemrel/oozie_app/workflow.xml |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java
index 9f7b2f29e..69aadd108 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkPropagateOrcidAuthor.java
@@ -33,7 +33,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
 		// Create instance and run the Spark application
 		SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor(
 			"/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log);
-		app.run();
+		app.initialize().run();
 	}
 
 
@@ -67,7 +67,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
 			.keySet().stream().filter(ModelSupport::isResult)
 			.forEach(e -> {
 				Dataset orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema())
-					.json(graphPath + e.name())
+					.json(graphPath + "/" + e.name())
 					.as(Encoders.bean(Result.class))
 					.filter((FilterFunction) r -> r.getAuthor().stream()
 						.anyMatch(a -> a.getPid()
@@ -80,13 +80,13 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
 					.selectExpr("_1 as target", "_2 as orcid_authors");
 
 				Dataset result = spark.read().schema(Encoders.bean(Result.class).schema())
-					.json(graphPath + e.name())
+					.json(graphPath + "/" + e.name())
 					.as(Encoders.bean(Result.class))
 					.selectExpr("id", "author as graph_authors");
 
 				Dataset supplements = spark.read()
 					.schema(Encoders.bean(Relation.class).schema())
-					.json(graphPath + "relation")
+					.json(graphPath + "/relation")
 					.where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '"
 						+ ModelConstants.IS_SUPPLEMENTED_BY + "')")
 					.selectExpr("source as id", "target");
@@ -98,7 +98,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
 					.write()
 					.mode(SaveMode.Overwrite)
 					.option("compression", "gzip")
-					.parquet(targetPath + e.name() + "_unmatched");
+					.parquet(targetPath + "/" + e.name() + "_unmatched");
 			});
 	}
 
diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
index 211ab0200..6feb22a20 100644
--- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml
@@ -112,10 +112,10 @@
                     --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                     --conf spark.sql.shuffle.partitions=8000
                 </spark-opts>
-                <arg>--graphPath</arg><arg>${sourcePath}/</arg>
-                <arg>--orcidPath</arg><arg>${sourcePath}/</arg>
-                <arg>--workingDir</arg><arg>${workingDir}/</arg>
-                <arg>--targetPath</arg><arg>${outputPath}/</arg>
+                <arg>--graphPath</arg><arg>${sourcePath}</arg>
+                <arg>--orcidPath</arg><arg>${sourcePath}</arg>
+                <arg>--workingDir</arg><arg>${workingDir}</arg>
+                <arg>--targetPath</arg><arg>${outputPath}</arg>
                 <arg>--matchingSource</arg><arg>graph</arg>