From 8d97949316a93884a123746cec0d60f6c34b2a2e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 7 Oct 2022 09:52:45 +0200 Subject: [PATCH 1/2] [cleaning] fixed loop in wf nodes --- .../eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index 2ba0a7ad7..08e74a5e5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -492,7 +492,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + @@ -521,7 +521,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + @@ -550,7 +550,7 @@ --datasourcePath${workingDir}/working/hostedby --collectedfrom${collectedfrom} - + From ece40adc09b5124386fb4f28391f7b151ea0ceea Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 11 Oct 2022 10:10:20 +0200 Subject: [PATCH 2/2] [cleaning] fixing NPE in the country cleaning phase --- .../dhp/oa/graph/clean/country/GetDatasourceFromCountry.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java index dd5af6998..d3741d3e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/country/GetDatasourceFromCountry.java @@ -65,7 +65,6 @@ public class GetDatasourceFromCountry implements Serializable { conf, isSparkSessionManaged, spark -> { - getDatasourceFromCountry(spark, country, inputPath, workingPath); }); } @@ -83,7 +82,6 @@ public class GetDatasourceFromCountry implements Serializable { (FilterFunction) o -> !o.getDataInfo().getDeletedbyinference() && o.getCountry().getClassid().length() > 0 && o.getCountry().getClassid().equals(country)); - ; // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass Dataset relation = spark @@ -97,7 +95,7 @@ public class GetDatasourceFromCountry implements Serializable { !rel.getDataInfo().getDeletedbyinference()); organization - .joinWith(relation, organization.col("id").equalTo(relation.col("target")), "left") + .joinWith(relation, organization.col("id").equalTo(relation.col("target"))) .map((MapFunction, String>) t2 -> t2._2().getSource(), Encoders.STRING()) .write() .mode(SaveMode.Overwrite)