From 8e7ef79ce09d41d57d9d70f90875563bd2799e40 Mon Sep 17 00:00:00 2001
From: Claudio Atzori
Date: Mon, 5 Aug 2024 12:13:48 +0200
Subject: [PATCH] [bip affiliations] considers only DOI based records

---
 .../bipaffiliations/PrepareAffiliationRelations.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
index 8f911e980..98915bdc5 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable {
 		Dataset df = spark
 			.read()
 			.schema("`DOI` STRING, `Matchings` ARRAY>")
-			.json(inputPath);
+			.json(inputPath)
+			.where("DOI is not null");
 
 		// unroll nested arrays
 		df = df