diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index 8f911e980..98915bdc5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable { Dataset df = spark .read() .schema("`DOI` STRING, `Matchings` ARRAY>") - .json(inputPath); + .json(inputPath) + .where("DOI is not null"); // unroll nested arrays df = df