[bip affiliations] consider only DOI-based records

This commit is contained in:
Claudio Atzori 2024-08-05 12:13:48 +02:00
parent fecbf93e0e
commit 8e7ef79ce0
1 changed file with 2 additions and 1 deletion

View File

@@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable {
Dataset<Row> df = spark
.read()
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
.json(inputPath);
.json(inputPath)
.where("DOI is not null");
// unroll nested arrays
df = df