[Enrichment Step] Fix NPE when a result's author list is null

This commit is contained in:
Miriam Baglioni 2022-04-13 14:34:48 +02:00
parent 8a39a85a5f
commit 550e1a4e33
1 changed file with 15 additions and 7 deletions

View File

@@ -117,14 +117,22 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<Relation> relation = readPath(spark, outputPath + "/relationSubset", Relation.class);
log.info("Reading Graph table from: {}", inputResultPath);
readPath(spark, inputResultPath, resultClazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
.filter((FilterFunction<R>) r -> r.getAuthor().stream().anyMatch(a -> hasAllowedPid(a, allowedPids)))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/resultSubset");
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
.filter((FilterFunction<R>) r ->
Optional.ofNullable(r.getAuthor())
.map(al -> al.stream().anyMatch(
a -> hasAllowedPid(a, allowedPids)))
.orElse(false)
)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/resultSubset");
Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz);