[Enrichment Step] The NPE issue on author should now be fixed

This commit is contained in:
Miriam Baglioni 2022-04-13 14:39:13 +02:00
parent 550e1a4e33
commit d205bf78d8
1 changed file with 17 additions and 14 deletions

View File

@ -117,22 +117,25 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<Relation> relation = readPath(spark, outputPath + "/relationSubset", Relation.class);
log.info("Reading Graph table from: {}", inputResultPath);
readPath(spark, inputResultPath, resultClazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
.filter((FilterFunction<R>) r ->
Optional.ofNullable(r.getAuthor())
.map(al -> al.stream().anyMatch(
a -> hasAllowedPid(a, allowedPids)))
.orElse(false)
)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/resultSubset");
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
.filter(
(FilterFunction<R>) r -> Optional
.ofNullable(r.getAuthor())
.map(
al -> al
.stream()
.anyMatch(
a -> hasAllowedPid(a, allowedPids)))
.orElse(false)
)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/resultSubset");
Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz);