[Enrichment Step] Fix NullPointerException when a result's author list is null

This commit is contained in:
Miriam Baglioni 2022-04-13 14:34:48 +02:00
parent 8a39a85a5f
commit 550e1a4e33
1 changed file with 15 additions and 7 deletions

View File

@ -117,15 +117,23 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<Relation> relation = readPath(spark, outputPath + "/relationSubset", Relation.class); Dataset<Relation> relation = readPath(spark, outputPath + "/relationSubset", Relation.class);
log.info("Reading Graph table from: {}", inputResultPath); log.info("Reading Graph table from: {}", inputResultPath);
readPath(spark, inputResultPath, resultClazz) readPath(spark, inputResultPath, resultClazz)
.filter( .filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()) (FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
.filter((FilterFunction<R>) r -> r.getAuthor().stream().anyMatch(a -> hasAllowedPid(a, allowedPids))) .filter((FilterFunction<R>) r ->
Optional.ofNullable(r.getAuthor())
.map(al -> al.stream().anyMatch(
a -> hasAllowedPid(a, allowedPids)))
.orElse(false)
)
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath + "/resultSubset"); .json(outputPath + "/resultSubset");
Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz); Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz);
result.foreach((ForeachFunction<R>) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); result.foreach((ForeachFunction<R>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));