From 55595d723599a3d0b30fbec81742829ad801e9b0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 26 May 2020 10:28:35 +0200 Subject: [PATCH] HACK: patch NULL values with defaults found in result.datainfo.deletedbyinference and result.context --- .../eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 1c65e8ade..4800def0a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.ArrayList; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -100,6 +101,7 @@ public class SparkBulkTagJob { ResultTagger resultTagger = new ResultTagger(); readPath(spark, inputPath, resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) .map( (MapFunction) value -> resultTagger .enrichContextCriteria( @@ -119,4 +121,17 @@ public class SparkBulkTagJob { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } + // TODO remove this hack as soon as the values fixed by this method are provided as non-null; a record whose DataInfo is itself null is passed through with deletedbyinference unpatched instead of failing with a NullPointerException + private static MapFunction patchResult() { + return (MapFunction) r -> { + if (r.getDataInfo() != null && r.getDataInfo().getDeletedbyinference() == null) { + r.getDataInfo().setDeletedbyinference(false); + } + if (r.getContext() == null) { + r.setContext(new ArrayList<>()); + } + return r; + }; + } + }