From b0fed1725edc8c000619906751ad46a105c9449b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 19 Oct 2023 12:13:45 +0200 Subject: [PATCH] avoid NPEs --- .../oaf/utils/GraphCleaningFunctions.java | 10 ++++++ .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 7 ++-- .../PrepareResultCountrySet.java | 32 ++++++++++++++----- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 324e3dd58..b4402a2fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -286,6 +286,12 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static T cleanup(T value, VocabularyGroup vocs) { + if (Objects.isNull(value.getDataInfo())) { + final DataInfo d = new DataInfo(); + d.setDeletedbyinference(false); + value.setDataInfo(d); + } + if (value instanceof OafEntity) { OafEntity e = (OafEntity) value; @@ -305,6 +311,10 @@ public class GraphCleaningFunctions extends CleaningFunctions { } else if (value instanceof Result) { Result r = (Result) value; + if (Objects.isNull(r.getContext())) { + r.setContext(new ArrayList<>()); + } + if (Objects.nonNull(r.getFulltext()) && (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) || ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 3186ed5c0..fc3882b73 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -25,6 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class SparkBulkTagJob { @@ -170,10 +171,12 @@ public class SparkBulkTagJob { // TODO remove this hack as soon as the values fixed by this method will be provided as NON null private static MapFunction patchResult() { return r -> { - if (r.getDataInfo().getDeletedbyinference() == null) { + if (Objects.isNull(r.getDataInfo())) { + r.setDataInfo(OafMapperUtils.dataInfo(false, "", false, false, OafMapperUtils.unknown("", ""), "")); + } else if (r.getDataInfo().getDeletedbyinference() == null) { r.getDataInfo().setDeletedbyinference(false); } - if (r.getContext() == null) { + if (Objects.isNull(r.getContext())) { r.setContext(new ArrayList<>()); } return r; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 28b6f616d..184d24751 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -5,10 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -88,14 +85,33 @@ public class PrepareResultCountrySet { // selects all the results non deleted by inference and non invisible Dataset result = readPath(spark, inputPath, resultClazz) .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - !r.getDataInfo().getInvisible()); + (FilterFunction) r -> Optional + .ofNullable(r.getDataInfo()) + .map(dataInfo -> !dataInfo.getDeletedbyinference() && !dataInfo.getInvisible()) + .orElse(true)); // of the results collects the distinct keys for collected from (at the level of the result) and hosted by // and produces pairs resultId, key for each distinct key associated to the result result.flatMap((FlatMapFunction) r -> { - Set cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet()); - cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet())); + Set cfhb = Optional + .ofNullable(r.getCollectedfrom()) + .map(cf -> cf.stream().map(KeyValue::getKey).collect(Collectors.toSet())) + .orElse(new HashSet<>()); + cfhb + .addAll( + Optional + .ofNullable(r.getInstance()) + .map( + i -> i + .stream() + .map( + ii -> Optional + .ofNullable(ii.getHostedby()) + .map(KeyValue::getKey) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())) + .orElse(new HashSet<>())); return cfhb .stream() .map(value -> EntityEntityRel.newInstance(r.getId(), value))