[BulkTagging] added fix for the case where result.dataInfo is null

This commit is contained in:
Miriam Baglioni 2022-04-14 09:04:24 +02:00
parent b93a141d6c
commit 8e8933d41a
1 changed file with 5 additions and 0 deletions

View File

@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -102,6 +103,7 @@ public class SparkBulkTagJob {
ResultTagger resultTagger = new ResultTagger(); ResultTagger resultTagger = new ResultTagger();
readPath(spark, inputPath, resultClazz) readPath(spark, inputPath, resultClazz)
.map(patchResult(), Encoders.bean(resultClazz)) .map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull)
.map( .map(
(MapFunction<R, R>) value -> resultTagger (MapFunction<R, R>) value -> resultTagger
.enrichContextCriteria( .enrichContextCriteria(
@ -124,6 +126,9 @@ public class SparkBulkTagJob {
// TODO remove this hack as soon as the values fixed by this method are provided as non-null // TODO remove this hack as soon as the values fixed by this method are provided as non-null
private static <R extends Result> MapFunction<R, R> patchResult() { private static <R extends Result> MapFunction<R, R> patchResult() {
return r -> { return r -> {
if(r.getDataInfo() == null){
return null;
}
if (r.getDataInfo().getDeletedbyinference() == null) { if (r.getDataInfo().getDeletedbyinference() == null) {
r.getDataInfo().setDeletedbyinference(false); r.getDataInfo().setDeletedbyinference(false);
} }