diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 324e3dd58..b4402a2fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -286,6 +286,12 @@ public class GraphCleaningFunctions extends CleaningFunctions { public static T cleanup(T value, VocabularyGroup vocs) { + if (Objects.isNull(value.getDataInfo())) { + final DataInfo d = new DataInfo(); + d.setDeletedbyinference(false); + value.setDataInfo(d); + } + if (value instanceof OafEntity) { OafEntity e = (OafEntity) value; @@ -305,6 +311,10 @@ public class GraphCleaningFunctions extends CleaningFunctions { } else if (value instanceof Result) { Result r = (Result) value; + if (Objects.isNull(r.getContext())) { + r.setContext(new ArrayList<>()); + } + if (Objects.nonNull(r.getFulltext()) && (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) || ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 68c740dd5..15712ad66 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -27,6 +27,7 @@ import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; public class SparkBulkTagJob { @@ -165,10 +166,12 @@ public class SparkBulkTagJob { // TODO remove this hack as soon as the values fixed by this method will be provided as NON null private static MapFunction patchResult() { return r -> { - if (r.getDataInfo().getDeletedbyinference() == null) { + if (Objects.isNull(r.getDataInfo())) { + r.setDataInfo(OafMapperUtils.dataInfo(false, "", false, false, OafMapperUtils.unknown("", ""), "")); + } else if (r.getDataInfo().getDeletedbyinference() == null) { r.getDataInfo().setDeletedbyinference(false); } - if (r.getContext() == null) { + if (Objects.isNull(r.getContext())) { r.setContext(new ArrayList<>()); } return r; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 28b6f616d..184d24751 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -5,10 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -88,14 +85,33 @@ public class PrepareResultCountrySet { // selects all the results non deleted by inference and non invisible Dataset result = readPath(spark, inputPath, resultClazz) .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - !r.getDataInfo().getInvisible()); + (FilterFunction) r -> Optional + .ofNullable(r.getDataInfo()) + .map(dataInfo -> !dataInfo.getDeletedbyinference() && !dataInfo.getInvisible()) + .orElse(true)); // of the results collects the distinct keys for collected from (at the level of the result) and hosted by // and produces pairs resultId, key for each distinct key associated to the result result.flatMap((FlatMapFunction) r -> { - Set cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet()); - cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet())); + Set cfhb = Optional + .ofNullable(r.getCollectedfrom()) + .map(cf -> cf.stream().map(KeyValue::getKey).collect(Collectors.toSet())) + .orElse(new HashSet<>()); + cfhb + .addAll( + Optional + .ofNullable(r.getInstance()) + .map( + i -> i + .stream() + .map( + ii -> Optional + .ofNullable(ii.getHostedby()) + .map(KeyValue::getKey) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())) + .orElse(new HashSet<>())); return cfhb .stream() .map(value -> EntityEntityRel.newInstance(r.getId(), value)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml index 6fb2a1253..86847ed46 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml @@ -1,4 +1,12 @@ + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + oozie.use.system.libpath true @@ -7,4 +15,28 @@ oozie.action.sharelib.for.spark spark2 + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9cd82a343..f361a266c 100644 --- a/pom.xml +++ b/pom.xml @@ -888,7 +888,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [3.17.1] + [3.17.2] [4.0.3] [6.0.5] [3.1.6]