From c48f6e9c57c6b1fe549d59e032bc0cad6fa01f66 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 14 Sep 2022 17:11:26 +0200 Subject: [PATCH] [aggregator graph] save invalid records aside for further inspection --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 2 +- .../dhp/oa/graph/raw/GenerateEntitiesApplication.java | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index cdc707084..0a32766c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -143,7 +143,7 @@ public abstract class AbstractMdRecordToOafMapper { return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } catch (DocumentException e) { log.error("Error with record:\n" + xml); - return Lists.newArrayList(); + return null; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index e9de43f7f..290a22656 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -126,8 +126,6 @@ public class GenerateEntitiesApplication { log.info("Generate entities from files:"); existingSourcePaths.forEach(log::info); - JavaRDD inputRdd = sc.emptyRDD(); - for (final String sp : existingSourcePaths) { RDD invalidRecords = sc .sequenceFile(sp, Text.class, Text.class) @@ -141,7 +139,11 @@ public class GenerateEntitiesApplication { .mode(SaveMode.Append) .option("compression", "gzip") .text(invalidPath); + } + JavaRDD inputRdd = sc.emptyRDD(); + + for (final String sp : existingSourcePaths) { inputRdd = inputRdd .union( sc @@ -223,7 +225,7 @@ public class GenerateEntitiesApplication { final boolean shouldHashId, final VocabularyGroup vocs) { - if (convertToListOaf(id, s, shouldHashId, vocs).isEmpty()) { + if (Objects.isNull(convertToListOaf(id, s, shouldHashId, vocs))) { return s; } return null;