From 1e42d984e1ead5e1e7519d69f8150f44060f9565 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 15 Sep 2022 10:49:42 +0200 Subject: [PATCH] [aggregator graph] save invalid records aside for further inspection --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 2 +- .../dhp/oa/graph/raw/GenerateEntitiesApplication.java | 10 +++++----- .../java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 7 ++++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 0a32766c9..cdc707084 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -143,7 +143,7 @@ public abstract class AbstractMdRecordToOafMapper { return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } catch (DocumentException e) { log.error("Error with record:\n" + xml); - return null; + return Lists.newArrayList(); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 290a22656..06d5e9acb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -150,8 +150,8 @@ public class GenerateEntitiesApplication { .sequenceFile(sp, Text.class, Text.class) .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) - .filter(Objects::nonNull) - .flatMap(List::iterator)); + .flatMap(List::iterator) + .filter(Objects::nonNull)); } switch (mode) { @@ -225,7 +225,8 @@ public class GenerateEntitiesApplication { final boolean shouldHashId, final VocabularyGroup vocs) { - if (Objects.isNull(convertToListOaf(id, s, shouldHashId, vocs))) { + final List oaf = convertToListOaf(id, s, shouldHashId, vocs); + if (Optional.ofNullable(oaf).map(List::isEmpty).orElse(false)) { return s; } return null; @@ -235,8 +236,7 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: {}", clazz); - log.error(s); + log.error("Error parsing object of class: {}:\n{}", clazz, s); throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 390920027..506a69012 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -926,11 +926,12 @@ class MappersTest { } @Test - void testNotWellFormed() throws IOException, DocumentException { + void testNotWellFormed() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_notwellformed.xml"))); - assertEquals(null, new OafToOafMapper(vocs, false, true).processMdRecord(xml)); - + final List actual = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(actual); + assertTrue(actual.isEmpty()); } private void assertValidId(final String id) {