From 805de4eca15822092f99bc6c1e64b0cbda95b668 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 16 Jul 2020 10:11:32 +0200 Subject: [PATCH] fix: filter the blocks with size = 1 --- .../src/main/java/eu/dnetlib/dhp/oa/dedup/Deduper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/Deduper.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/Deduper.java index 5e8a50fcc..68201677e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/Deduper.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/Deduper.java @@ -51,8 +51,8 @@ public class Deduper implements Serializable { .map(it -> Block.from(it, a)) .collect(Collectors.toList()) .iterator()) - .filter(b -> b.getDocuments().size() > 1) .mapToPair(block -> new Tuple2<>(block.getKey(), block)) - .reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize)); + .reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize)) + .filter(b -> b._2().getDocuments().size() > 1); } }