1
0
Fork 0

fix: filter the blocks with size = 1

This commit is contained in:
Claudio Atzori 2020-07-16 10:11:32 +02:00
parent 4b9fb2ffb8
commit 805de4eca1
1 changed file with 2 additions and 2 deletions

View File

@@ -51,8 +51,8 @@ public class Deduper implements Serializable {
.map(it -> Block.from(it, a)) .map(it -> Block.from(it, a))
.collect(Collectors.toList()) .collect(Collectors.toList())
.iterator()) .iterator())
.filter(b -> b.getDocuments().size() > 1)
.mapToPair(block -> new Tuple2<>(block.getKey(), block)) .mapToPair(block -> new Tuple2<>(block.getKey(), block))
.reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize)); .reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize))
.filter(b -> b._2().getDocuments().size() > 1);
} }
} }