1
0
Fork 0

fix: deduper must use queueMaxSize instead of groupMaxSize for the block definition

This commit is contained in:
Claudio Atzori 2020-07-02 12:43:51 +02:00
parent d380b85246
commit 0f77cac4b5
1 changed file with 1 addition and 1 deletion

View File

@@ -37,7 +37,7 @@ public class Deduper implements Serializable {
public static JavaPairRDD<String, Block> createSortedBlocks( public static JavaPairRDD<String, Block> createSortedBlocks(
JavaPairRDD<String, MapDocument> mapDocs, DedupConfig config) { JavaPairRDD<String, MapDocument> mapDocs, DedupConfig config) {
final String of = config.getWf().getOrderField(); final String of = config.getWf().getOrderField();
final int maxQueueSize = config.getWf().getGroupMaxSize(); final int maxQueueSize = config.getWf().getQueueMaxSize();
return mapDocs return mapDocs
// the reduce is just to be sure that we haven't document with same id // the reduce is just to be sure that we haven't document with same id