forked from D-Net/dnet-hadoop
fix: deduper must use queueMaxSize instead of groupMaxSize for the block definition
This commit is contained in:
parent
d380b85246
commit
0f77cac4b5
|
@ -37,7 +37,7 @@ public class Deduper implements Serializable {
|
|||
public static JavaPairRDD<String, Block> createSortedBlocks(
|
||||
JavaPairRDD<String, MapDocument> mapDocs, DedupConfig config) {
|
||||
final String of = config.getWf().getOrderField();
|
||||
final int maxQueueSize = config.getWf().getGroupMaxSize();
|
||||
final int maxQueueSize = config.getWf().getQueueMaxSize();
|
||||
|
||||
return mapDocs
|
||||
// the reduce is just to make sure that we don't have documents with the same id
|
||||
|
|
Loading…
Reference in New Issue