forked from D-Net/dnet-hadoop
fix: deduper must use queueMaxSize instead of groupMaxSize for the block definition
This commit is contained in:
parent
d380b85246
commit
0f77cac4b5
|
@ -37,7 +37,7 @@ public class Deduper implements Serializable {
|
||||||
public static JavaPairRDD<String, Block> createSortedBlocks(
|
public static JavaPairRDD<String, Block> createSortedBlocks(
|
||||||
JavaPairRDD<String, MapDocument> mapDocs, DedupConfig config) {
|
JavaPairRDD<String, MapDocument> mapDocs, DedupConfig config) {
|
||||||
final String of = config.getWf().getOrderField();
|
final String of = config.getWf().getOrderField();
|
||||||
final int maxQueueSize = config.getWf().getGroupMaxSize();
|
final int maxQueueSize = config.getWf().getQueueMaxSize();
|
||||||
|
|
||||||
return mapDocs
|
return mapDocs
|
||||||
// the reduce is just to be sure that we haven't document with same id
|
// the reduce is just to be sure that we haven't document with same id
|
||||||
|
|
Loading…
Reference in New Issue