forked from D-Net/dnet-hadoop
Trying to overcome out-of-memory (OOM) errors during the duplicate scan phase
This commit is contained in:
parent
18c555cd79
commit
3c728aaa0c
|
@ -88,6 +88,7 @@ public class SparkCreateSimRels extends AbstractSparkAction {
|
|||
|
||||
JavaPairRDD<String, MapDocument> mapDocuments = sc
|
||||
.textFile(DedupUtility.createEntityPath(graphBasePath, subEntity))
|
||||
.repartition(10000)
|
||||
.mapToPair(
|
||||
(PairFunction<String, String, MapDocument>) s -> {
|
||||
MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s);
|
||||
|
|
Loading…
Reference in New Issue