Trying to overcome out-of-memory (OOM) errors during the duplicate scan phase

This commit is contained in:
Claudio Atzori 2020-07-08 22:39:51 +02:00
parent 18c555cd79
commit 3c728aaa0c
1 changed file with 1 addition and 0 deletions

View File

@@ -88,6 +88,7 @@ public class SparkCreateSimRels extends AbstractSparkAction {
JavaPairRDD<String, MapDocument> mapDocuments = sc JavaPairRDD<String, MapDocument> mapDocuments = sc
.textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity))
.repartition(10000)
.mapToPair( .mapToPair(
(PairFunction<String, String, MapDocument>) s -> { (PairFunction<String, String, MapDocument>) s -> {
MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s);