From f09e0658654a89e91147a1b6dbbbc6e4ab48c8c6 Mon Sep 17 00:00:00 2001
From: Sandro La Bruzzo
Date: Wed, 26 Feb 2020 19:26:19 +0100
Subject: [PATCH] incremented number of repartition

---
 .../java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java
index 2c7107b70..be24d8a4b 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java
@@ -37,7 +37,7 @@ public class SparkGenerateScholix {
 
         final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
 
-        final JavaRDD<String> relationToExport = sc.textFile(graphPath + "/relation").filter(ProvisionUtil::isNotDeleted);
+        final JavaRDD<String> relationToExport = sc.textFile(graphPath + "/relation").filter(ProvisionUtil::isNotDeleted).repartition(4000);
         final JavaPairRDD<String, String> scholixSummary = sc.textFile(workingDirPath + "/summary").mapToPair((PairFunction<String, String, String>) i -> new Tuple2<>(DHPUtils.getJPathString(jsonIDPath, i), i));
         scholixSummary.join(
                 relationToExport
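
Context for reviewers (not part of the patch): the change appends .repartition(4000) to the relation RDD so that the subsequent join against the summary RDD runs with many more, smaller tasks. The sketch below is a minimal, self-contained reconstruction of that repartition-before-join pattern; the class name, input paths, key extraction, and local master are assumptions for illustration, not code taken from SparkGenerateScholix.

// Illustrative sketch of the repartition-before-join pattern applied in this patch.
// Paths, key extraction, and the local master are assumptions made to keep it runnable.
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

public class RepartitionBeforeJoinSketch {
    public static void main(String[] args) {
        final SparkSession spark = SparkSession.builder()
                .appName("RepartitionBeforeJoinSketch")
                .master("local[*]") // assumption: local run, not the production cluster config
                .getOrCreate();
        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

        // Spread the large relation input over many partitions before the join,
        // mirroring the .repartition(4000) introduced by this commit.
        final JavaRDD<String> relations = sc.textFile("/tmp/graph/relation").repartition(4000);

        // Key both sides; here the key is simply the first tab-separated field (assumption).
        final JavaPairRDD<String, String> relByKey =
                relations.mapToPair(line -> new Tuple2<>(line.split("\t", 2)[0], line));
        final JavaPairRDD<String, String> summaryByKey =
                sc.textFile("/tmp/working/summary")
                        .mapToPair(line -> new Tuple2<>(line.split("\t", 2)[0], line));

        // The join inherits the higher parallelism from the repartitioned side,
        // which keeps individual tasks small when the relation set is very large.
        summaryByKey.join(relByKey).saveAsTextFile("/tmp/working/joined");

        spark.stop();
    }
}

Note that repartition() forces a full shuffle of the relation RDD; the larger partition count trades that extra shuffle for smaller per-task inputs during the join.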