Increased the number of partitions for the relation RDD (repartition to 4000)

This commit is contained in:
Sandro La Bruzzo 2020-02-26 19:26:19 +01:00
parent 071f5c3e52
commit f09e065865
1 changed file with 1 addition and 1 deletion

View File

@@ -37,7 +37,7 @@ public class SparkGenerateScholix {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final JavaRDD<String> relationToExport = sc.textFile(graphPath + "/relation").filter(ProvisionUtil::isNotDeleted);
final JavaRDD<String> relationToExport = sc.textFile(graphPath + "/relation").filter(ProvisionUtil::isNotDeleted).repartition(4000);
final JavaPairRDD<String,String> scholixSummary = sc.textFile(workingDirPath + "/summary").mapToPair((PairFunction<String, String, String>) i -> new Tuple2<>(DHPUtils.getJPathString(jsonIDPath, i), i));
scholixSummary.join(
relationToExport