Added compression to the output dataset

This commit is contained in:
Enrico Ottonello 2020-11-13 12:45:31 +01:00
parent 9a2fa9dc2f
commit 005f849674
1 changed file with 2 additions and 0 deletions

View File

@@ -128,6 +128,8 @@ public class SparkGenEnrichedOrcidWorks {
}) })
.filter(p -> p != null); .filter(p -> p != null);
sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
oafPublicationRDD oafPublicationRDD
.mapToPair( .mapToPair(
p -> new Tuple2<>(p.getClass().toString(), p -> new Tuple2<>(p.getClass().toString(),