added compression to output dataset

This commit is contained in:
Enrico Ottonello 2020-11-13 12:45:31 +01:00
parent 9a2fa9dc2f
commit 005f849674
1 changed file with 2 additions and 0 deletions

View File

@@ -128,6 +128,8 @@ public class SparkGenEnrichedOrcidWorks {
})
.filter(p -> p != null);
sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
oafPublicationRDD
.mapToPair(
p -> new Tuple2<>(p.getClass().toString(),