From 005f849674c93f44e9a3e66b86211dd8f38f8919 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 13 Nov 2020 12:45:31 +0100 Subject: [PATCH] added compression to output dataset --- .../dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index cc65b0b4f..a92d534d8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -128,6 +128,8 @@ public class SparkGenEnrichedOrcidWorks { }) .filter(p -> p != null); + sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); + oafPublicationRDD .mapToPair( p -> new Tuple2<>(p.getClass().toString(),