From 42846d3b911d3759046f4965dfa42d6d288bbc21 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 3 Apr 2024 09:25:00 +0200 Subject: [PATCH] [OpenCitation] add compression option when writing the sequence file --- .../actionmanager/opencitations/CreateActionSetSparkJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index 950b2eb57..39592f0bc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -88,7 +88,7 @@ public class CreateActionSetSparkJob implements Serializable { private static void extractContent(SparkSession spark, String inputPath, String outputPath) { getTextTextJavaPairRDD(spark, inputPath) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);// , GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } private static JavaPairRDD getTextTextJavaPairRDD(SparkSession spark, String inputPath) {