From bebc54d5bff5fe4f03e0b596be6377835cc72cab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 8 Feb 2021 18:06:25 +0100 Subject: [PATCH] seq file storing native records is now compressed --- .../eu/dnetlib/dhp/collection/worker/CollectorWorker.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java index c2d32019d..945eff8b0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorker.java @@ -11,6 +11,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +64,8 @@ public class CollectorWorker { conf, SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer.keyClass(IntWritable.class), - SequenceFile.Writer.valueClass(Text.class))) { + SequenceFile.Writer.valueClass(Text.class), + SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { final IntWritable key = new IntWritable(counter.get()); final Text value = new Text(); plugin