[SWH] compress the output actionset

This commit is contained in:
Claudio Atzori 2023-10-06 14:03:33 +02:00
parent f759b18bca
commit 858931ccb6
1 changed files with 2 additions and 1 deletions

View File

@ -11,6 +11,7 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaPairRDD;
@ -81,7 +82,7 @@ public class PrepareSWHActionsets {
JavaPairRDD<Text, Text> softwareRDD = prepareActionsets(spark, inputPath, softwareInputPath); JavaPairRDD<Text, Text> softwareRDD = prepareActionsets(spark, inputPath, softwareInputPath);
softwareRDD softwareRDD
.saveAsHadoopFile( .saveAsHadoopFile(
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
}); });
} }