forked from D-Net/dnet-hadoop
[OpenCitation] add compression option when writing the sequence file
This commit is contained in:
parent
4f0a044245
commit
42846d3b91
|
@@ -88,7 +88,7 @@ public class CreateActionSetSparkJob implements Serializable {
|
|||
private static void extractContent(SparkSession spark, String inputPath, String outputPath) {
|
||||
|
||||
getTextTextJavaPairRDD(spark, inputPath)
|
||||
- .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);// , GzipCodec.class);
|
||||
+ .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||
}
|
||||
|
||||
private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath) {
|
||||
|
|
Loading…
Reference in New Issue