forked from D-Net/dnet-hadoop
[OpenCitation] add compression option when writing the sequence file
This commit is contained in:
parent 4f0a044245
commit 42846d3b91
|
@@ -88,7 +88,7 @@ public class CreateActionSetSparkJob implements Serializable {
     private static void extractContent(SparkSession spark, String inputPath, String outputPath) {
 
         getTextTextJavaPairRDD(spark, inputPath)
-            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);// , GzipCodec.class);
+            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
     }
 
     private static JavaPairRDD<Text, Text> getTextTextJavaPairRDD(SparkSession spark, String inputPath) {
Loading…
Reference in New Issue