split out reader and writer
This commit is contained in:
parent
ae8a7ff587
commit
fe8d4458d0
10
pom.xml
10
pom.xml
|
@ -17,16 +17,6 @@
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<repositories>
|
<repositories>
|
||||||
<repository>
|
|
||||||
<releases>
|
|
||||||
<enabled>true</enabled>
|
|
||||||
</releases>
|
|
||||||
<snapshots>
|
|
||||||
<enabled>false</enabled>
|
|
||||||
</snapshots>
|
|
||||||
<id>clojars.org</id>
|
|
||||||
<url>http://clojars.org/repo</url>
|
|
||||||
</repository>
|
|
||||||
<repository>
|
<repository>
|
||||||
<releases>
|
<releases>
|
||||||
<enabled>true</enabled>
|
<enabled>true</enabled>
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package com.github.sakserv.sequencefile;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.IntWritable;
|
||||||
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class SequenceFileReader {
|
||||||
|
|
||||||
|
// Logger
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(SequenceFileReader.class);
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
|
||||||
|
String inputFile = args[0];
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
try {
|
||||||
|
|
||||||
|
Path seqFilePath = new Path(inputFile);
|
||||||
|
|
||||||
|
SequenceFile.Reader reader = new SequenceFile.Reader(conf,
|
||||||
|
SequenceFile.Reader.file(seqFilePath));
|
||||||
|
|
||||||
|
Text key = new Text();
|
||||||
|
IntWritable val = new IntWritable();
|
||||||
|
|
||||||
|
while (reader.next(key, val)) {
|
||||||
|
LOG.info("Sequence File Data: Key: " + key + "\tValue: " + val);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
} catch(IOException e) {
|
||||||
|
LOG.error("ERROR: Could not load hadoop configuration");
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,21 +24,21 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class Main {
|
public class SequenceFileWriter {
|
||||||
|
|
||||||
// Logger
|
// Logger
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(Main.class);
|
private static final Logger LOG = LoggerFactory.getLogger(SequenceFileWriter.class);
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|
||||||
|
String outputFile = args[0];
|
||||||
|
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
try {
|
try {
|
||||||
FileSystem fs = FileSystem.get(conf);
|
FileSystem fs = FileSystem.get(conf);
|
||||||
|
|
||||||
Path seqFileDir = new Path("/tmp/seq_file_test");
|
Path seqFilePath = new Path(outputFile);
|
||||||
fs.mkdirs(seqFileDir);
|
fs.mkdirs(seqFilePath.getParent());
|
||||||
|
|
||||||
Path seqFilePath = new Path(seqFileDir + "/file.seq");
|
|
||||||
|
|
||||||
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
|
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
|
||||||
SequenceFile.Writer.file(seqFilePath), SequenceFile.Writer.keyClass(Text.class),
|
SequenceFile.Writer.file(seqFilePath), SequenceFile.Writer.keyClass(Text.class),
|
||||||
|
@ -49,19 +49,7 @@ public class Main {
|
||||||
|
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
SequenceFile.Reader reader = new SequenceFile.Reader(conf,
|
LOG.info("SUCCESS: Successfully wrote " + seqFilePath + " to HDFS.");
|
||||||
SequenceFile.Reader.file(seqFilePath));
|
|
||||||
|
|
||||||
Text key = new Text();
|
|
||||||
IntWritable val = new IntWritable();
|
|
||||||
|
|
||||||
while (reader.next(key, val)) {
|
|
||||||
System.out.println("SEQFILE KEY: " + key + "\t" + val);
|
|
||||||
}
|
|
||||||
|
|
||||||
fs.mkdirs(new Path("/tmp/seq_file_test"));
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
LOG.error("ERROR: Could not load hadoop configuration");
|
LOG.error("ERROR: Could not load hadoop configuration");
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
Loading…
Reference in New Issue