diff --git a/pom.xml b/pom.xml index 70e60a1..c694b34 100644 --- a/pom.xml +++ b/pom.xml @@ -17,16 +17,6 @@ - - - true - - - false - - clojars.org - http://clojars.org/repo - true diff --git a/src/main/java/com/github/sakserv/sequencefile/SequenceFileReader.java b/src/main/java/com/github/sakserv/sequencefile/SequenceFileReader.java new file mode 100644 index 0000000..8f28cf5 --- /dev/null +++ b/src/main/java/com/github/sakserv/sequencefile/SequenceFileReader.java @@ -0,0 +1,58 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.github.sakserv.sequencefile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * Command-line entry point that opens a Hadoop SequenceFile and logs every record in it.
+ *
+ * Records are read into a {@link Text} key and an {@link IntWritable} value.
+ */
+public class SequenceFileReader {
+
+    // Logger
+    private static final Logger LOG = LoggerFactory.getLogger(SequenceFileReader.class);
+
+    /**
+     * Reads the sequence file named by {@code args[0]} and logs each key/value pair at INFO.
+     *
+     * An {@link IOException} raised while opening, reading or closing the file is logged
+     * at ERROR (with its stack trace) and not rethrown.
+     *
+     * @param args command-line arguments; {@code args[0]} is the path of the file to read
+     * @throws IllegalArgumentException if no input file argument is supplied
+     */
+    public static void main(String[] args) {
+
+        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
+        if (args == null || args.length < 1) {
+            throw new IllegalArgumentException("Usage: SequenceFileReader INPUT_FILE");
+        }
+        String inputFile = args[0];
+
+        Configuration conf = new Configuration();
+        Path seqFilePath = new Path(inputFile);
+
+        try {
+            SequenceFile.Reader reader = new SequenceFile.Reader(conf,
+                    SequenceFile.Reader.file(seqFilePath));
+
+            // Nested try/finally guarantees the reader is closed even if next() throws,
+            // without requiring Java 7 try-with-resources.
+            try {
+                Text key = new Text();
+                IntWritable val = new IntWritable();
+
+                while (reader.next(key, val)) {
+                    LOG.info("Sequence File Data: Key: {}\tValue: {}", key, val);
+                }
+            } finally {
+                reader.close();
+            }
+        } catch (IOException e) {
+            // The Configuration is built outside this try, so a failure here means the
+            // file could not be opened, read or closed. Pass the exception to the logger
+            // so the stack trace goes to the configured appenders rather than stderr.
+            LOG.error("ERROR: Could not read sequence file " + seqFilePath, e);
+        }
+
+    }
+}
diff --git a/src/main/java/com/github/sakserv/sequencefile/Main.java b/src/main/java/com/github/sakserv/sequencefile/SequenceFileWriter.java similarity index 70% rename from src/main/java/com/github/sakserv/sequencefile/Main.java rename to src/main/java/com/github/sakserv/sequencefile/SequenceFileWriter.java index 6cefaa4..747c9f4 100644 --- a/src/main/java/com/github/sakserv/sequencefile/Main.java +++ b/src/main/java/com/github/sakserv/sequencefile/SequenceFileWriter.java @@ -24,21 +24,21 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -public class Main { +public class SequenceFileWriter { // Logger - private static final Logger LOG = LoggerFactory.getLogger(Main.class); + private static final Logger LOG = LoggerFactory.getLogger(SequenceFileWriter.class); public static void main(String[] args) { + String outputFile = args[0]; + Configuration conf = new
Configuration(); try { FileSystem fs = FileSystem.get(conf); - - Path seqFileDir = new Path("/tmp/seq_file_test"); - fs.mkdirs(seqFileDir); - - Path seqFilePath = new Path(seqFileDir + "/file.seq"); + + Path seqFilePath = new Path(outputFile); + fs.mkdirs(seqFilePath.getParent()); SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(seqFilePath), SequenceFile.Writer.keyClass(Text.class), @@ -48,20 +48,8 @@ public class Main { writer.append(new Text("key2"), new IntWritable(2)); writer.close(); - - SequenceFile.Reader reader = new SequenceFile.Reader(conf, - SequenceFile.Reader.file(seqFilePath)); - - Text key = new Text(); - IntWritable val = new IntWritable(); - - while (reader.next(key, val)) { - System.out.println("SEQFILE KEY: " + key + "\t" + val); - } - - fs.mkdirs(new Path("/tmp/seq_file_test")); - - reader.close(); + + LOG.info("SUCCESS: Successfully wrote " + seqFilePath + " to HDFS."); } catch(IOException e) { LOG.error("ERROR: Could not load hadoop configuration"); e.printStackTrace();