From ae8a7ff587a35473f5cbd0e5941b1429cc1076b7 Mon Sep 17 00:00:00 2001 From: Shane Kumpf Date: Thu, 5 Mar 2015 06:19:07 -0700 Subject: [PATCH] added sample --- pom.xml | 11 + sequencefile-examples.iml | 3 + .../com/github/sakserv/sequencefile/Main.java | 71 +++++ src/resources/core-site.xml | 156 +++++++++++ src/resources/hdfs-site.xml | 265 ++++++++++++++++++ .../sequencefile}/SequenceFileTest.java | 2 +- 6 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/github/sakserv/sequencefile/Main.java create mode 100644 src/resources/core-site.xml create mode 100644 src/resources/hdfs-site.xml rename test/main/java/{ => com/github/sakserv/sequencefile}/SequenceFileTest.java (98%) diff --git a/pom.xml b/pom.xml index 39f9e18..70e60a1 100644 --- a/pom.xml +++ b/pom.xml @@ -13,6 +13,7 @@ 2.6.0.2.2.0.0-2041 6.1.26 4.11 + 1.7.10 @@ -43,6 +44,16 @@ + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + slf4j-simple + ${slf4j.version} + com.github.sakserv hadoop-mini-clusters diff --git a/sequencefile-examples.iml b/sequencefile-examples.iml index 4cc6502..79fea3c 100644 --- a/sequencefile-examples.iml +++ b/sequencefile-examples.iml @@ -6,10 +6,13 @@ + + + diff --git a/src/main/java/com/github/sakserv/sequencefile/Main.java b/src/main/java/com/github/sakserv/sequencefile/Main.java new file mode 100644 index 0000000..6cefaa4 --- /dev/null +++ b/src/main/java/com/github/sakserv/sequencefile/Main.java @@ -0,0 +1,71 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.github.sakserv.sequencefile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * Sample program that writes a small SequenceFile of (Text, IntWritable)
+ * records to /tmp/seq_file_test/file.seq on the filesystem selected by the
+ * default Hadoop {@link Configuration}, then reads it back and prints each
+ * record to standard output.
+ */
+public class Main {
+
+    // Logger
+    private static final Logger LOG = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Runs the write-then-read round trip.
+     *
+     * <p>Any {@link IOException} raised while talking to the filesystem or
+     * while writing/reading the sequence file is logged (with its stack
+     * trace) and the program returns normally.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+
+        Configuration conf = new Configuration();
+        try {
+            FileSystem fs = FileSystem.get(conf);
+
+            Path seqFileDir = new Path("/tmp/seq_file_test");
+            fs.mkdirs(seqFileDir);
+
+            // Path(parent, child) instead of string concatenation.
+            Path seqFilePath = new Path(seqFileDir, "file.seq");
+
+            writeSampleRecords(conf, seqFilePath);
+            printRecords(conf, seqFilePath);
+
+            // NOTE(review): the directory already exists at this point, so this
+            // second mkdirs looks redundant; possibly a cleanup step was
+            // intended instead -- confirm with the author before changing it.
+            fs.mkdirs(seqFileDir);
+        } catch (IOException e) {
+            // Configuration is created outside this try block, so a failure
+            // here comes from the filesystem or sequence-file calls. Pass the
+            // exception to the logger rather than printing the stack trace.
+            LOG.error("ERROR: Sequence file write/read failed", e);
+        }
+
+    }
+
+    /**
+     * Writes the two sample records ("key1" -> 1, "key2" -> 2) to the given path.
+     *
+     * @param conf        Hadoop configuration used to create the writer
+     * @param seqFilePath destination of the sequence file
+     * @throws IOException if the writer cannot be created, appended to or closed
+     */
+    private static void writeSampleRecords(Configuration conf, Path seqFilePath) throws IOException {
+        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+                SequenceFile.Writer.file(seqFilePath),
+                SequenceFile.Writer.keyClass(Text.class),
+                SequenceFile.Writer.valueClass(IntWritable.class));
+        // try/finally so the writer is closed even when an append fails.
+        try {
+            writer.append(new Text("key1"), new IntWritable(1));
+            writer.append(new Text("key2"), new IntWritable(2));
+        } finally {
+            writer.close();
+        }
+    }
+
+    /**
+     * Reads every record of the sequence file at the given path and prints it
+     * to standard output as {@code SEQFILE KEY: <key>\t<value>}.
+     *
+     * @param conf        Hadoop configuration used to open the reader
+     * @param seqFilePath sequence file to read
+     * @throws IOException if the file cannot be opened, read or closed
+     */
+    private static void printRecords(Configuration conf, Path seqFilePath) throws IOException {
+        SequenceFile.Reader reader = new SequenceFile.Reader(conf,
+                SequenceFile.Reader.file(seqFilePath));
+        // try/finally so the reader is closed even when a read fails.
+        try {
+            // The same key/value holders are refilled by each call to next().
+            Text key = new Text();
+            IntWritable val = new IntWritable();
+
+            while (reader.next(key, val)) {
+                System.out.println("SEQFILE KEY: " + key + "\t" + val);
+            }
+        } finally {
+            reader.close();
+        }
+    }
+}
diff --git a/src/resources/core-site.xml b/src/resources/core-site.xml new file mode 100644 index 0000000..3d32d1e --- /dev/null +++ b/src/resources/core-site.xml @@ -0,0 +1,156 @@ + + + + + fs.defaultFS + hdfs://sandbox.hortonworks.com:8020 + true + + + + fs.trash.interval + 360 + + + + 
hadoop.http.authentication.simple.anonymous.allowed + true + + + + hadoop.proxyuser.falcon.groups + users + + + + hadoop.proxyuser.falcon.hosts + * + + + + hadoop.proxyuser.hbase.groups + users + + + + hadoop.proxyuser.hbase.hosts + * + + + + hadoop.proxyuser.hcat.groups + users + + + + hadoop.proxyuser.hcat.hosts + sandbox.hortonworks.com + + + + hadoop.proxyuser.hive.groups + users + + + + hadoop.proxyuser.hive.hosts + sandbox.hortonworks.com + + + + hadoop.proxyuser.hue.groups + * + + + + hadoop.proxyuser.hue.hosts + * + + + + hadoop.proxyuser.oozie.groups + * + + + + hadoop.proxyuser.oozie.hosts + sandbox.hortonworks.com + + + + hadoop.proxyuser.root.groups + * + + + + hadoop.proxyuser.root.hosts + * + + + + hadoop.security.auth_to_local + + RULE:[2:$1@$0]([rn]m@.*)s/.*/yarn/ + RULE:[2:$1@$0](jhs@.*)s/.*/mapred/ + RULE:[2:$1@$0]([nd]n@.*)s/.*/hdfs/ + RULE:[2:$1@$0](hm@.*)s/.*/hbase/ + RULE:[2:$1@$0](rs@.*)s/.*/hbase/ + DEFAULT + + + + hadoop.security.authentication + simple + + + + hadoop.security.authorization + false + + + + io.compression.codecs + org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec + + + + io.file.buffer.size + 131072 + + + + io.serializations + org.apache.hadoop.io.serializer.WritableSerialization + + + + ipc.client.connect.max.retries + 50 + + + + ipc.client.connection.maxidletime + 30000 + + + + ipc.client.idlethreshold + 8000 + + + + ipc.server.tcpnodelay + true + + + + mapreduce.jobtracker.webinterface.trusted + false + + + + proxyuser_group + users + + + \ No newline at end of file diff --git a/src/resources/hdfs-site.xml b/src/resources/hdfs-site.xml new file mode 100644 index 0000000..0bd746e --- /dev/null +++ b/src/resources/hdfs-site.xml @@ -0,0 +1,265 @@ + + + + + dfs.block.access.token.enable + false + + + + dfs.block.size + 34217472 + + + + dfs.blockreport.initialDelay + 120 + + + + dfs.blocksize + 134217728 + + + + dfs.client.read.shortcircuit + true + + + 
+ dfs.client.read.shortcircuit.streams.cache.size + 4096 + + + + dfs.cluster.administrators + hdfs + + + + dfs.datanode.address + 0.0.0.0:50010 + + + + dfs.datanode.balance.bandwidthPerSec + 6250000 + + + + dfs.datanode.data.dir + /hadoop/hdfs/data + true + + + + dfs.datanode.data.dir.perm + 750 + + + + dfs.datanode.du.reserved + 1073741824 + + + + dfs.datanode.failed.volumes.tolerated + 0 + true + + + + dfs.datanode.http.address + 0.0.0.0:50075 + + + + dfs.datanode.https.address + 0.0.0.0:50475 + + + + dfs.datanode.ipc.address + 0.0.0.0:8010 + + + + dfs.datanode.max.transfer.threads + 1024 + + + + dfs.datanode.max.xcievers + 1024 + + + + dfs.domain.socket.path + /var/lib/hadoop-hdfs/dn_socket + + + + dfs.heartbeat.interval + 3 + + + + dfs.hosts.exclude + /etc/hadoop/conf/dfs.exclude + + + + dfs.http.policy + HTTP_ONLY + + + + dfs.https.port + 50470 + + + + dfs.journalnode.edits.dir + /hadoop/hdfs/journalnode + + + + dfs.journalnode.http-address + 0.0.0.0:8480 + + + + dfs.namenode.accesstime.precision + 3600000 + + + + dfs.namenode.avoid.read.stale.datanode + true + + + + dfs.namenode.avoid.write.stale.datanode + true + + + + dfs.namenode.checkpoint.dir + /hadoop/hdfs/namesecondary + + + + dfs.namenode.checkpoint.edits.dir + ${dfs.namenode.checkpoint.dir} + + + + dfs.namenode.checkpoint.period + 21600 + + + + dfs.namenode.checkpoint.txns + 1000000 + + + + dfs.namenode.handler.count + 100 + + + + dfs.namenode.http-address + sandbox.hortonworks.com:50070 + true + + + + dfs.namenode.https-address + sandbox.hortonworks.com:50470 + + + + dfs.namenode.name.dir + /hadoop/hdfs/namenode + true + + + + dfs.namenode.name.dir.restore + true + + + + dfs.namenode.safemode.threshold-pct + 1.0f + + + + dfs.namenode.secondary.http-address + sandbox.hortonworks.com:50090 + + + + dfs.namenode.stale.datanode.interval + 30000 + + + + dfs.namenode.startup.delay.block.deletion.sec + 3600 + + + + dfs.namenode.write.stale.datanode.ratio + 1.0f + + + + dfs.nfs.exports.allowed.hosts + * rw + 
+ + + dfs.nfs3.dump.dir + /tmp/.hdfs-nfs + + + + dfs.permissions.enabled + true + + + + dfs.permissions.superusergroup + hdfs + + + + dfs.replication + 1 + + + + dfs.replication.max + 50 + + + + dfs.support.append + true + true + + + + dfs.webhdfs.enabled + true + true + + + + fs.permissions.umask-mode + 022 + + + \ No newline at end of file diff --git a/test/main/java/SequenceFileTest.java b/test/main/java/com/github/sakserv/sequencefile/SequenceFileTest.java similarity index 98% rename from test/main/java/SequenceFileTest.java rename to test/main/java/com/github/sakserv/sequencefile/SequenceFileTest.java index 13de248..8ca9665 100644 --- a/test/main/java/SequenceFileTest.java +++ b/test/main/java/com/github/sakserv/sequencefile/SequenceFileTest.java @@ -1,4 +1,4 @@ -/* +package com.github.sakserv.sequencefile;/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at