added sample

This commit is contained in:
parent d773c97564
commit ae8a7ff587

pom.xml: 11 lines changed
pom.xml
@@ -13,6 +13,7 @@
    <hadoop.version>2.6.0.2.2.0.0-2041</hadoop.version>
    <jetty.version>6.1.26</jetty.version>
    <junit.version>4.11</junit.version>
    <slf4j.version>1.7.10</slf4j.version>
  </properties>

  <repositories>
@@ -43,6 +44,16 @@
  </repositories>

  <dependencies>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-simple</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>com.github.sakserv</groupId>
      <artifactId>hadoop-mini-clusters</artifactId>
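The diff view truncates the last dependency here. Judging from the module library entry later in this commit (Maven: com.github.sakserv:hadoop-mini-clusters:0.0.12 with TEST scope), the block presumably finishes along these lines; the version and test scope are inferred from that entry, not shown in this hunk:

      <version>0.0.12</version>
      <scope>test</scope>
    </dependency>
  </dependencies>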

(IntelliJ module file; filename not shown in this view)
@@ -6,10 +6,13 @@
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/test/main/java" isTestSource="true" />
      <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
      <excludeFolder url="file://$MODULE_DIR$/target" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.10" level="project" />
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-simple:1.7.10" level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: com.github.sakserv:hadoop-mini-clusters:0.0.12" level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
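The src/resources folder above is marked type="java-resource", so its contents land on the classpath at build time. Assuming the sandbox XML files added by this commit live there (their paths are not visible in this view), a hypothetical one-line check from the sample's main() would confirm Hadoop can see them:

    // Hypothetical classpath check: prints null if core-site.xml was not packaged as a resource
    System.out.println(Main.class.getResource("/core-site.xml"));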

(new file: Main.java in package com.github.sakserv.sequencefile)
@@ -0,0 +1,71 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.sakserv.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

public class Main {

    // Logger
    private static final Logger LOG = LoggerFactory.getLogger(Main.class);

    public static void main(String[] args) {

        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(conf);

            // Create the directory that holds the SequenceFile
            Path seqFileDir = new Path("/tmp/seq_file_test");
            fs.mkdirs(seqFileDir);

            Path seqFilePath = new Path(seqFileDir + "/file.seq");

            // Write two Text key / IntWritable value records
            SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                    SequenceFile.Writer.file(seqFilePath), SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(IntWritable.class));

            writer.append(new Text("key1"), new IntWritable(1));
            writer.append(new Text("key2"), new IntWritable(2));

            writer.close();

            // Read the records back and print them
            SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                    SequenceFile.Reader.file(seqFilePath));

            Text key = new Text();
            IntWritable val = new IntWritable();

            while (reader.next(key, val)) {
                System.out.println("SEQFILE KEY: " + key + "\t" + val);
            }

            reader.close();
        } catch(IOException e) {
            LOG.error("ERROR: Failed to write or read the SequenceFile");
            e.printStackTrace();
        }

    }
}
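Since SequenceFile.Writer and SequenceFile.Reader both implement java.io.Closeable in Hadoop 2.x, the same write/read cycle can be expressed with try-with-resources so the handles close even when an append or next throws. A minimal sketch, reusing conf and seqFilePath from the sample above:

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(seqFilePath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class))) {
            writer.append(new Text("key1"), new IntWritable(1));
            writer.append(new Text("key2"), new IntWritable(2));
        }

        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(seqFilePath))) {
            Text key = new Text();
            IntWritable val = new IntWritable();
            while (reader.next(key, val)) {
                System.out.println("SEQFILE KEY: " + key + "\t" + val);
            }
        }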

(new file: a Hadoop core-site.xml, client configuration from the Hortonworks sandbox)
@@ -0,0 +1,156 @@
<!--Wed Mar 4 22:48:23 2015-->
<configuration>

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://sandbox.hortonworks.com:8020</value>
    <final>true</final>
  </property>

  <property>
    <name>fs.trash.interval</name>
    <value>360</value>
  </property>

  <property>
    <name>hadoop.http.authentication.simple.anonymous.allowed</name>
    <value>true</value>
  </property>

  <property>
    <name>hadoop.proxyuser.falcon.groups</name>
    <value>users</value>
  </property>

  <property>
    <name>hadoop.proxyuser.falcon.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hbase.groups</name>
    <value>users</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hbase.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hcat.groups</name>
    <value>users</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hcat.hosts</name>
    <value>sandbox.hortonworks.com</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>users</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>sandbox.hortonworks.com</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hue.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hue.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.oozie.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.oozie.hosts</name>
    <value>sandbox.hortonworks.com</value>
  </property>

  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.security.auth_to_local</name>
    <value>
      RULE:[2:$1@$0]([rn]m@.*)s/.*/yarn/
      RULE:[2:$1@$0](jhs@.*)s/.*/mapred/
      RULE:[2:$1@$0]([nd]n@.*)s/.*/hdfs/
      RULE:[2:$1@$0](hm@.*)s/.*/hbase/
      RULE:[2:$1@$0](rs@.*)s/.*/hbase/
      DEFAULT</value>
  </property>

  <property>
    <name>hadoop.security.authentication</name>
    <value>simple</value>
  </property>

  <property>
    <name>hadoop.security.authorization</name>
    <value>false</value>
  </property>

  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>

  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>

  <property>
    <name>io.serializations</name>
    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
  </property>

  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>50</value>
  </property>

  <property>
    <name>ipc.client.connection.maxidletime</name>
    <value>30000</value>
  </property>

  <property>
    <name>ipc.client.idlethreshold</name>
    <value>8000</value>
  </property>

  <property>
    <name>ipc.server.tcpnodelay</name>
    <value>true</value>
  </property>

  <property>
    <name>mapreduce.jobtracker.webinterface.trusted</name>
    <value>false</value>
  </property>

  <property>
    <name>proxyuser_group</name>
    <value>users</value>
  </property>

</configuration>
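core-site.xml is one of Configuration's default resources, so with this file on the classpath the plain new Configuration() in Main picks it up automatically, and fs.defaultFS points the sample at the sandbox HDFS instead of the local filesystem. A minimal sketch of a check, using the property name from the file above:

    Configuration conf = new Configuration();
    // Expect hdfs://sandbox.hortonworks.com:8020 once core-site.xml is on the classpath
    System.out.println(conf.get("fs.defaultFS"));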

(new file: a Hadoop hdfs-site.xml, HDFS configuration from the Hortonworks sandbox)
@@ -0,0 +1,265 @@
<!--Wed Mar 4 22:48:43 2015-->
<configuration>

  <property>
    <name>dfs.block.access.token.enable</name>
    <value>false</value>
  </property>

  <property>
    <name>dfs.block.size</name>
    <value>34217472</value>
  </property>

  <property>
    <name>dfs.blockreport.initialDelay</name>
    <value>120</value>
  </property>

  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>

  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.client.read.shortcircuit.streams.cache.size</name>
    <value>4096</value>
  </property>

  <property>
    <name>dfs.cluster.administrators</name>
    <value> hdfs</value>
  </property>

  <property>
    <name>dfs.datanode.address</name>
    <value>0.0.0.0:50010</value>
  </property>

  <property>
    <name>dfs.datanode.balance.bandwidthPerSec</name>
    <value>6250000</value>
  </property>

  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/hadoop/hdfs/data</value>
    <final>true</final>
  </property>

  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>750</value>
  </property>

  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>1073741824</value>
  </property>

  <property>
    <name>dfs.datanode.failed.volumes.tolerated</name>
    <value>0</value>
    <final>true</final>
  </property>

  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>

  <property>
    <name>dfs.datanode.https.address</name>
    <value>0.0.0.0:50475</value>
  </property>

  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:8010</value>
  </property>

  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>1024</value>
  </property>

  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>1024</value>
  </property>

  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/lib/hadoop-hdfs/dn_socket</value>
  </property>

  <property>
    <name>dfs.heartbeat.interval</name>
    <value>3</value>
  </property>

  <property>
    <name>dfs.hosts.exclude</name>
    <value>/etc/hadoop/conf/dfs.exclude</value>
  </property>

  <property>
    <name>dfs.http.policy</name>
    <value>HTTP_ONLY</value>
  </property>

  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
  </property>

  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/hadoop/hdfs/journalnode</value>
  </property>

  <property>
    <name>dfs.journalnode.http-address</name>
    <value>0.0.0.0:8480</value>
  </property>

  <property>
    <name>dfs.namenode.accesstime.precision</name>
    <value>3600000</value>
  </property>

  <property>
    <name>dfs.namenode.avoid.read.stale.datanode</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.namenode.avoid.write.stale.datanode</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>/hadoop/hdfs/namesecondary</value>
  </property>

  <property>
    <name>dfs.namenode.checkpoint.edits.dir</name>
    <value>${dfs.namenode.checkpoint.dir}</value>
  </property>

  <property>
    <name>dfs.namenode.checkpoint.period</name>
    <value>21600</value>
  </property>

  <property>
    <name>dfs.namenode.checkpoint.txns</name>
    <value>1000000</value>
  </property>

  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>

  <property>
    <name>dfs.namenode.http-address</name>
    <value>sandbox.hortonworks.com:50070</value>
    <final>true</final>
  </property>

  <property>
    <name>dfs.namenode.https-address</name>
    <value>sandbox.hortonworks.com:50470</value>
  </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/hadoop/hdfs/namenode</value>
    <final>true</final>
  </property>

  <property>
    <name>dfs.namenode.name.dir.restore</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.namenode.safemode.threshold-pct</name>
    <value>1.0f</value>
  </property>

  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>sandbox.hortonworks.com:50090</value>
  </property>

  <property>
    <name>dfs.namenode.stale.datanode.interval</name>
    <value>30000</value>
  </property>

  <property>
    <name>dfs.namenode.startup.delay.block.deletion.sec</name>
    <value>3600</value>
  </property>

  <property>
    <name>dfs.namenode.write.stale.datanode.ratio</name>
    <value>1.0f</value>
  </property>

  <property>
    <name>dfs.nfs.exports.allowed.hosts</name>
    <value>* rw</value>
  </property>

  <property>
    <name>dfs.nfs3.dump.dir</name>
    <value>/tmp/.hdfs-nfs</value>
  </property>

  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hdfs</value>
  </property>

  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>

  <property>
    <name>dfs.replication.max</name>
    <value>50</value>
  </property>

  <property>
    <name>dfs.support.append</name>
    <value>true</value>
    <final>true</final>
  </property>

  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
    <final>true</final>
  </property>

  <property>
    <name>fs.permissions.umask-mode</name>
    <value>022</value>
  </property>

</configuration>
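hdfs-site.xml is likewise picked up from the classpath when the HDFS client classes initialize, so the client-side values above (dfs.blocksize of 134217728, dfs.replication of 1) govern files the sample writes. A hedged sketch for confirming the effective defaults against the file the sample creates, reusing conf from above:

    FileSystem fs = FileSystem.get(conf);
    Path seqFilePath = new Path("/tmp/seq_file_test/file.seq");
    // Client-side defaults as configured above
    System.out.println("blocksize:   " + fs.getDefaultBlockSize(seqFilePath));
    System.out.println("replication: " + fs.getFileStatus(seqFilePath).getReplication());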

(modified file: another source in com.github.sakserv.sequencefile; filename not shown in this view)
@@ -1,4 +1,4 @@
-/*
+package com.github.sakserv.sequencefile;/*
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at