Hadoop setup
UBUNTU AS EXAMPLE
1. install the Sun Java SDK (the JAVA_HOME settings below assume /usr/lib/jvm/default-java)
2. addgroup hadoop
3. adduser --ingroup hadoop hadoop
4. visudo # edit /etc/sudoers
add: hadoop ALL=(ALL) ALL # after root ALL=(ALL) ALL
5. ssh config (su hadoop)
hadoop@host>$ ssh-keygen -t rsa -P ""
hadoop@host>$ cat .ssh/id_rsa.pub >> .ssh/authorized_keys
hadoop@host>$ sudo /etc/init.d/ssh reload
hadoop@host>$ ssh localhost
6. download hadoop (hadoop 2.0.3-alpha as example; see the fetch sketch after this step)
to /usr/local/
#> tar zxvf hadoop-2.0.3-alpha.tar.gz
#> mv hadoop-2.0.3-alpha hadoop
#> chown -R hadoop:hadoop hadoop
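The fetch itself isn't shown above; a minimal sketch to run before the tar/mv/chown commands, assuming the Apache archive URL is still live and that Ubuntu's default-jdk package (which provides /usr/lib/jvm/default-java) stands in for the Sun JDK of step 1:
#> apt-get install default-jdk   # assumption: supplies /usr/lib/jvm/default-java used as JAVA_HOME below
#> cd /usr/local
#> wget http://archive.apache.org/dist/hadoop/common/hadoop-2.0.3-alpha/hadoop-2.0.3-alpha.tar.gz   # URL is an assumption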
# the steps below are run as the hadoop user (su hadoop)
* add to .bashrc
export HADOOP_PREFIX=/usr/local/hadoop
export PATH=$PATH:$HADOOP_PREFIX/bin
export PATH=$PATH:$HADOOP_PREFIX/sbin
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export YARN_HOME=${HADOOP_PREFIX}
* vim $HADOOP_PREFIX/etc/hadoop/core-site.xml
<property>
<name>io.native.lib.available</name>
<value>true</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
* vim $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/hadoop_space/hadoop203/dfs/name</value>
<description>Determines where on the local filesystem the DFS name node should store the name table. If this is a comma-delimited list of directories, then the name table is replicated in all of the directories, for redundancy.</description>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/hadoop_space/hadoop203/dfs/data</value>
<description>Determines where on the local filesystem a DFS data node should store its blocks. If this is a comma-delimited list of directories, then data will be stored in all named directories, typically on different devices. Directories that do not exist are ignored.
</description>
<final>true</final>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
* vim $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.tracker</name>
<value>hdfs://localhost:9001</value> <!-- recommended: <value>localhost:9001</value> -->
<final>true</final>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1536</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1024M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2560M</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>50</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>file:/home/hadoop/hadoop_space/hadoop203/mapred/system</value>
<final>true</final>
</property>
<property>
<name>mapred.local.dir</name>
<value>file:/home/hadoop/hadoop_space/hadoop203/mapred/local</value>
<final>true</final>
</property>
* vim $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:18080</value> <!-- do not use port 8080; it fails with a FATAL 'already in use' error -->
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:18081</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:18082</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce.shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
* vim $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
export HADOOP_PREFIX=/usr/local/hadoop
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export PATH=$PATH:$HADOOP_PREFIX/bin
export PATH=$PATH:$HADOOP_PREFIX/sbin
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export YARN_HOME=${HADOOP_PREFIX}
export HADOOP_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
export JAVA_HOME=/usr/lib/jvm/default-java
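Before formatting, it is worth making sure the local directories named in core-site.xml, hdfs-site.xml and mapred-site.xml exist and are writable by the hadoop user; Hadoop usually creates them itself, but doing it up front avoids permission surprises (paths copied from the configs above):
$> mkdir -p /home/hadoop/tmp
$> mkdir -p /home/hadoop/hadoop_space/hadoop203/dfs/name
$> mkdir -p /home/hadoop/hadoop_space/hadoop203/dfs/data
$> mkdir -p /home/hadoop/hadoop_space/hadoop203/mapred/system
$> mkdir -p /home/hadoop/hadoop_space/hadoop203/mapred/local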
* $>bin/hdfs namenode -format
* $>sbin/start-all.sh
* $>jps
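If everything started, jps should list roughly the following daemons (pids are illustrative; the exact set depends on which start scripts ran):
12001 NameNode
12102 DataNode
12203 SecondaryNameNode
12304 ResourceManager
12405 NodeManager
12506 Jps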
# REF: http://www.cnblogs.com/aniuer/archive/2012/07/16/2594448.html
download hbase-0.94.5
tar zxvf hbase-0.94.5.tar.gz
cd hbase-0.94.5
vim pom.xml
<profile>
<id>hadoop-2.0</id>
<activation>
<property>
<name>hadoop.profile</name>
<value>2.0.3</value> <!-- default is 2.0 -->
</property>
</activation>
<properties>
<hadoop.version>2.0.3-alpha</hadoop.version><!-- default is 2.0.0-alpha -->
<slf4j.version>1.6.1</slf4j.version>
</properties>
# REF http://yanbohappy.sinaapp.com/?p=192
Because the Hadoop jars currently bundled in HBase's lib directory still follow the Hadoop 1.0 layout (common, hdfs and mapreduce packed into a single jar), running on top of Hadoop 2.0 means the 2.0 Hadoop jars and their dependencies have to be placed into HBase's lib directory instead.
>$ mvn clean package assembly:assembly -DskipTests -Dhadoop.profile=2.0.3
When the build finishes, cd target
tar zxvf hbase-0.94.5.tar.gz
sudo mv hbase-0.94.5 /usr/local/hbase
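To confirm the rebuild actually swapped in the Hadoop 2 jars (the point of the note above), a quick check; the exact jar names are an assumption:
$> ls /usr/local/hbase/lib | grep hadoop
# expect hadoop-common-2.0.3-alpha.jar, hadoop-hdfs-2.0.3-alpha.jar, etc.,
# rather than the single hadoop-core-1.0.x.jar that ships with the stock 0.94.5 tarball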
Configuration:
hadoop dfsadmin -safemode leave (optional)
vim conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jvm/default-java
export HBASE_IDENT_STRING=$HOSTNAME
export HBASE_MANAGES_ZK=true
vim conf/hbase-site.xml
<property>
<name>hbase.rootdir</name>
<value>hdfs://localhost:9000/hbase</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/home/elvuel/hbase_space/tmp</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>false</value>
<description>The mode the cluster will be in. Possible values are
false for standalone mode and true for distributed mode. If
false, startup will run all HBase and ZooKeeper daemons together
in the one JVM.
</description>
</property>
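As with the Hadoop directories, hbase.tmp.dir above should exist and be writable by the user running HBase; HBase normally creates it on startup, but creating it first is harmless:
$> mkdir -p /home/elvuel/hbase_space/tmp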
Set HBASE_HOME
export HBASE_HOME=/usr/local/hbase
Then start hadoop
Start hbase:
/usr/local/hbase/bin/start-hbase.sh
Then /usr/local/hbase/bin/hbase shell
hbase-irb-shell>create 't1', 'f1'
hbase-irb-shell>exit
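Before the exit above, a put/scan round-trip is a cheap check that writes actually reach HDFS (standard HBase shell commands; row and value names are arbitrary):
hbase-irb-shell>put 't1', 'r1', 'f1:c1', 'hello'
hbase-irb-shell>scan 't1'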
hbase-daemons.sh start zookeeper # reminder
hbase rest start # reminder
*DONE*
hadoop fs -mkdir /invest
echo 'hello world' >> /tmp/invest1.txt
echo 'hello hadoop' >> /tmp/invest2.txt
hadoop fs -copyFromLocal /tmp/invest2.txt /invest
hadoop fs -ls /invest
cd /usr/local/hadoop/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-2.0.3-alpha.jar wordcount /invest /invest-output
...
...
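When the job finishes, the word counts land in /invest-output; the reducer output file is conventionally named part-r-00000 (the exact name is an assumption):
hadoop fs -ls /invest-output
hadoop fs -cat /invest-output/part-r-00000
Note that only invest2.txt was copied into /invest above; copying invest1.txt as well would include both 'hello' lines in the count.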
UBUNTU AS EXAMPLE (Deprecated)
1. install the Sun Java SDK
2. addgroup hadoop
3. adduser --ingroup hadoop hadoop
4. visudo # edit /etc/sudoers
add: hadoop ALL=(ALL) ALL # after root ALL=(ALL) ALL
5. ssh config (su hadoop)
hadoop@host>$ ssh-keygen -t rsa -P ""
hadoop@host>$ cat .ssh/id_rsa.pub >> .ssh/authorized_keys
hadoop@host>$ sudo /etc/init.d/ssh reload
hadoop@host>$ ssh localhost
6. download hadoop(hadoop 2.0.3-alpha as example)
to /usr/local/
#> tar zxvf hadoop-2.0.3-alpha.tar.gz
#> mv hadoop-2.0.3-alpha hadoop
#> chown -R hadoop:hadoop hadoop
7. vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh
export JAVA_HOME="/usr/lib/jvm/default-java"
vim $HADOOP_HOME/etc/hadoop/core-site.xml
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
vim $HADOOP_HOME/etc/hadoop/mapred-site.xml
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
vim $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
8. su hadoop
$>/usr/local/hadoop/bin/hadoop namenode -format
$>/usr/local/hadoop/sbin/start-all.sh
*DONE*