CDH4 Hadoop + HBase Pseudo-distributed Mode installation
# Installing CDH4 on a Single Linux Node in Pseudo-distributed Mode
# https://ccp.cloudera.com/display/CDH4DOC/Installing+CDH4+on+a+Single+Linux+Node+in+Pseudo-distributed+Mode
# Installing CDH4 with MRv1 on a Single Linux Node in Pseudo-distributed mode
# On Ubuntu and other Debian systems
nipra@lambda:Downloads$ wget -cv http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb
nipra@lambda:Downloads$ sudo dpkg -i cdh4-repository_1.0_all.deb # Adds /etc/apt/sources.list.d/cloudera-cdh4.list ??
nipra@lambda:Downloads$ dpkg -L cdh4-repository # To view the files on Ubuntu systems
# Install CDH4
# For Ubuntu Precise systems
# nipra@lambda:~$ lsb_release -c
$ curl -s http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key | sudo apt-key add -
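# (The install step itself isn't captured above; per the linked Cloudera doc
# the pseudo-distributed MRv1 package is installed with:)
$ sudo apt-get update
$ sudo apt-get install hadoop-0.20-conf-pseudo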
# To view the files on Ubuntu systems
$ dpkg -L hadoop-0.20-conf-pseudo
# Step 1: Format the NameNode.
# Got the error: "Error: JAVA_HOME is not set and could not be found."
# even though ~/.bashrc has "export JAVA_HOME=/usr/lib/jvm/jdk1.7.0"
# (sudo resets the environment, so JAVA_HOME from ~/.bashrc does not reach the hdfs user).
# FIX: [ https://groups.google.com/a/cloudera.org/d/msg/cdh-user/x0KLPPHiibU/twracHL-Rd0J ]
# $ sudo visudo
# Add "Defaults env_keep+=JAVA_HOME"
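# (Optional sanity check, assuming the sudoers change above has been made:
# JAVA_HOME should now survive sudo.)
$ sudo -u hdfs env | grep JAVA_HOME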
$ sudo -u hdfs hdfs namenode -format
# Step 2: Start HDFS
# http://localhost:50070/
$ for service in /etc/init.d/hadoop-hdfs-*
> do
> sudo $service start
> done
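# (Optional check, not in the original notes: the HDFS daemons should now show
# up in jps as NameNode, DataNode and SecondaryNameNode.)
$ sudo jps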
# Step 3: Create the /tmp Directory
$ sudo -u hdfs hadoop fs -mkdir /tmp
$ sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
# Step 4: Create the MapReduce system directories
sudo -u hdfs hadoop fs -mkdir /var
sudo -u hdfs hadoop fs -mkdir /var/lib
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
# Step 5: Verify the HDFS File Structure
$ sudo -u hdfs hadoop fs -ls -R /
# Step 6: Start MapReduce
# http://localhost:50030/
for service in /etc/init.d/hadoop-0.20-mapreduce-*
> do
> sudo $service start
> done
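# (Optional check: jps should now also list JobTracker and TaskTracker.)
$ sudo jps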
# Step 7: Create User Directories
$ sudo -u hdfs hadoop fs -mkdir /user/<user>
$ sudo -u hdfs hadoop fs -chown <user> /user/<user>
# where <user> is the Linux username of each user.
# For the current user:
sudo -u hdfs hadoop fs -mkdir /user/$USER
sudo -u hdfs hadoop fs -chown $USER /user/$USER
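# (Optional smoke test, not in the original notes: run one of the bundled
# example jobs as the new user. The examples jar path below is assumed from
# the CDH4 MRv1 package layout.)
$ hadoop fs -mkdir input
$ hadoop fs -put /etc/hadoop/conf/*.xml input
$ hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar grep input output 'dfs[a-z.]+'
$ hadoop fs -ls output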
# For stopping daemons
$ for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
> do
> sudo $service stop
> done
# HBase
nipra@lambda:~$ sudo apt-get install hbase
nipra@lambda:~$ sudo apt-get install hbase-master
nipra@lambda:~$ sudo jps
nipra@lambda:~$ sudo /etc/init.d/hbase-master stop
# To enable pseudo-distributed mode, you must first make some configuration
# changes. Open /etc/hbase/conf/hbase-site.xml
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:8020/hbase</value>
  </property>
</configuration>
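# (The port in hbase.rootdir must match the NameNode port configured in
# core-site.xml; a quick check, assuming the default config location:)
$ grep -A1 'fs.default' /etc/hadoop/conf/core-site.xml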
# Creating the /hbase Directory in HDFS
$ sudo -u hdfs hadoop fs -mkdir /hbase
$ sudo -u hdfs hadoop fs -chown hbase /hbase
# Installing and Starting ZooKeeper Server
sudo apt-get install zookeeper-server
# The package tries to start the server during installation and fails with:
#   Using config: /etc/zookeeper/conf/zoo.cfg
#   ZooKeeper data directory is missing at /var/lib/zookeeper fix the path or run initialize
#   invoke-rc.d: initscript zookeeper-server, action "start" failed.
# FIX: initialize the data directory, then start the server.
nipra@lambda:~$ sudo /etc/init.d/zookeeper-server init
nipra@lambda:~$ sudo /etc/init.d/zookeeper-server start
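# (Optional check, assuming the default client port 2181: ZooKeeper should
# answer "imok".)
$ echo ruok | nc localhost 2181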
nipra@lambda:~$ sudo /etc/init.d/hbase-master start
# To enable the HBase Region Server on Ubuntu and Debian systems
$ sudo apt-get install hbase-regionserver
# http://localhost:60010/
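# (Optional smoke test with the HBase shell; the table and column family
# names here are just examples.)
$ hbase shell
hbase> create 't1', 'f1'
hbase> put 't1', 'r1', 'f1:c1', 'value1'
hbase> scan 't1'
hbase> disable 't1'
hbase> drop 't1'
hbase> exit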
# Installing and Starting the HBase Thrift Server
sudo apt-get install hbase-thrift
# Installing and Configuring REST
sudo apt-get install hbase-rest
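# (Optional: the REST server is assumed to listen on its default port 8080;
# /version is a standard endpoint.)
$ curl http://localhost:8080/version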
# ~/bin/start-hadoop-all.sh
for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
do
  sudo $service start
done
# ~/bin/start-hbase-all.sh
for service in /etc/init.d/zookeeper-server /etc/init.d/hbase-master /etc/init.d/hbase-regionserver /etc/init.d/hbase-rest /etc/init.d/hbase-thrift
do
  sudo $service start
done
# ~/bin/stop-hbase-all.sh
for service in /etc/init.d/hbase-thrift /etc/init.d/hbase-rest /etc/init.d/hbase-regionserver /etc/init.d/hbase-master /etc/init.d/zookeeper-server
do
  sudo $service stop
done
# ~/bin/stop-hadoop-all.sh
for service in /etc/init.d/hadoop-0.20-mapreduce-* /etc/init.d/hadoop-hdfs-*
do
  sudo $service stop
done
# ~/bin/start-all.sh
~/bin/start-hadoop-all.sh
~/bin/start-hbase-all.sh
# ~/bin/stop-all.sh
~/bin/stop-hbase-all.sh
~/bin/stop-hadoop-all.sh
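# (Assuming the scripts above are saved under ~/bin, make them executable:)
$ chmod +x ~/bin/start-hadoop-all.sh ~/bin/start-hbase-all.sh ~/bin/stop-hadoop-all.sh ~/bin/stop-hbase-all.sh ~/bin/start-all.sh ~/bin/stop-all.sh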