# Installing CDH4 on a Single Linux Node in Pseudo-distributed Mode in MRv1
# https://ccp.cloudera.com/display/CDH4DOC/Installing+CDH4+on+a+Single+Linux+Node+in+Pseudo-distributed+Mode#InstallingCDH4onaSingleLinuxNodeinPseudo-distributedMode-InstallingCDH4withMRv1onaSingleLinuxNodeinPseudodistributedmode
# Installing CDH4 with MRv1 on a Single Linux Node in Pseudo-distributed mode
# On Ubuntu Precise
tech@tech-VirtualBox:~$ wget -cv http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb
tech@tech-VirtualBox:~$ sudo dpkg -i cdh4-repository_1.0_all.deb # Adds /etc/apt/sources.list.d/cloudera-cdh4.list (see the dpkg -L listing below)
tech@tech-VirtualBox:~$ dpkg -L cdh4-repository # To view the files on Ubuntu systems
/.
/etc
/etc/apt
/etc/apt/sources.list.d
/etc/apt/sources.list.d/cloudera-cdh4.list
/usr
/usr/share
/usr/share/doc
/usr/share/doc/cdh4-repository
/usr/share/doc/cdh4-repository/cloudera-cdh4.key
/usr/share/doc/cdh4-repository/copyright
# Install CDH4 on Precise
# For Ubuntu Precise systems
tech@tech-VirtualBox:~$ lsb_release -c
$ sudo apt-get install curl -y
$ curl -s http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key | sudo apt-key add -
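# Refresh the package lists so apt can see the new Cloudera repository:
$ sudo apt-get update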
# To view the files on Ubuntu systems; run here before installation, dpkg just reports that the package is not installed:
$ dpkg -L hadoop-0.20-conf-pseudo
Package `hadoop-0.20-conf-pseudo' is not installed.
Use dpkg --info (= dpkg-deb --info) to examine archive files,
and dpkg --contents (= dpkg-deb --contents) to list their contents.
# To install Hadoop
tech@tech-VirtualBox:~$ sudo apt-get install hadoop-0.20-conf-pseudo
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following extra packages will be installed:
bigtop-jsvc bigtop-utils hadoop hadoop-0.20-mapreduce hadoop-0.20-mapreduce-jobtracker hadoop-0.20-mapreduce-tasktracker hadoop-hdfs
hadoop-hdfs-datanode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode zookeeper
The following NEW packages will be installed:
bigtop-jsvc bigtop-utils hadoop hadoop-0.20-conf-pseudo hadoop-0.20-mapreduce hadoop-0.20-mapreduce-jobtracker hadoop-0.20-mapreduce-tasktracker
hadoop-hdfs hadoop-hdfs-datanode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode zookeeper
0 upgraded, 12 newly installed, 0 to remove and 481 not upgraded.
Need to get 69.1 MB of archives.
After this operation, 93.6 MB of additional disk space will be used.
Do you want to continue [Y/n]? y
....
# Once the install is done, go to the next step.
# Step 1: Format the NameNode.
# Got the error: Error: JAVA_HOME is not set and could not be found.
# Even though ~/.bashrc has ``export JAVA_HOME=/usr/lib/jvm/jdk1.7.0''
# FIX: [ https://groups.google.com/a/cloudera.org/d/msg/cdh-user/x0KLPPHiibU/twracHL-Rd0J ]
# $ sudo visudo
# Add ``Defaults env_keep+=JAVA_HOME''
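# To confirm the fix took effect (assuming JAVA_HOME is exported in the
# calling shell, sudo should now pass it through):
$ sudo env | grep JAVA_HOME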
$ sudo -u hdfs hdfs namenode -format
# Step 2: Start HDFS
# NameNode web UI: http://localhost:50070/
$ for service in /etc/init.d/hadoop-hdfs-*
> do
> sudo $service start
> done
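# Quick check that the HDFS daemons came up; jps should list NameNode,
# DataNode and SecondaryNameNode:
$ sudo jps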
# Step 3: Create the /tmp Directory
$ sudo -u hdfs hadoop fs -mkdir /tmp
$ sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
# Step 4: Create the MapReduce system directories
sudo -u hdfs hadoop fs -mkdir /var
sudo -u hdfs hadoop fs -mkdir /var/lib
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred
sudo -u hdfs hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
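# The same mkdir chain can be written as one loop (a minimal sketch over the
# exact paths used above; parents must come before children):
$ for dir in /var /var/lib /var/lib/hadoop-hdfs /var/lib/hadoop-hdfs/cache /var/lib/hadoop-hdfs/cache/mapred /var/lib/hadoop-hdfs/cache/mapred/mapred /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
> do
> sudo -u hdfs hadoop fs -mkdir $dir
> done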
# Step 5: Verify the HDFS File Structure
$ sudo -u hdfs hadoop fs -ls -R /
# Step 6: Start MapReduce
# JobTracker web UI: http://localhost:50030/
$ for service in /etc/init.d/hadoop-0.20-mapreduce-*
> do
> sudo $service start
> done
# Step 7: Create User Directories
$ sudo -u hdfs hadoop fs -mkdir /user/<user>
$ sudo -u hdfs hadoop fs -chown <user> /user/<user>
# where <user> is the Linux username of each user. For the current user:
sudo -u hdfs hadoop fs -mkdir /user/$USER
sudo -u hdfs hadoop fs -chown $USER /user/$USER
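# With the user directory in place, a quick smoke test (assuming the stock
# CDH4 MRv1 examples jar path) runs the pi estimator end to end:
$ hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar pi 2 100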
# For stopping daemons
$ for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
> do
> sudo $service stop
> done
# HBase
tech@tech-VirtualBox:~$ sudo apt-get install hbase
tech@tech-VirtualBox:~$ sudo apt-get install hbase-master
tech@tech-VirtualBox:~$ sudo jps
tech@tech-VirtualBox:~$ sudo /etc/init.d/hbase-master stop
# To enable pseudo-distributed mode, you must first make some configuration
# changes. Open /etc/hbase/conf/hbase-site.xml and add the following properties:
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:8020/hbase</value>
  </property>
</configuration>
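# The port in hbase.rootdir must match the NameNode RPC port set by
# fs.default.name in core-site.xml (8020 in the CDH4 pseudo config); a quick
# check, assuming the standard config path:
$ grep -A1 fs.default /etc/hadoop/conf/core-site.xml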
# Creating the /hbase Directory in HDFS
# Start the namenode and datanode services:
root@tech-VirtualBox:~# /etc/init.d/hadoop-hdfs-namenode start
* Starting Hadoop namenode:
starting namenode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-namenode-tech-VirtualBox.out
root@tech-VirtualBox:~# /etc/init.d/hadoop-hdfs-datanode start
* Starting Hadoop datanode:
starting datanode, logging to /var/log/hadoop-hdfs/hadoop-hdfs-datanode-tech-VirtualBox.out
root@tech-VirtualBox:~#
$ sudo -u hdfs hadoop fs -mkdir /hbase
$ sudo -u hdfs hadoop fs -chown hbase /hbase
# Installing and Starting ZooKeeper Server
sudo apt-get install zookeeper-server
# Starting it without initializing the data directory first fails with:
#   Using config: /etc/zookeeper/conf/zoo.cfg
#   ZooKeeper data directory is missing at /var/lib/zookeeper fix the path or run initialize
#   invoke-rc.d: initscript zookeeper-server, action "start" failed.
# So run `init' first:
tech@tech-VirtualBox:~$ sudo /etc/init.d/zookeeper-server init
tech@tech-VirtualBox:~$ sudo /etc/init.d/zookeeper-server start
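# To verify ZooKeeper is answering, the four-letter `ruok' command should
# print `imok':
$ echo ruok | nc localhost 2181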
tech@tech-VirtualBox:~$ sudo /etc/init.d/hbase-master start
# To enable the HBase Region Server on Ubuntu and Debian systems
$ sudo apt-get install hbase-regionserver
# HBase Master web UI: http://localhost:60010/
# Installing and Starting the HBase Thrift Server
sudo apt-get install hbase-thrift
# Installing and Configuring REST
sudo apt-get install hbase-rest
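# Start the Thrift and REST daemons like the others (the init script names
# match the ones used in the stop script below):
$ sudo /etc/init.d/hbase-thrift start
$ sudo /etc/init.d/hbase-rest start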
# ~/bin/start-hadoop-all.sh
for service in /etc/init.d/hadoop-hdfs-* /etc/init.d/hadoop-0.20-mapreduce-*
do
sudo $service start
done
# ~/bin/start-hbase-all.sh
for service in /etc/init.d/zookeeper-server /etc/init.d/hbase-master /etc/init.d/hbase-regionserver /etc/init.d/hbase-rest /etc/init.d/hbase-thrift
do
sudo $service start
done
# ~/bin/stop-hbase-all.sh
for service in /etc/init.d/hbase-thrift /etc/init.d/hbase-rest /etc/init.d/hbase-regionserver /etc/init.d/hbase-master /etc/init.d/zookeeper-server
do
sudo $service stop
done
# ~/bin/stop-hadoop-all.sh
for service in /etc/init.d/hadoop-0.20-mapreduce-* /etc/init.d/hadoop-hdfs-*
do
sudo $service stop
done
# ~/bin/start-all.sh
~/bin/start-hadoop-all.sh
~/bin/start-hbase-all.sh
# ~/bin/stop-all.sh
~/bin/stop-hbase-all.sh
~/bin/stop-hadoop-all.sh
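# If the six snippets above are saved as scripts under ~/bin, give each a
# '#!/bin/sh' first line and make them executable:
$ chmod +x ~/bin/start-hadoop-all.sh ~/bin/start-hbase-all.sh ~/bin/stop-hbase-all.sh ~/bin/stop-hadoop-all.sh ~/bin/start-all.sh ~/bin/stop-all.sh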