Last active
December 20, 2015 22:39
-
-
Save sturadnidge/6206277 to your computer and use it in GitHub Desktop.
CDH4 MRv1 install in pseudo-distributed mode, plus optional ZooKeeper, HBase, HttpFS. Assumes you have a Sun JDK installed and JAVA_HOME set for root if installed somewhere other than /usr/java/. This is all basically a summary of various parts of https://ccp.cloudera.com/display/CDH4DOC/CDH4+Documentation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add the Cloudera GPG key and one-click repo RPM (enables the cdh4 yum repo)
rpm --import http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
rpm -ivh http://archive.cloudera.com/cdh4/one-click-install/redhat/6/x86_64/cloudera-cdh-4-0.x86_64.rpm
# note: if you want to install a specific version,
# modify /etc/yum.repos.d/cloudera-cdh4.repo accordingly.
# For example, if you want to install 4.2.1, use the following baseurl:
# baseurl=http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/4.2.1/
# Install CDH4 base in pseudo-distributed (single-node) MRv1 configuration
yum -y install hadoop-0.20-conf-pseudo
# if you can't use the system-wide Java runtime, create a file /etc/hadoop/conf/hadoop-env.sh
# and define JAVA_HOME in there
# ensure DNS is working, or that you have an entry in /etc/hosts for this host's FQDN
# Point fs.default.name (core-site.xml) and mapred.job.tracker (mapred-site.xml)
# at the fully qualified hostname. Honours $FQDN if exported, otherwise asks
# the system, so the placeholder no longer needs hand-editing.
fqdn="${FQDN:-$(hostname -f)}"
sed -i "s/localhost/${fqdn}/" /etc/hadoop/conf/core-site.xml
sed -i "s/localhost/${fqdn}/" /etc/hadoop/conf/mapred-site.xml
# make a directory for hadoop and grant ownership
mkdir -p /opt/hadoop
chown -R hdfs:hadoop /opt/hadoop
# configure data directory
mkdir -p /opt/hadoop/dfs
# update /etc/hadoop/conf/hdfs-site.xml with the XML properties listed below
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop/nn</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///opt/hadoop/snn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///opt/hadoop/dfs/data</value>
</property>
# move hadoop.tmp directory onto /opt/hadoop and symlink the old path back
mv /var/lib/hadoop-hdfs/cache /opt/hadoop/
ln -s /opt/hadoop/cache /var/lib/hadoop-hdfs/cache
# move logs the same way
mkdir -p /opt/hadoop/log
mv /var/log/hadoop-0.20-mapreduce /opt/hadoop/log/
mv /var/log/hadoop-hdfs /opt/hadoop/log/
mv /var/log/zookeeper /opt/hadoop/log/
ln -s /opt/hadoop/log/hadoop-0.20-mapreduce /var/log/hadoop-0.20-mapreduce
ln -s /opt/hadoop/log/hadoop-hdfs /var/log/hadoop-hdfs
ln -s /opt/hadoop/log/zookeeper /var/log/zookeeper
# Format the NameNode (one-time step; re-running it destroys HDFS metadata)
sudo -u hdfs hdfs namenode -format
# Start HDFS (namenode, secondarynamenode, datanode init scripts)
for service in /etc/init.d/hadoop-hdfs-*; do "$service" start; done
# Run the one-time HDFS directory setup as the hdfs superuser. Note: using
# 'sudo -u hdfs' per command instead of an interactive 'su - hdfs' ... 'exit'
# pair, which does not work when this file is executed as a script (the
# commands would only run after the su shell exited, as root).
# Create the HDFS /tmp directory before someone else does it and gets the perms wrong
sudo -u hdfs hadoop fs -mkdir /tmp
sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
# Create and permission the hadoop.tmp directories for mapred user
sudo -u hdfs hadoop fs -mkdir -p /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
# Start MapReduce (jobtracker, tasktracker init scripts)
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" start; done
# Check everything worked, run '$JAVA_HOME/bin/jps' and look for the following processes
DataNode
NameNode
SecondaryNameNode
JobTracker
TaskTracker
#
# Zookeeper
#
# Install zookeeper server package
# Base package was already installed by hadoop-0.20-conf-pseudo
yum -y install zookeeper-server
# set JAVA_HOME in /usr/lib/zookeeper/bin/zkEnv.sh if you can't use the system-wide Java runtime
# Initialise Zookeeper as a single-node ensemble (server id 1)
service zookeeper-server init --myid=1
# relocate the zookeeper snapshot directory and symlink the old path back
mv /var/lib/zookeeper /opt/hadoop/
ln -s /opt/hadoop/zookeeper /var/lib/zookeeper
# Start zookeeper
service zookeeper-server start
# Check everything worked, run '$JAVA_HOME/bin/jps' and look for the following process
QuorumPeerMain
#
# HBase
#
# set dfs.datanode.max.xcievers in /etc/hadoop/conf/hdfs-site.xml (yes, it's actually misspelled)
# Insert the following XML property between the <configuration> and </configuration> tags
<property>
<name>dfs.datanode.max.xcievers</name>
<value>4096</value>
</property>
# restart mapred & hdfs so the new datanode setting takes effect
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" stop; done
for service in /etc/init.d/hadoop-hdfs-*; do "$service" restart; done
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" start; done
# Install HBase master and regionserver
yum -y install hbase-master hbase-regionserver
# tell hbase to use the standalone zookeeper installed above
# add the following to /etc/hbase/conf/hbase-env.sh
export HBASE_MANAGES_ZK=false
# Modify /etc/hbase/conf/hbase-site.xml
# Be sure to change <hostname.fqdn> to your actual fully qualified hostname
# Insert the following XML properties between the <configuration> and </configuration> tags
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://<hostname.fqdn>:8020/hbase</value>
</property>
# Create the /hbase directory in HDFS and hand ownership to the hbase user
sudo -u hdfs hadoop fs -mkdir /hbase
sudo -u hdfs hadoop fs -chown hbase /hbase
# set JAVA_HOME in /etc/hbase/conf/hbase-env.sh if you can't use the system-wide Java runtime
# relocate logs and symlink the old path back
mv /var/log/hbase /opt/hadoop/log/
ln -s /opt/hadoop/log/hbase /var/log/hbase
# Start HBase (master and regionserver init scripts)
for service in /etc/init.d/hbase-*; do "$service" start; done
# Check everything worked, run '$JAVA_HOME/bin/jps' and look for the following processes
HRegionServer
HMaster
#
# Httpfs
#
# Install httpfs
yum -y install hadoop-httpfs
# relocate logs and symlink the old path back
mv /var/log/hadoop-httpfs /opt/hadoop/log/
ln -s /opt/hadoop/log/hadoop-httpfs /var/log/hadoop-httpfs
# export JAVA_HOME in /etc/hadoop-httpfs/conf/httpfs-env.sh
# Start httpfs - you should see some fairly typical Tomcat startup messages
service hadoop-httpfs start
# Check everything worked, run '$JAVA_HOME/bin/jps' and look for the following process
Bootstrap
#
# we're done, running '$JAVA_HOME/bin/jps' should show all of the following processes
# (order doesn't matter)
#
Bootstrap
DataNode
NameNode
SecondaryNameNode
JobTracker
TaskTracker
QuorumPeerMain
HMaster
HRegionServer
#
# you can also point a web browser at <hostname> ports 50030, 50060, 50070, 50090, 60010, 60030
#
# | |
# Additional notes | |
# | |
# - User setup | |
# Create Linux users and corresponding HDFS home directories as needed | |
useradd -m -U <user> | |
sudo -u hdfs hadoop fs -mkdir /user/<user> | |
sudo -u hdfs hadoop fs -chown <user> /user/<user> | |
# - Shutting down / Starting up
#
# Order matters! To shutdown, do the following
# (HBase first since it depends on both ZooKeeper and HDFS):
#
for service in /etc/init.d/hbase-*; do "$service" stop; done
service zookeeper-server stop
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" stop; done
for service in /etc/init.d/hadoop-hdfs-*; do "$service" stop; done
#
# Then to start back up (reverse order — dependencies first):
#
for service in /etc/init.d/hadoop-hdfs-*; do "$service" start; done
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" start; done
service zookeeper-server start
for service in /etc/init.d/hbase-*; do "$service" start; done
# - Hadoop service accounts
# If you want to be picky with the GID/UIDs of the Hadoop service accounts then do
# the following before installing the Cloudera RPMs... otherwise just let the RPMs do it for you.
# Common Hadoop group
groupadd --gid 6666 hadoop
# Map Reduce
groupadd --gid 6667 mapred
useradd --no-create-home --home-dir /usr/lib/hadoop-0.20-mapreduce \
  --shell /bin/bash --uid 6667 --gid mapred --groups hadoop --comment "Hadoop MapReduce" mapred
# HDFS
groupadd --gid 6668 hdfs
useradd --no-create-home --home-dir /usr/lib/hadoop-hdfs \
  --shell /bin/bash --uid 6668 --gid hdfs --groups hadoop --comment "Hadoop HDFS" hdfs
# Zookeeper (no login shell; daemon-only account)
groupadd --gid 6669 zookeeper
useradd --no-create-home --home-dir /var/run/zookeeper \
  --shell /sbin/nologin --uid 6669 --gid zookeeper --comment "Zookeeper" zookeeper
# HBase (no login shell; daemon-only account)
groupadd --gid 6670 hbase
useradd --no-create-home --home-dir /var/run/hbase \
  --shell /sbin/nologin --uid 6670 --gid hbase --comment "HBase" hbase
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment