#!/bin/bash
# CDH4 pseudo-distributed cluster setup notes for RHEL/CentOS 6:
# Hadoop (HDFS + MRv1), ZooKeeper, and HBase on a single node.
# These are step-by-step notes, not an unattended script — read each
# section and substitute <hostname>/<user> placeholders before running.
# Assumes you have Sun JDK installed already and JAVA_HOME set to that for root.
# This is all basically a summary of various parts of
# https://ccp.cloudera.com/display/CDH4DOC/CDH4+Documentation

# --- Cloudera repository setup ---
# Import the Cloudera RPM-GPG-KEY and install the one-click repo RPM.
rpm --import http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
rpm -ivh http://archive.cloudera.com/cdh4/one-click-install/redhat/6/x86_64/cloudera-cdh-4-0.x86_64.rpm

# --- Install CDH4 base (pseudo-distributed configuration) ---
yum install hadoop-0.20-conf-pseudo

# Set JAVA_HOME in /etc/hadoop/conf/hadoop-env.sh if you can't use the
# system-wide Java runtime.

# Update fs.default.name to the actual hostname in core-site.xml.
# NOTE: replace <hostname> with your machine's real hostname before running.
sed -i 's/localhost/<hostname>/' /etc/hadoop/conf/core-site.xml
# Update mapred.job.tracker likewise in mapred-site.xml.
sed -i 's/localhost/<hostname>/' /etc/hadoop/conf/mapred-site.xml

# Format the NameNode (one-time step; re-running destroys HDFS metadata).
sudo -u hdfs hdfs namenode -format

# Start all HDFS daemons (namenode, datanode, secondarynamenode).
for service in /etc/init.d/hadoop-hdfs-*; do "$service" start; done
# --- HDFS directory layout ---
# Do all of the following as the HDFS superuser.
su - hdfs

# Create the HDFS /tmp directory before someone else does it and gets the
# permissions wrong (1777 = world-writable with sticky bit, like local /tmp).
hadoop fs -mkdir /tmp
hadoop fs -chmod -R 1777 /tmp

# Create and permission the MapReduce system directories.
# (One mkdir per path level — this hadoop fs version creates no parents.)
hadoop fs -mkdir /var
hadoop fs -mkdir /var/lib
hadoop fs -mkdir /var/lib/hadoop-hdfs
hadoop fs -mkdir /var/lib/hadoop-hdfs/cache
hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred
hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred
hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred

# Verify the HDFS file structure:
hadoop fs -ls -R /
# Should look as follows:
# drwxrwxrwt   - hdfs   supergroup  0 2012-04-19 15:14 /tmp
# drwxr-xr-x   - hdfs   supergroup  0 2012-04-19 15:16 /var
# drwxr-xr-x   - hdfs   supergroup  0 2012-04-19 15:16 /var/lib
# drwxr-xr-x   - hdfs   supergroup  0 2012-04-19 15:16 /var/lib/hadoop-hdfs
# drwxr-xr-x   - hdfs   supergroup  0 2012-04-19 15:16 /var/lib/hadoop-hdfs/cache
# drwxr-xr-x   - mapred supergroup  0 2012-04-19 15:19 /var/lib/hadoop-hdfs/cache/mapred
# drwxr-xr-x   - mapred supergroup  0 2012-04-19 15:29 /var/lib/hadoop-hdfs/cache/mapred/mapred
# drwxrwxrwt   - mapred supergroup  0 2012-04-19 15:33 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging

# Change back to root.
exit
# --- Start MapReduce ---
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do "$service" start; done

# Check everything worked: run '$JAVA_HOME/bin/jps' and look for the
# following processes:
#   DataNode
#   NameNode
#   SecondaryNameNode
#   JobTracker
#   TaskTracker
#
# ZooKeeper
#
# Install the ZooKeeper server package (the base zookeeper package was
# already pulled in by hadoop-0.20-conf-pseudo).
yum install zookeeper-server

# Set JAVA_HOME in /usr/lib/zookeeper/bin/zkEnv.sh if you can't use the
# system-wide Java runtime.

# Initialise ZooKeeper, assigning this node server id 1.
service zookeeper-server init --myid=1
# Start ZooKeeper.
service zookeeper-server start

# Check everything worked: run '$JAVA_HOME/bin/jps' and look for:
#   QuorumPeerMain
#
# HBase
#
# Set dfs.datanode.max.xcievers in /etc/hadoop/conf/hdfs-site.xml
# (yes, the property name really is misspelled upstream).
# Insert the following XML property between the <configuration> and
# </configuration> tags:
#   <property>
#     <name>dfs.datanode.max.xcievers</name>
#     <value>4096</value>
#   </property>

# Restart HDFS so the change takes effect.
for service in /etc/init.d/hadoop-hdfs-*; do "$service" restart; done

# Install HBase (master and region server).
yum install hbase-master hbase-regionserver

# Modify /etc/hbase/conf/hbase-site.xml — be sure to change <hostname> to
# your actual hostname. Insert the following XML properties between the
# <configuration> and </configuration> tags:
#   <property>
#     <name>hbase.cluster.distributed</name>
#     <value>true</value>
#   </property>
#   <property>
#     <name>hbase.rootdir</name>
#     <value>hdfs://<hostname>:8020/hbase</value>
#   </property>

# Create the /hbase directory in HDFS and hand it to the hbase user.
sudo -u hdfs hadoop fs -mkdir /hbase
sudo -u hdfs hadoop fs -chown hbase /hbase

# Set JAVA_HOME in /etc/hbase/conf/hbase-env.sh if you can't use the
# system-wide Java runtime.

# Start the HBase master, then the region server.
service hbase-master start
service hbase-regionserver start

# Check everything worked: run '$JAVA_HOME/bin/jps' and look for:
#   HRegionServer
#   HMaster
#
# We're done. Running '$JAVA_HOME/bin/jps | sort' should show all of the
# following processes (order doesn't matter):
#   JobTracker
#   TaskTracker
#   QuorumPeerMain
#   DataNode
#   NameNode
#   SecondaryNameNode
#   HMaster
#   HRegionServer
# | |
# Additional notes | |
# | |
# | |
# - User setup | |
# Create Linux users and corresponding HDFS home directories as needed | |
useradd -m -U <user> | |
sudo -u hdfs hadoop fs -mkdir /user/<user> | |
sudo -u hdfs hadoop fs -chown <user> /user/<user> | |
# - Shutting down / Starting up | |
# Order matters! To shutdown, do the following: | |
for service in /etc/init.d/hbase-*; do $service stop; done | |
service zookeeper-server stop | |
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do $service stop; done | |
for service in /etc/init.d/hadoop-hdfs-* ; do $service stop; done | |
# | |
# Then to start back up: | |
# | |
for service in /etc/init.d/hadoop-hdfs-* ; do $service start; done | |
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do $service start; done | |
service zookeeper-server start | |
for service in /etc/init.d/hbase-*; do $service start; done | |
# - Disk Space | |
# Depending on your usage and your Linux environment, you may run out of disk space quickly. | |
# To fix this, moving /var/lib/hadoop-hdfs onto a dedicated file system is a good start. | |
# Doing the same for /var/log/hadoop-hdfs isn't a bad idea either. | |
# Make sure you retain (or duplicate) the exact same file system permissions in the new location if you do this. | |
# - Hadoop service accounts | |
# If you want to be picky with the GID/UID's of the Hadoop service accounts then do the following before running the Cloudera RPM's... otherwise just let the RPM's do it for you. | |
# Common Hadoop group | |
groupadd --gid 6666 hadoop | |
# Map Reduce | |
groupadd --gid 6667 mapred | |
useradd --no-create-home --home-dir /usr/lib/hadoop-0.20-mapreduce --shell /bin/bash --uid 6667 --gid mapred --groups hadoop --comment "Hadoop MapReduce" mapred | |
# HDFS | |
groupadd --gid 6668 hdfs | |
useradd --no-create-home --home-dir /usr/lib/hadoop-hdfs --shell /bin/bash --uid 6668 --gid hdfs --groups hadoop --comment "Hadoop HDFS" hdfs | |
# Zookeeper | |
groupadd --gid 6669 zookeeper | |
useradd --no-create-home --home-dir /var/run/zookeeper --shell /sbin/nologin --uid 6669 --gid zookeeper --comment "Zookeeper" zookeeper | |
# HBase | |
groupadd --gid 6670 hbase | |
useradd --no-create-home --home-dir /var/run/hbase --shell /sbin/nologin --uid 6670 --gid hbase --comment "HBase" hbase | |
# End of setup notes.