Skip to content

Instantly share code, notes, and snippets.

@mitmul
Last active January 12, 2018 10:31
Show Gist options
  • Select an option

  • Save mitmul/9192437 to your computer and use it in GitHub Desktop.

Select an option

Save mitmul/9192437 to your computer and use it in GitHub Desktop.
Install CDH5 and set up a Hadoop and Mahout development environment on a CentOS 6.4 Vagrant box (http://developer.nrel.gov/downloads/vagrant-boxes/CentOS-6.4-x86_64-v20130731.box)
# Install CDH5 from Cloudera's one-click-install package.
cd "$HOME"
# JDK goes in first; javac is looked up later to derive JAVA_HOME.
sudo yum -y install java-1.7.0-openjdk-devel
# Download the one-click-install RPM into the home directory and install it
# from the local file (signature check disabled for this one package).
wget http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm
sudo yum -y --nogpgcheck localinstall cloudera-cdh-5-0.x86_64.rpm
# Import Cloudera's RPM signing key.
sudo rpm --import http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
# Pull in the pseudo-distributed Hadoop 0.20 (MRv1) configuration package.
sudo yum -y install hadoop-0.20-conf-pseudo
# Persist the Java/Hadoop environment in ~/.bashrc and load it into this shell.

# append_once FILE LINE
# Append LINE to FILE unless FILE already contains an identical whole line,
# so re-running this script does not pile up duplicate entries.
# A missing FILE is treated as "line not present" and gets created.
append_once() {
  local file=$1
  local line=$2
  grep -qxF -- "$line" "$file" 2>/dev/null || printf '%s\n' "$line" >> "$file"
}

# The appended text is kept exactly as before so that lines written by an
# earlier run are recognised and not added a second time.
# 1. Resolve the javac found on PATH to its canonical (symlink-free) location.
append_once ~/.bashrc 'pathtojava=$(readlink -e $(which javac))'
# 2. Drop the last two path components (presumably bin/javac) to get the JDK root.
append_once ~/.bashrc 'export JAVA_HOME=${pathtojava%/*/*}'
# 3. Source the Hadoop defaults file installed under /etc/default.
append_once ~/.bashrc '. /etc/default/hadoop'
. ~/.bashrc
# install mahout
sudo yum install -y mahout
# format hdfs (the format command is run as the hdfs user)
sudo -u hdfs hdfs namenode -format
# start hadoop daemons: run every hadoop-hdfs-* init script with "start"
for service in /etc/init.d/hadoop-hdfs-*; do
  # If nothing matches, the glob is left as the literal pattern; skip it
  # instead of asking sudo to execute a path that does not exist.
  [ -e "$service" ] || continue
  sudo "$service" start
done
# create /tmp directories
# -p (as already used for the staging directory below) makes the command
# succeed when /tmp is already present, e.g. when this script is run again.
sudo -u hdfs hadoop fs -mkdir -p /tmp
# mode 1777: world-writable with the sticky bit set
sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
# create directories for MapReduce
sudo -u hdfs hadoop fs -mkdir -p /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
# hand the whole mapred cache tree over to the mapred user
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
# start MapReduce daemons: run every hadoop-0.20-mapreduce-* init script with "start"
for service in /etc/init.d/hadoop-0.20-mapreduce-*; do
  # If nothing matches, the glob is left as the literal pattern; skip it
  # instead of asking sudo to execute a path that does not exist.
  [ -e "$service" ] || continue
  sudo "$service" start
done
# Install lynx, a browser for opening the status pages listed below.
sudo yum -y install lynx
# Pages to open once the daemons are running:
#   jobtracker:   lynx http://localhost:50030
#   HDFS status:  lynx http://localhost:50070
# Install git plus the -devel packages needed by the steps that follow
# (presumably the headers used when pyenv compiles Python; confirm).
sudo yum install -y \
  git-all \
  zlib-devel \
  openssl-devel \
  readline-devel \
  ncurses-devel \
  sqlite-devel \
  expat-devel \
  bzip2-devel \
  tcl-devel \
  gdbm-devel \
  libbsd-devel
# install pyenv to use Python 2.7.6
cd
# GitHub no longer serves the unauthenticated git:// protocol, so clone over
# HTTPS from the project's current home. Skip the clone when ~/.pyenv is
# already there (git refuses to clone into an existing non-empty directory).
# NOTE(review): a very old git/curl stack may be unable to negotiate TLS with
# GitHub; update those packages first if the clone fails on the handshake.
[ -d .pyenv ] || git clone https://github.com/pyenv/pyenv.git .pyenv
# Add the pyenv bootstrap to ~/.bashrc, one line at a time, skipping any line
# that is already present so repeated runs do not duplicate entries.
for rc_line in \
  'export PYENV_ROOT=$HOME/.pyenv' \
  'export PATH=$PYENV_ROOT/bin:$PATH' \
  'eval "$(pyenv init -)"'
do
  grep -qxF -- "$rc_line" ~/.bashrc 2>/dev/null \
    || printf '%s\n' "$rc_line" >> ~/.bashrc
done
. ~/.bashrc
# -s (--skip-existing): do nothing if 2.7.6 is already installed, instead of
# stopping at an interactive "continue with installation?" prompt.
pyenv install -s 2.7.6
pyenv global 2.7.6
pyenv rehash
# install mrjob packages (pip here is the one from the pyenv-selected Python)
pip install boto mr3po mrjob
# install Maven 3.2.1 (the archive below is a Maven 3 release, not Maven 2)
cd
# Fetch from archive.apache.org, the same host used for the Mahout download
# below, rather than a third-party mirror that may no longer carry 3.2.1.
wget http://archive.apache.org/dist/maven/maven-3/3.2.1/binaries/apache-maven-3.2.1-bin.tar.gz
tar zxvf apache-maven-3.2.1-bin.tar.gz
sudo mv apache-maven-3.2.1 /usr/local/maven
# Register MAVEN_HOME and its bin directory in /etc/profile. Each line is
# appended only if it is not already there, so re-runs do not duplicate it.
# The append itself runs under "sudo sh -c" because the redirection needs root.
grep -qxF 'export MAVEN_HOME=/usr/local/maven' /etc/profile \
  || sudo sh -c "echo 'export MAVEN_HOME=/usr/local/maven' >> /etc/profile"
grep -qxF 'export PATH=$PATH:$MAVEN_HOME/bin' /etc/profile \
  || sudo sh -c "echo 'export PATH=\$PATH:\$MAVEN_HOME/bin' >> /etc/profile"
. /etc/profile
# download the Mahout 0.8 source distribution
wget http://archive.apache.org/dist/mahout/0.8/mahout-distribution-0.8-src.tar.gz
@dasgoll

dasgoll commented Sep 18, 2014

Copy link
Copy Markdown

Thanks for posting this :)
One question though. Why the need for Python 2.7?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment