Last active
January 12, 2018 10:31
-
-
Save mitmul/9192437 to your computer and use it in GitHub Desktop.
Install CDH5 and set up a Hadoop and Mahout development environment on a CentOS 6.4 Vagrant box (http://developer.nrel.gov/downloads/vagrant-boxes/CentOS-6.4-x86_64-v20130731.box)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Provision a pseudo-distributed CDH5 Hadoop + Mahout development environment
# on a CentOS 6 box, then add pyenv (Python 2.7.6), mrjob and Maven.
# Run as a regular user with sudo rights. Steps that modify dotfiles or move
# directories are guarded so the script can be re-run after a failed step.

# Append line $2 to file $1 unless that exact line is already present.
# Falls back to sudo when the file exists but is not writable by this user.
append_once() {
  local file=$1 line=$2
  grep -qxF -- "$line" "$file" 2>/dev/null && return 0
  if [ -w "$file" ] || [ ! -e "$file" ]; then
    printf '%s\n' "$line" >> "$file"
  else
    printf '%s\n' "$line" | sudo tee -a "$file" > /dev/null
  fi
}

# Run "<script> start" for every executable init script whose path begins
# with prefix $1; warn on stderr when the prefix matches nothing.
start_services() {
  local service started=0
  for service in "$1"*; do
    # An unmatched glob leaves the literal pattern behind; skip it.
    [ -x "$service" ] || continue
    sudo "$service" start
    started=1
  done
  [ "$started" -eq 1 ] || printf 'warning: no init scripts match %s*\n' "$1" >&2
}

# install CDH5
cd || exit 1
sudo yum install -y java-1.7.0-openjdk-devel
# NOTE(review): archive.cloudera.com may require credentials nowadays --
# confirm the two Cloudera URLs below still resolve.
wget -c http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm
sudo yum --nogpgcheck localinstall -y cloudera-cdh-5-0.x86_64.rpm
sudo rpm --import http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
sudo yum install -y hadoop-0.20-conf-pseudo

# add below lines to ~/.bashrc
# The lines are written literally (single quotes) so they are evaluated each
# time ~/.bashrc is read. javac resolves to <JAVA_HOME>/bin/javac, so dropping
# the last two path components yields JAVA_HOME.
# shellcheck disable=SC2016
append_once ~/.bashrc 'pathtojava=$(readlink -e $(which javac))'
# shellcheck disable=SC2016
append_once ~/.bashrc 'export JAVA_HOME=${pathtojava%/*/*}'
append_once ~/.bashrc '. /etc/default/hadoop'
# Reload so the settings above apply to the rest of this run.
. ~/.bashrc

# install mahout
sudo yum install -y mahout

# format hdfs
sudo -u hdfs hdfs namenode -format

# start hadoop daemons
start_services /etc/init.d/hadoop-hdfs-

# create /tmp directories (-p: do not fail if /tmp already exists in HDFS)
sudo -u hdfs hadoop fs -mkdir -p /tmp
sudo -u hdfs hadoop fs -chmod -R 1777 /tmp

# create directories for MapReduce
sudo -u hdfs hadoop fs -mkdir -p /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred

# start MapReduce daemons
start_services /etc/init.d/hadoop-0.20-mapreduce-

# install browser
sudo yum install -y lynx
# jobtracker
# lynx http://localhost:50030
# HDFS status
# lynx http://localhost:50070

# install necessary packages
sudo yum -y install git-all zlib-devel openssl-devel readline-devel ncurses-devel sqlite-devel expat-devel bzip2-devel tcl-devel gdbm-devel libbsd-devel

# install pyenv to use Python 2.7.6
cd || exit 1
# GitHub no longer serves the unauthenticated git:// protocol, so clone over
# HTTPS from the repository's current home (pyenv/pyenv).
# NOTE(review): GitHub requires TLS 1.2; if the clone fails on this old box,
# update nss/curl first -- confirm on the target image.
if [ ! -d "$HOME/.pyenv" ]; then
  git clone https://github.com/pyenv/pyenv.git .pyenv
fi
# shellcheck disable=SC2016
append_once ~/.bashrc 'export PYENV_ROOT=$HOME/.pyenv'
# shellcheck disable=SC2016
append_once ~/.bashrc 'export PATH=$PYENV_ROOT/bin:$PATH'
# shellcheck disable=SC2016
append_once ~/.bashrc 'eval "$(pyenv init -)"'
. ~/.bashrc
# -s: skip the build instead of prompting when 2.7.6 is already installed.
pyenv install -s 2.7.6
pyenv global 2.7.6
pyenv rehash

# install mrjob packages
pip install boto mr3po mrjob

# install Maven 3.2.1
cd || exit 1
# Only unpack when /usr/local/maven is absent; otherwise mv would nest the
# new tree inside the existing directory.
if [ ! -d /usr/local/maven ]; then
  # Old releases are kept on archive.apache.org (same host as the Mahout
  # download below) rather than on third-party mirrors.
  wget -c http://archive.apache.org/dist/maven/maven-3/3.2.1/binaries/apache-maven-3.2.1-bin.tar.gz
  tar -xzvf apache-maven-3.2.1-bin.tar.gz
  sudo mv apache-maven-3.2.1 /usr/local/maven
fi
append_once /etc/profile 'export MAVEN_HOME=/usr/local/maven'
# shellcheck disable=SC2016
append_once /etc/profile 'export PATH=$PATH:$MAVEN_HOME/bin'
. /etc/profile

# download mahout
wget -c http://archive.apache.org/dist/mahout/0.8/mahout-distribution-0.8-src.tar.gz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for posting this :)
One question though. Why the need for Python 2.7?