Last active
August 29, 2015 14:07
-
-
Save alvaromuir/5131956a2c34eca54dfc to your computer and use it in GitHub Desktop.
base data-science vagrant linux build
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# base data-science vagrant linux build
# last update, 9.28.14
# @alvaromuir
## Base installs ##
# Bring every installed package up to date, then drop yum's caches.
sudo yum -y update
sudo yum -y upgrade
sudo yum clean all
## Administrative ##
# Register the EPEL 6 repository, then install basic admin/network tooling.
sudo rpm -Uvh http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
sudo yum -y install openssh-clients openssh-server ntp bind-utils yum-utils htop nmap
## JAVA ##
# Fetch the JDK 7u67 RPM (the cookie header accepts the Oracle license
# non-interactively), install it and register it with `alternatives`.
sudo wget --no-cookies --no-check-certificate --header "Cookie: oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/7u67-b01/jdk-7u67-linux-x64.rpm" -O /usr/local/src/jdk-7-linux-x64.rpm
sudo rpm -Uvh /usr/local/src/jdk-7-linux-x64.rpm
sudo alternatives --install /usr/bin/java java /usr/java/latest/bin/java 2
# Login-shell environment. The quoted heredoc delimiter keeps $JAVA_HOME and
# $PATH literal so they are expanded when the profile script is sourced.
# JAVACMD must name the java launcher itself: $JAVA_HOME/bin/java.
sudo tee /etc/profile.d/java.sh >/dev/null <<'EOF'
export JAVA_HOME=/usr/java/default
export JAVACMD=$JAVA_HOME/bin/java
export PATH=$PATH:$JAVA_HOME/bin
EOF
source /etc/profile.d/java.sh
## Core development additions ##
sudo yum -y groupinstall "Development tools"
sudo yum -y install python-setuptools golang nodejs npm screen hg git
sudo easy_install pip
sudo pip install virtualenvwrapper
# Append python's site-packages directory to PATH for login shells. The
# directory is resolved right now by the invoking user's python and written
# as a literal path; $PATH is intentionally left unexpanded (single quotes).
printf 'export PATH=$PATH:%s\n' \
  "$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" \
  | sudo tee /etc/profile.d/python.sh >/dev/null
source /etc/profile.d/python.sh
# Go workspace under each user's home; $HOME/$PATH stay literal in the file.
sudo tee /etc/profile.d/go.sh >/dev/null <<'EOF'
export GOPATH=$HOME/go
export PATH=$PATH:$HOME/go/bin
EOF
source /etc/profile.d/go.sh
# Needs root like every other install step; the glob is quoted so yum (not
# the shell, against files in the current directory) expands it.
sudo yum -y install 'postgresql-jdbc*'
## ANT ##
# Unpack the Ant 1.9.4 binary distribution to /opt/ant and expose `ant`.
sudo wget http://apache.tradebit.com/pub//ant/binaries/apache-ant-1.9.4-bin.zip -O /usr/local/src/ant-1.9.4-bin.zip
sudo unzip /usr/local/src/ant-1.9.4-bin.zip -d /opt
sudo mv /opt/apache-ant-1.9.4 /opt/ant
# -f so a re-run replaces an existing link instead of failing.
sudo ln -sf /opt/ant/bin/ant /usr/bin/ant
# ANT_HOME is exported so child processes (not just this shell) can see it.
sudo tee /etc/profile.d/ant.sh >/dev/null <<'EOF'
export ANT_HOME=/opt/ant
export CLASSPATH=.
EOF
source /etc/profile.d/ant.sh
## Maven ##
# Unpack the Maven 3.2.1 binary distribution to /opt/maven and expose `mvn`.
sudo wget http://www.motorlogy.com/apache/maven/maven-3/3.2.1/binaries/apache-maven-3.2.1-bin.zip -O /usr/local/src/maven-3.2.1.zip
sudo unzip /usr/local/src/maven-3.2.1.zip -d /opt
sudo mv /opt/apache-maven-3.2.1 /opt/maven
# -f so a re-run replaces an existing link instead of failing.
sudo ln -sf /opt/maven/bin/mvn /usr/bin/mvn
# MAVEN_HOME / MAVEN_OPTS are exported: mvn runs as a child process and only
# sees exported variables. Heap is sized with one maximum (-Xmx2g) and one
# initial (-Xms512m) value; a second -Xmx would simply override the first.
# NOTE(review): -Xms512m assumes the repeated -Xmx was a typo — confirm.
sudo tee /etc/profile.d/maven.sh >/dev/null <<'EOF'
export MAVEN_HOME=/opt/maven
export MAVEN_OPTS="-Xmx2g -Xms512m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m"
export CLASSPATH=.
EOF
source /etc/profile.d/maven.sh
## SCALA ##
# Install the Scala 2.11.2 RPM straight from its URL and publish SCALA_HOME.
sudo yum -y localinstall http://downloads.typesafe.com/scala/2.11.2/scala-2.11.2.rpm
# Quoted delimiter: $PATH and $SCALA_HOME are expanded at login, not now.
sudo tee /etc/profile.d/scala.sh >/dev/null <<'EOF'
export SCALA_HOME=/usr/share/scala
export PATH=$PATH:$SCALA_HOME/bin
EOF
source /etc/profile.d/scala.sh
## Data Science Stuff ##
# Data Science Toolkit python tools: unpacked into the current directory,
# installed, then the unpacked trees are removed again.
sudo curl -o /usr/local/src/python_tools.zip http://www.datasciencetoolkit.org/python_tools.zip
sudo unzip /usr/local/src/python_tools.zip
sudo python/install
sudo rm -rf python __MACOSX
sudo pip install csvkit
# Register the home:tange repository (GNU parallel) BEFORE the yum install
# below so `parallel` can be resolved from it.
sudo curl -o /etc/yum.repos.d/tange.repo http://download.opensuse.org/repositories/home:/tange/CentOS_CentOS-6/home:tange.repo
sudo yum -y install numpy scipy python-matplotlib ipython python-pandas sympy python-nose R parallel octave
# Stand-alone command-line helpers.
sudo curl -o /usr/local/bin/jq http://stedolan.github.io/jq/download/linux64/jq
sudo chmod +x /usr/local/bin/jq
sudo npm -g install xml2json-command
sudo curl -o /usr/local/bin/Rio https://raw.githubusercontent.com/jeroenjanssens/data-science-at-the-command-line/master/tools/Rio
sudo chmod +x /usr/local/bin/Rio
# Lower-case aliases; the targets are resolved now, from the current PATH,
# and written as absolute paths.
printf 'alias rio=%s\nalias r=%s\n' "$(command -v Rio)" "$(command -v R)" \
  | sudo tee /etc/profile.d/aliases.sh >/dev/null
## Julia ##
# Build Julia from source in /usr/share/julia.
sudo yum -y install libXp openmotif openmotif22
# https:// rather than git://, which GitHub no longer serves.
sudo git clone https://github.com/JuliaLang/julia.git /usr/share/julia
# All paths are spelled out: `!$` history expansion only exists in
# interactive shells, so it cannot be used to re-target the clone directory.
sudo tee -a /usr/share/julia/Make.user >/dev/null <<'EOF'
override USE_SYSTEM_BLAS = 1
OPENBLAS_DYNAMIC_ARCH=0
EOF
sudo make -C /usr/share/julia/deps cleanall
sudo make -C /usr/share/julia/deps clean-openblas
sudo make -C /usr/share/julia
cd ~
sudo tee /etc/profile.d/julia.sh >/dev/null <<'EOF'
export JULIA_HOME=/usr/share/julia
EOF
# Source first so JULIA_HOME is defined when the symlink target is built.
source /etc/profile.d/julia.sh
sudo ln -sf "$JULIA_HOME/usr/bin/julia" /usr/bin/julia
## Hadoop ##
# NOTE: Hadoop will still have to be configured
# See /etc/hadoop/conf
# Replace any existing HDP / Ambari repo definitions with the pinned ones.
sudo rm -rf /etc/yum.repos.d/hdp.repo
sudo rm -rf /etc/yum.repos.d/ambari.repo
sudo curl -o /etc/yum.repos.d/hdp.repo http://public-repo-1.hortonworks.com/HDP/centos6/2.x/updates/2.1.4.0/hdp.repo
sudo curl -o /etc/yum.repos.d/ambari.repo http://public-repo-1.hortonworks.com/ambari/centos6/1.x/updates/1.6.1/ambari.repo
sudo yum -y install hadoop hdfs hbase hcatalog hive hue mahout oozie pig storm sqoop webhcat
sudo yum -y install ambari-server
# YARN_CONF_DIR refers to $HADOOP_CONF_DIR literally; it is expanded when
# the profile script is sourced.
sudo tee /etc/profile.d/hadoop.sh >/dev/null <<'EOF'
export HADOOP_CONF_DIR=/etc/hadoop/conf
export YARN_CONF_DIR=$HADOOP_CONF_DIR
EOF
source /etc/profile.d/hadoop.sh
## Spark ##
# Download the Spark 1.1.0 sources to the path the tar step reads from.
sudo curl -o /usr/local/src/spark.tgz http://d3kbcqa49mib13.cloudfront.net/spark-1.1.0.tgz
# Extract as the invoking user so the (non-sudo) Maven build can write into
# the tree.
tar -xf /usr/local/src/spark.tgz -C "$HOME"
# Build in a subshell so a failed cd cannot run Maven in the wrong directory.
(
  cd "$HOME/spark-1.1.0" || exit 1
  mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -DskipTests clean package
)
cd ~
sudo mv "$HOME/spark-1.1.0" /usr/share/spark
# Written fresh (not appended) so re-provisioning does not stack duplicate
# PATH entries; $PATH stays literal until the file is sourced.
sudo tee /etc/profile.d/spark.sh >/dev/null <<'EOF'
export PATH=$PATH:/usr/share/spark/bin
EOF
source /etc/profile.d/spark.sh
## Services ##
# Service management needs root, same as the ntpd lines.
sudo chkconfig ntpd on
sudo service ntpd start
sudo chkconfig iptables off
# NOTE(review): no step visible in this script installs postgresql-server or
# initialises its data directory — confirm it is provided elsewhere.
sudo chkconfig postgresql on
sudo service postgresql start
## PACKAGE CLEANUP ##
# The privileged part runs in ONE non-interactive root shell fed from the
# heredoc below. (`sudo su -` would open an interactive shell and the
# following lines would only run, unprivileged, after it exits.)
# The delimiter is quoted, so nothing inside is expanded by the calling shell.
sudo bash -s <<'ROOT_CLEANUP'
# Zero free space to aid VM compression.
# dd is expected to stop with "No space left on device".
dd if=/dev/zero of=/EMPTY bs=1M
rm -f /EMPTY

# Remove bash history
rm -f /root/.bash_history /home/vagrant/.bash_history

# Cleanup log files (truncate in place; the files themselves are kept)
find /var/log -type f -exec truncate -s 0 {} +

# Whiteout root: fill all but one of the free 1K blocks, then delete.
count=$(df --sync -kP / | tail -n1 | awk '{print $4}')
count=$((count - 1))
dd if=/dev/zero of=/tmp/whitespace bs=1024 count="$count"
rm -f /tmp/whitespace

# Whiteout /boot
count=$(df --sync -kP /boot | tail -n1 | awk '{print $4}')
count=$((count - 1))
dd if=/dev/zero of=/boot/whitespace bs=1024 count="$count"
rm -f /boot/whitespace

# Zero the last swap area listed in /proc/swaps (first line is the header).
swappart=$(awk 'NR > 1 { dev = $1 } END { print dev }' /proc/swaps)
if [ -n "$swappart" ]; then
  # Remember the UUID: mkswap would otherwise generate a new one, which
  # breaks any "UUID=" swap entry in /etc/fstab.
  swapuuid=$(blkid -o value -s UUID "$swappart")
  swapoff "$swappart"
  dd if=/dev/zero of="$swappart"
  if [ -n "$swapuuid" ]; then
    mkswap -U "$swapuuid" "$swappart"
  else
    mkswap "$swappart"
  fi
  swapon "$swappart"
fi
ROOT_CLEANUP

# Finally wipe the invoking user's own history and leave the shell.
unset HISTFILE
rm -f "$HOME/.bash_history"
history -cw && history -c && exit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment