Last active
March 26, 2017 23:57
-
-
Save mdaniel/7220157 to your computer and use it in GitHub Desktop.
Vagrantfile.sh: a script to provision an EMR-compatible (Hadoop 1.0.3, Java SE 1.6) Vagrant instance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
## Provisioning entry point.
##   -e  stop at the first command that fails
##   -x  trace every command so the provisioning log shows what ran
set -e
set -x
## this script is run as `sudo /tmp/vagrant-shell` by ~vagrant
## (uncomment the next four lines to dump the effective user and environment)
#echo '[USER]'
#id -a
#echo '[ENV]'
#env
## this presumes you have a vboxnet through which 'vagrant ssh' will connect to your VM
## Extract the 192.168.x.x address from the ifconfig listing. The expression
## relies on the "addr:<ip> " layout of that output, and the dot between the
## first two octets is escaped so that only a literal "192.168" is accepted.
MY_IP=$(/sbin/ifconfig -a | sed -ne '/addr:192\.168/s/.*addr:\(192\.168[0-9.]*\) .*/\1/p')
if test -z "$MY_IP"; then
  echo "Unable to sniff our your vboxnet interface" >&2
  exit 1
fi
## strangely enough, $HOSTNAME isn't set
HOSTNAME=$(hostname)
## shorten the typing since we'll use this a lot
HH=/home/hadoop
## set these variables for our use
## don't worry about the ownership for directories we make
## as they will be cleaned up at the end
HADOOP_HOME="${HH}/.versions/1.0.3"
# java-6-sun-1.6.0.26
# Java(TM) SE Runtime Environment (build 1.6.0_26-b03)
# Java HotSpot(TM) 64-Bit Server VM (build 20.1-b02, mixed mode)
JAVA_HOME=/usr/lib/jvm/java-6-sun-1.6.0.26
## directory holding the hadoop tarball and the JDK installer
BINARY_DIR=/binaries
## this should be a pointer to ~/.vagrant.d
VAGRANT_D=/vagrant_home
## Fail fast when one of the inputs needed further down is not present.
## TODO only check these if in "provision" mode
for i in \
  "${BINARY_DIR}/hadoop-1.0.3.tar.gz" \
  "${BINARY_DIR}/jdk-6u26-linux-x64.bin" \
  "${VAGRANT_D}/insecure_private_key"
do
  if test ! -f "$i"; then
    echo "Required file is missing: $i" >&2
    exit 1
  fi
done
## set my hostname to the actual IP address, to prevent Hadoop from listening on localhost
## The "." between address and name in the first pattern stands for whatever
## single character separates the two columns.
if ! grep -q "${MY_IP}.${HOSTNAME}" /etc/hosts; then
  if grep -q "${HOSTNAME}" /etc/hosts; then
    ## rewrite every line that mentions the hostname; the previous file is
    ## kept as /etc/hosts.bak
    sed -i.bak -e "/$HOSTNAME/s/.*/${MY_IP} ${HOSTNAME}/" /etc/hosts
  else
    ## there is no line to rewrite, so the sed above would silently do
    ## nothing; add the mapping instead (same .bak backup as the sed path)
    cp /etc/hosts /etc/hosts.bak
    printf '%s %s\n' "$MY_IP" "$HOSTNAME" >> /etc/hosts
  fi
fi
## Create the hadoop account unless /etc/passwd already has it. The pattern
## is anchored on the trailing ':' so an account whose name merely starts
## with "hadoop" does not count as a match.
if ! grep -q '^hadoop:' /etc/passwd; then
  ## doesn't matter what the password is because vagrant can sudo into it
  useradd -s /bin/bash -m hadoop
fi
## Unpack the hadoop tarball next to $HADOOP_HOME and rename the extracted
## hadoop-1.0.3 directory to 1.0.3. Skipped when $HADOOP_HOME already exists.
if test ! -d "$HADOOP_HOME"; then
  DN=$(dirname "$HADOOP_HOME")
  mkdir -p "$DN"
  tar xzf "${BINARY_DIR}/hadoop-1.0.3.tar.gz" -C "$DN"
  mv "${DN}/hadoop-1.0.3" "${DN}/1.0.3"
  unset DN
fi
## Install the JDK from its self-extracting .bin and register every tool in
## its bin/ directory with update-alternatives. Skipped when $JAVA_HOME
## already exists.
if test ! -d "$JAVA_HOME"; then
  DN=$(dirname "$JAVA_HOME")
  BN=$(basename "$JAVA_HOME")
  mkdir -p "$DN"
  ## run the installer from a private scratch directory rather than from a
  ## fixed, guessable path directly under /tmp (this script runs via sudo)
  TMPD=$(mktemp -d)
  TMPF="${TMPD}/jdk-6u26-linux-x64.bin"
  cp "${BINARY_DIR}/jdk-6u26-linux-x64.bin" "$TMPF"
  chmod 755 "$TMPF"
  # the .bin expands to PWD so change there in a subshell
  (cd "$DN" && "$TMPF" -x && mv jdk1.6.0* "$BN")
  rm -rf -- "${TMPD:?}"
  unset DN BN TMPF TMPD
  chown -R 0:0 "$JAVA_HOME"
  ## openjdk priority is 1xyzRRRR where (x, y, z, %04r)
  ## so our priority needs to be higher than theirs
  pri=91600026
  for i in "$JAVA_HOME"/bin/*
  do
    bn=$(basename "$i")
    update-alternatives --install "/usr/bin/${bn}" "${bn}" "$i" "$pri"
  done
  unset pri
fi
## move the hadoop.tmp.dir to /mnt/var/hadoop (as used by EMR)
## so it won't get blown away on reboots
## /mnt/var/log exists on EMR even though we don't currently use it
## Each directory is created and handed to the hadoop user only when missing.
for d in /mnt/var/hadoop /mnt/var/log; do
  if test ! -d "$d"; then
    mkdir -p "$d"
    chown -R hadoop "$d"
  fi
done
unset d
## Generate ~hadoop/.bashrc.hadoop once. The here-document delimiter is left
## unquoted on purpose: $JAVA_HOME and $HADOOP_HOME are expanded now, while
## the backslash-escaped \$ references are written literally and resolved
## when the file is sourced.
if test ! -f "${HH}/.bashrc.hadoop"; then
  ## the ownership is fixed below
  cat > "${HH}/.bashrc.hadoop" <<EOD
JAVA_HOME=$JAVA_HOME
export JAVA_HOME
## not strictly needed because of the update-alternatives above
PATH=\$JAVA_HOME/bin:\$PATH
HADOOP_HOME=$HADOOP_HOME
HADOOP_HOME_WARN_SUPPRESS=1
export HADOOP_HOME
export HADOOP_HOME_WARN_SUPPRESS
## amazon puts hadoop-bin at the end of the path for some reason
PATH=\$PATH:\$HADOOP_HOME/bin
EOD
fi
## Replace ~hadoop/.bashrc with a stub that sources .bashrc.hadoop before the
## distribution's original file (kept as .bashrc.ubuntu).
## The guard searches for the literal text the stub contains. If it never
## matched, a second run would move the stub itself over .bashrc.ubuntu and
## the new stub would then source itself.
if ! grep -q -F '.bashrc.hadoop' "${HH}/.bashrc" 2>/dev/null; then
  ## keep the original around only if there is one; the stub tests for the
  ## file before sourcing it
  if test -f "${HH}/.bashrc"; then
    mv "${HH}/.bashrc" "${HH}/.bashrc.ubuntu"
  fi
  ## quoted delimiter: the stub is written verbatim
  cat > "${HH}/.bashrc" <<'EOD'
# test and source ours first because the ubuntu one exits
# in non-interactive shells, which is exactly when we want to set JAVA_HOME
test -f ~/.bashrc.hadoop && source ~/.bashrc.hadoop
test -f ~/.bashrc.ubuntu && source ~/.bashrc.ubuntu
EOD
fi
## Write conf/core-site.xml unless it already mentions hadoop.tmp.dir.
## -F makes the property name a literal string rather than a regex.
## ${HOSTNAME} is expanded while the file is generated.
if ! grep -q -F hadoop.tmp.dir "${HADOOP_HOME}/conf/core-site.xml"; then
  cat > "${HADOOP_HOME}/conf/core-site.xml" <<EOD
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/mnt/var/hadoop</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://${HOSTNAME}:9000</value>
</property>
</configuration>
EOD
fi
## Write conf/hdfs-site.xml unless it already mentions dfs.replication.
## -F makes the property name a literal string rather than a regex; the
## quoted delimiter writes the XML verbatim.
if ! grep -q -F dfs.replication "${HADOOP_HOME}/conf/hdfs-site.xml"; then
  cat > "${HADOOP_HOME}/conf/hdfs-site.xml" <<'EOD'
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
EOD
fi
## Write conf/mapred-site.xml unless it already mentions job.tracker.
## -F makes the search text a literal string rather than a regex.
## ${HOSTNAME} is expanded while the file is generated.
if ! grep -q -F job.tracker "${HADOOP_HOME}/conf/mapred-site.xml"; then
  cat > "${HADOOP_HOME}/conf/mapred-site.xml" <<EOD
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>${HOSTNAME}:9001</value>
</property>
</configuration>
EOD
fi
## Give the hadoop user a known_hosts file covering the three names this box
## is reached by, plus a key pair that is authorized to log in to itself.
if test ! -f "${HH}/.ssh/known_hosts"; then
  ## -p tolerates an existing directory but still reports real failures
  mkdir -p "${HH}/.ssh"
  chmod 700 "${HH}/.ssh"
  ## one host per line, without a trailing empty entry
  printf '%s\n' 127.0.0.1 localhost "${HOSTNAME}" \
    | ssh-keyscan -f /dev/stdin > "${HH}/.ssh/known_hosts"
fi
if test ! -f "${HH}/.ssh/id_rsa"; then
  cp "${VAGRANT_D}/insecure_private_key" "${HH}/.ssh/id_rsa"
  ## tighten the private key before anything reads it
  ## NOTE(review): ssh-keygen is expected to reject a private key that is
  ## group/other accessible - confirm against the installed OpenSSH
  chmod 600 "${HH}/.ssh/id_rsa"
  ssh-keygen -y -f "${HH}/.ssh/id_rsa" > "${HH}/.ssh/id_rsa.pub"
  cat "${HH}/.ssh/id_rsa.pub" >> "${HH}/.ssh/authorized_keys"
fi
chmod 600 "${HH}"/.ssh/*
## (Re)generate ~hadoop/start-hadoop.sh on every run. ${HADOOP_HOME} is
## expanded now, so the script carries absolute paths. The shebang lets the
## file be executed directly instead of relying on the calling shell's
## fallback for scripts without an interpreter line.
cat > "${HH}/start-hadoop.sh" <<EOD
#!/bin/sh
${HADOOP_HOME}/bin/hadoop namenode -format
${HADOOP_HOME}/bin/start-all.sh
EOD
chmod 755 "${HH}/start-hadoop.sh"
## hand everything created under the home directory to the hadoop user
chown -R hadoop "${HH}"
## Reference material only: the here-document below is fed to the no-op ':'
## builtin, so nothing in it is executed or printed. The delimiter is quoted
## so the text is never subject to expansion.
: <<'COMMENT'
## this is the output of running "env" on an EMR instance
TERM=xterm
SHELL=/bin/bash
HADOOP_HOME=/home/hadoop
USER=hadoop
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:
MAIL=/var/mail/hadoop
PATH=/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/hadoop/bin
PWD=/home/hadoop
JAVA_HOME=/usr/lib/jvm/java-6-sun
LANG=en_US.UTF-8
SHLVL=1
HOME=/home/hadoop
LOGNAME=hadoop
HADOOP_HOME_WARN_SUPPRESS=true
_=/usr/bin/env
COMMENT
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment