#!/bin/bash -e
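# Set up a small Hadoop/YARN + Spark cluster over SSH: install the tarballs and write
# the HDFS/YARN configuration on every host, then start the NameNode, DataNodes,
# ResourceManager, and NodeManagers.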
SPARK_VERSION="2.1.0-bin-hadoop2.7"
HADOOP_VERSION="2.7.3"
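# Strip any DOMAIN\ prefix from $USER (a no-op when there is no backslash)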
SHORT_USER=$(echo $USER | cut -d \\ -f2)
DATA_DIR=/var/tmp/$SHORT_USER/hdfs
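# Per-DataNode disk I/O cap in bytes/sec for the optional blkio throttle below (0 disables it)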
IO_BPS=$((50 * 1024 * 1024))
function add_to_env {
  host=$1
  value=$2
  ssh $host "echo '$value' | sudo tee -a /etc/environment > /dev/null"
}
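# Example (hypothetical host and variable):
#   add_to_env node1 'FOO="bar"'
# appends the line FOO="bar" to /etc/environment on node1.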
# Function to add properties to a configuration file on a remote host
function add_property {
  host=$1
  name=$2
  value=$3
  file=$4
  ssh $host "sudo xmlstarlet ed -L \
    -s '/configuration' -t elem -n property --var new-field '\$prev' \
    -s '\$new-field' -t elem -n name -v $name \
    -s '\$new-field' -t elem -n value -v $value \
    $file"
}
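# Example (values taken from the calls below; the host is hypothetical):
#   add_property node1 fs.defaultFS hdfs://node1 /opt/hadoop-$HADOOP_VERSION/etc/hadoop/core-site.xml
# appends the following child to <configuration> in core-site.xml on node1:
#   <property><name>fs.defaultFS</name><value>hdfs://node1</value></property>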
# Check for correct arguments
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 NameNode ResourceManager slave1 [slave2 ...]" > /dev/stderr
  exit 1
fi
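# Example invocation (script name and hostnames are hypothetical): the first argument
# becomes the NameNode, the second the ResourceManager, and the remaining hosts run
# DataNodes/NodeManagers:
#   ./setup-cluster.sh master1 master2 worker1 worker2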
echo "Downloading tarballs" > /dev/stderr | |
wget -P ~ -c http://apache.mirrors.spacedump.net/hadoop/common/stable/hadoop-$HADOOP_VERSION.tar.gz | |
wget -P ~ -c http://d3kbcqa49mib13.cloudfront.net/spark-$SPARK_VERSION.tgz | |
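# Note: the remote "tar zxf" commands below unpack these tarballs from the home
# directory, which assumes ~ is shared across all hosts (e.g., over NFS).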
# Get hostnames
namenode="$1"
resourcemanager="$2"
for host in "$@"; do
  echo "Installing on $host..." > /dev/stderr
  # Enable /etc/environment with sudo
  ssh $host "echo 'session required pam_env.so readenv=1' | sudo tee -a /etc/pam.d/sudo > /dev/null"
  # Properly configure environment
  add_to_env $host "JAVA_HOME=\"/usr/lib/jvm/java-8-oracle/\""
  add_to_env $host "HADOOP_HOME=\"/opt/hadoop-$HADOOP_VERSION\""
  add_to_env $host "HADOOP_PREFIX=\"/opt/hadoop-$HADOOP_VERSION\""
  add_to_env $host "HADOOP_COMMON_HOME=\"/opt/hadoop-$HADOOP_VERSION\""
  add_to_env $host "HADOOP_CONF_DIR=\"/opt/hadoop-$HADOOP_VERSION/etc/hadoop\""
  add_to_env $host "HADOOP_HDFS_HOME=\"/opt/hadoop-$HADOOP_VERSION\""
  add_to_env $host "HADOOP_YARN_HOME=\"/opt/hadoop-$HADOOP_VERSION\""
  add_to_env $host "SPARK_HOME=\"/opt/spark-$SPARK_VERSION\""
  # This step is required so daemons listen on the correct interface
  ssh $host sudo sed -i "/$host/d" /etc/hosts
  ssh $host sudo tar zxf spark-$SPARK_VERSION.tgz -C /opt
  ssh $host sudo tar zxf hadoop-$HADOOP_VERSION.tar.gz -C /opt
  # Install xmlstarlet to make manipulating configs easier
  ssh $host sudo apt-get update -qq && ssh $host sudo apt-get install -qq cgroup-bin xmlstarlet
  # Create HDFS directory
  ssh $host sudo rm -rf $DATA_DIR
  ssh $host sudo mkdir -p $DATA_DIR/hdfs/datanode
  ssh $host sudo mkdir -p $DATA_DIR/hdfs/namenode
  ssh $host sudo mkdir -p $DATA_DIR/tmp
  add_property $host \
    dfs.datanode.data.dir \
    file://$DATA_DIR/hdfs/datanode \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/hdfs-site.xml
  add_property $host \
    dfs.namenode.name.dir \
    file://$DATA_DIR/hdfs/namenode \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/hdfs-site.xml
  add_property $host \
    dfs.namenode.datanode.registration.ip-hostname-check \
    false \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/hdfs-site.xml
  add_property $host \
    hadoop.tmp.dir \
    $DATA_DIR/tmp \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/core-site.xml
  # Set the NameNode and ResourceManager
  add_property $host \
    fs.defaultFS \
    hdfs://$namenode \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/core-site.xml
  add_property $host \
    yarn.resourcemanager.hostname \
    $resourcemanager \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/yarn-site.xml
  # Configure YARN resource limits
  add_property $host \
    yarn.nodemanager.resource.memory-mb \
    49152 \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/yarn-site.xml
  add_property $host \
    yarn.nodemanager.resource.cpu-vcores \
    10 \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/yarn-site.xml
  add_property $host \
    yarn.nodemanager.vmem-check-enabled \
    false \
    /opt/hadoop-$HADOOP_VERSION/etc/hadoop/yarn-site.xml
done
# Remove non-slaves from arguments
shift 2
# Format HDFS and start the NameNode
echo "Starting NameNode on $namenode" > /dev/stderr
ssh $namenode sudo /opt/hadoop-$HADOOP_VERSION/bin/hdfs namenode -format
ssh $namenode sudo /opt/hadoop-$HADOOP_VERSION/sbin/hadoop-daemon.sh start namenode
# Start DataNodes
for slave in "$@"; do
  echo "Starting DataNode on $slave" > /dev/stderr
  ssh $slave sudo /opt/hadoop-$HADOOP_VERSION/sbin/hadoop-daemon.sh start datanode
  # Optionally introduce a bandwidth limit on HDFS
  if [ "$IO_BPS" -gt 0 ]; then
    device=$(ssh $slave df $DATA_DIR | tail -1 | awk '{ print $1 }' | tr -d '[:digit:]')
ssh $slave "sudo sh -c '(mount | grep blkio > /dev/null) || (mkdir -p /cgroup/blkio && mount -t cgroup -o blkio none /cgroup/blkio)'" | |
ssh $slave sudo cgcreate -g blkio:/iothrottle | |
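    # cgset expects "<major>:<minor> <bytes/sec>"; the ls/awk/tr pipeline below converts
    # the device's "8, 0"-style ls output into "8:0" (numbers are illustrative for /dev/sda)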
ssh $slave sudo "cgset -r blkio.throttle.read_bps_device=\"\$(ls -ls $device | awk '{ print \$6\$7 }' | tr , :) $IO_BPS\" iothrottle" | |
ssh $slave sudo "cgset -r blkio.throttle.write_bps_device=\"\$(ls -ls $device | awk '{ print \$6\$7 }' | tr , :) $IO_BPS\" iothrottle" | |
ssh $slave "sudo sh -c 'jps | grep DataNode | cut -d \" \" -f1 > /cgroup/blkio/iothrottle/tasks'" | |
fi | |
done | |
# Create a directory for Spark event logs | |
ssh $namenode sudo /opt/hadoop-$HADOOP_VERSION/bin/hdfs dfs -mkdir /spark-logs | |
# Start ResourceManager | |
echo "Starting ResourceManager on $resourcemanager" > /dev/stderr | |
ssh $resourcemanager sudo /opt/hadoop-$HADOOP_VERSION/sbin/yarn-daemon.sh start resourcemanager | |
# Start NodeManagers | |
for slave in "$@"; do | |
echo "Starting NodeManager on $host" > /dev/stderr | |
  ssh $slave sudo /opt/hadoop-$HADOOP_VERSION/sbin/yarn-daemon.sh start nodemanager
done