Forked from andershammar/install-apache-zeppelin-on-amazon-emr.sh
Last active
March 9, 2016 10:12
-
-
Save eyelove/394298e8d9d374051554 to your computer and use it in GitHub Desktop.
Bootstrap script for installing Apache Zeppelin on an Amazon EMR Cluster. Verified on Amazon EMR release 4.x.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Bootstrap script for installing Apache Zeppelin on an Amazon EMR cluster.
#
# On the node whose instance.json reports isMaster == true, this script:
#   1. installs Git and Maven 3.3.3,
#   2. clones and builds Zeppelin under /home/hadoop/zeppelin,
#   3. writes conf/zeppelin-env.sh, forwarding selected Spark settings from
#      spark-defaults.conf as -D JVM options,
#   4. patches bin/interpreter.sh so Hadoop and EMRFS jars are on the
#      interpreter classpath,
#   5. starts the Zeppelin daemon.
# On every other node it does nothing.

# Options are set here rather than on the shebang so they still apply when
# the script is started as `bash script.sh`.
set -ex

#######################################
# Print one "-D<key>=<value>" JVM option per matching property found in a
# Spark properties file, in a fixed key order.
# Only lines whose first whitespace-separated field is exactly one of the
# forwarded keys (and that carry a value) are used, so commented-out lines
# and longer property names sharing a prefix are ignored.
# Arguments: $1 - path to a spark-defaults.conf style file
# Outputs:   the options on stdout, one per line (nothing if none match)
#######################################
spark_java_opts() {
  local conf=$1
  local key
  for key in \
    spark.executor.instances \
    spark.executor.cores \
    spark.executor.memory \
    spark.default.parallelism \
    spark.yarn.executor.memoryOverhead; do
    awk -v key="$key" '$1 == key && NF >= 2 { print "-D" $1 "=" $2 }' "$conf"
  done
}

# A missing instance.json or missing jq yields an empty string here, which is
# simply "not the master": the script then exits without doing anything.
if [[ "$(jq -r .isMaster /mnt/var/lib/info/instance.json)" == "true" ]]; then
  # Install Git
  sudo yum -y install git

  # Install Maven
  # Fetched from the Apache archive, which keeps old releases permanently.
  wget -P /tmp https://archive.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
  # -p: do not abort (set -e) when the directory already exists.
  sudo mkdir -p /opt/apache-maven
  sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven
  # \$ keeps the variables unexpanded so .bashrc resolves them at login time.
  cat <<EOF >> /home/hadoop/.bashrc

# Maven
export MAVEN_HOME=/opt/apache-maven/apache-maven-3.3.3
export PATH=\$MAVEN_HOME/bin:\$PATH
EOF
  # Pick up the Maven settings just appended so `mvn` resolves below.
  source /home/hadoop/.bashrc

  # Install Zeppelin
  git clone https://github.com/apache/incubator-zeppelin.git /home/hadoop/zeppelin
  # Everything below uses paths relative to the Zeppelin checkout.
  cd /home/hadoop/zeppelin
  mvn clean package -Pspark-1.4 -Dhadoop.version=2.6.0 -Phadoop-2.6 -Pyarn -DskipTests

  # Configure Zeppelin
  SPARK_DEFAULTS=/usr/lib/spark/conf/spark-defaults.conf
  declare -a ZEPPELIN_JAVA_OPTS
  ZEPPELIN_JAVA_OPTS=()
  if [[ -f "$SPARK_DEFAULTS" ]]; then
    while IFS= read -r java_opt; do
      ZEPPELIN_JAVA_OPTS+=("$java_opt")
    done < <(spark_java_opts "$SPARK_DEFAULTS")
  fi
  # Log the options that will be written to zeppelin-env.sh.
  printf '%s\n' "${ZEPPELIN_JAVA_OPTS[*]}"

  cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh
  # ZEPPELIN_JAVA_OPTS is expanded now (its value is computed above);
  # \$PYTHONPATH is escaped so it is resolved when zeppelin-env.sh is sourced,
  # not frozen to whatever this bootstrap shell happened to have.
  cat <<EOF >> conf/zeppelin-env.sh
export MASTER=yarn-client
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CONF_DIR=/etc/hadoop/conf
export ZEPPELIN_SPARK_USEHIVECONTEXT=false
export ZEPPELIN_JAVA_OPTS="${ZEPPELIN_JAVA_OPTS[*]}"
export PYTHONPATH=\$PYTHONPATH:/usr/lib/spark/python
#export ZEPPELIN_NOTEBOOK_S3_BUCKET=<myZeppelinBucket>
#export ZEPPELIN_NOTEBOOK_USER=<myZeppelinUser>
EOF

  # Patch bin/interpreter.sh to add the Hadoop, Hadoop LZO and EMRFS jar
  # directories to the interpreter classpath. The delimiter is quoted so the
  # patch text is written verbatim (no expansion of ${...}).
  # NOTE(review): the address in the patch's From: header and in
  # `git config user.email` below reads "[email protected]", which looks like
  # a redaction placeholder rather than a real address - confirm and restore.
  cat <<'EOF' > 0001-Add-Hadoop-libraries-and-EMRFS-to-Zeppelin-classpath.patch
From 2b0226e45207758d526522bd22d497c9def7c008 Mon Sep 17 00:00:00 2001
From: Anders Hammar <[email protected]>
Date: Fri, 18 Sep 2015 10:24:18 +0000
Subject: [PATCH] Add Hadoop libraries and EMRFS to Zeppelin classpath

---
 bin/interpreter.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index e03a13b..de458f2 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -89,8 +89,21 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
     # CDH
     addJarInDir "${HADOOP_HOME}"
     addJarInDir "${HADOOP_HOME}/lib"
+
+    # Hadoop libraries
+    addJarInDir "${HADOOP_HOME}/../hadoop-hdfs"
+    addJarInDir "${HADOOP_HOME}/../hadoop-mapreduce"
+    addJarInDir "${HADOOP_HOME}/../hadoop-yarn"
+
+    # Hadoop LZO
+    addJarInDir "${HADOOP_HOME}/../hadoop-lzo/lib"
   fi
 
+  # Add EMRFS libraries
+  addJarInDir "/usr/share/aws/emr/emrfs/conf"
+  addJarInDir "/usr/share/aws/emr/emrfs/lib"
+  addJarInDir "/usr/share/aws/emr/emrfs/auxlib"
+
   addJarInDir "${INTERPRETER_DIR}/dep"
 
   PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
-- 
2.1.0

EOF
  # git am creates a commit, which requires a committer identity.
  git config user.email "[email protected]"
  git config user.name "Kim DeokJung"
  # --ignore-whitespace: tolerate indentation differences between the patch's
  # context lines and the checked-out bin/interpreter.sh.
  git am --ignore-whitespace 0001-Add-Hadoop-libraries-and-EMRFS-to-Zeppelin-classpath.patch

  # Start the Zeppelin daemon
  bin/zeppelin-daemon.sh start
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://blogs.aws.amazon.com/bigdata/post/Tx2HJD3Z74J2U8U/Running-an-External-Zeppelin-Instance-using-S3-Backed-Notebooks-with-Spark-on-Am