Created
August 20, 2018 14:56
-
-
Save vak/766b3a7ad236fb0c819898718bfe522d to your computer and use it in GitHub Desktop.
Custom bootstrap script to install Zeppelin 0.8 on AWS EMR (tested on EMR 5.16.0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Bootstrap script: downloads the Apache Zeppelin 0.8.0 binary bundle, appends
# EMR-specific settings to its conf/zeppelin-env.sh, moves the bundle under
# /usr/lib, links its conf directory into /etc/zeppelin, and starts the
# Zeppelin daemon.
#
# ATTENTION:
#
# 1. Ensure about 1 GB is free on the storage backing /usr/lib/ for the large
#    "bin-all" bundle chosen by default below, or choose a smaller bundle
#    from the Zeppelin web site.
#
# 2. Adjust ZEPPELIN_NOTEBOOK_S3_BUCKET and ZEPPELIN_NOTEBOOK_S3_USER if you
#    need your Zeppelin notebooks persisted to your S3 bucket; otherwise
#    remove the last three export lines of the here-doc (the ones starting
#    with 'export ZEPPELIN_NOTEBOOK_S').
#
# Optional environment:
#   ZEPPELIN_TARBALL_URL  Where to download the bundle from. Defaults to the
#                         mirror this script was originally written against.
#                         NOTE(review): mirrors tend to drop old releases; if
#                         the default is gone, point this at a location that
#                         still hosts zeppelin-0.8.0-bin-all.tgz.

# Options live in `set` rather than on the shebang so they also apply when
# the script is invoked as `bash script.sh`.
set -euxo pipefail

readonly release='zeppelin-0.8.0-bin-all'
readonly install_root='/usr/lib'
readonly tarball="/tmp/${release}.tgz"
readonly tarball_url="${ZEPPELIN_TARBALL_URL:-http://apache.mirror.digionline.de/zeppelin/zeppelin-0.8.0/${release}.tgz}"

cd /tmp
# -O pins the local file name so the tar step below finds it regardless of
# the URL's basename or of a stale copy from an earlier run.
wget -O "${tarball}" "${tarball_url}"
tar xf "${tarball}"

# The delimiter is quoted on purpose: $ZEPPELIN_PID_DIR and
# $SPARK_SUBMIT_OPTIONS must be written literally and expanded later, when
# zeppelin-env.sh is sourced. Unquoted, they would be expanded here (where
# they are unset) and the file would contain ZEPPELIN_PID=/zeppelin.pid.
cat <<'EOF' >>"/tmp/${release}/conf/zeppelin-env.sh"
export ZEPPELIN_PORT=8890
export ZEPPELIN_CONF_DIR=/etc/zeppelin/conf
export ZEPPELIN_LOG_DIR=/var/log/zeppelin
export ZEPPELIN_PID_DIR=/var/run/zeppelin
export ZEPPELIN_PID=$ZEPPELIN_PID_DIR/zeppelin.pid
export ZEPPELIN_WAR_TEMPDIR=/var/run/zeppelin/webapps
export ZEPPELIN_NOTEBOOK_DIR=/var/lib/zeppelin/notebook
export MASTER=yarn-client
export SPARK_HOME=/usr/lib/spark
export HADOOP_CONF_DIR=/etc/hadoop/conf
export CLASSPATH=":/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar"
export SPARK_SUBMIT_OPTIONS="$SPARK_SUBMIT_OPTIONS --conf 'spark.executorEnv.PYTHONPATH=/usr/lib/spark/python/lib/py4j-src.zip:/usr/lib/spark/python/:<CPS>{{PWD}}/pyspark.zip<CPS>{{PWD}}/py4j-src.zip' --conf spark.yarn.isPython=true"
export ZEPPELIN_NOTEBOOK_S3_BUCKET=my_bucket_here
export ZEPPELIN_NOTEBOOK_S3_USER=my_zeppelin_notebook_user_here
export ZEPPELIN_NOTEBOOK_STORAGE=org.apache.zeppelin.notebook.repo.S3NotebookRepo
EOF

# Leave /tmp before moving the extracted directory out from under it.
cd
sudo mv "/tmp/${release}" "${install_root}"

# -p / -sfn: do not abort under `set -e` when the directory or link is
# already present.
sudo mkdir -p /etc/zeppelin/
sudo ln -sfn "${install_root}/${release}/conf" /etc/zeppelin/conf

sudo mkdir -p /var/{run,log}/zeppelin/
sudo chown hadoop:hadoop /var/{run,log}/zeppelin/

"${install_root}/${release}/bin/zeppelin-daemon.sh" start
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment