#!/bin/bash
#
# Provisions a Python stack on a cluster node:
#   1. installs Miniconda into /mnt/conda and a set of conda packages;
#   2. on the node whose /mnt/var/lib/info/instance.json reports
#      "isMaster": true, additionally installs deep-learning packages,
#      configures Jupyter with PySpark kernels and registers it as a service.
# NOTE(review): the paths and the "hadoop" user suggest this is meant to run as
# an Amazon EMR bootstrap action - confirm against the deployment setup.
#
# Usage:
#   <this-script> [--notebook-dir DIR] [--miniconda-version VERSION]
#                 [--pandas-version VERSION] [--scikit-version VERSION]

# Default versions; each one can be overridden by the matching command-line
# option. MINICONDA_VERSION selects the installer to download and is also the
# version the `conda` package is pinned to.
MINICONDA_VERSION="4.3.21"
PANDAS_VERSION="0.20.3"
SCIKIT_VERSION="0.19.0"
#######################################
# Parses the command-line options of this script.
#
# Recognised options (each takes exactly one value):
#   --notebook-dir DIR        Directory holding the notebooks. The original
#                             note says the notebooks live on S3; the example
#                             given is a local mount. This path needs to exist.
#                             Eg: --notebook-dir /mnt/my-notebooks/
#   --miniconda-version VER   Overrides MINICONDA_VERSION.
#   --pandas-version VER      Overrides PANDAS_VERSION.
#   --scikit-version VER      Overrides SCIKIT_VERSION.
#
# Globals:   NOTEBOOK_DIR, MINICONDA_VERSION, PANDAS_VERSION, SCIKIT_VERSION
#            (written when the matching option is given)
# Arguments: the script's own arguments ("$@")
# Outputs:   an error message on stderr for a bad option
# Returns:   exits the shell with status 1 on an unknown option or when an
#            option is missing its value
#######################################
parse_args() {
  local key

  # Numeric test on the argument count. Every remaining argument is examined,
  # so a trailing unknown option or an option without a value is reported
  # instead of being silently dropped.
  while (( $# > 0 )); do
    key="$1"

    # Validate first: the option must be known and must be followed by a value.
    case "$key" in
      --notebook-dir | --miniconda-version | --pandas-version | --scikit-version)
        if (( $# < 2 )); then
          echo "Missing value for option: ${key}" >&2
          exit 1
        fi
        ;;
      *)
        echo "Unknown option: ${key}" >&2
        exit 1
        ;;
    esac

    # Store the value in the variable that belongs to the option.
    case "$key" in
      --notebook-dir) NOTEBOOK_DIR="$2" ;;
      --miniconda-version) MINICONDA_VERSION="$2" ;;
      --pandas-version) PANDAS_VERSION="$2" ;;
      --scikit-version) SCIKIT_VERSION="$2" ;;
    esac

    # Consume the option and its value.
    shift 2
  done
}

parse_args "$@"
# Install conda and friends.
#
# Download the Miniconda installer for the requested version. Nothing below can
# work without it, so a failed download aborts the script instead of letting
# every later command fail one by one.
wget "https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" \
  -O /mnt/miniconda.sh || {
  echo "Failed to download Miniconda ${MINICONDA_VERSION}" >&2
  exit 1
}

# Run the installer in batch mode (-b) with /mnt/conda as the prefix (-p), and
# remove the installer afterwards whether or not it succeeded.
if ! /bin/bash /mnt/miniconda.sh -b -p /mnt/conda; then
  rm -f /mnt/miniconda.sh
  echo "Miniconda installation into /mnt/conda failed" >&2
  exit 1
fi
rm /mnt/miniconda.sh

# Put conda first on PATH for future shells (via ~/.bashrc) and for this one.
echo -e '\nexport PATH=/mnt/conda/bin:$PATH' >> "$HOME/.bashrc" && source "$HOME/.bashrc"

# A .bashrc may return early when the shell is not interactive, in which case
# the line appended above never runs here; make sure conda is reachable anyway.
case ":${PATH}:" in
  *:/mnt/conda/bin:*) ;;
  *) export PATH="/mnt/conda/bin:${PATH}" ;;
esac

# Never prompt for confirmation and leave the shell prompt untouched.
conda config --set always_yes yes --set changeps1 no
conda config -f --add channels conda-forge
conda config -f --add channels defaults

# Core data stack; pandas, conda itself and scikit-learn are pinned to the
# configured versions.
conda install hdfs3 findspark ujson jsonschema toolz boto3 py4j numpy \
  "pandas==${PANDAS_VERSION}" "conda=${MINICONDA_VERSION}" jupyterlab \
  "scikit-learn==${SCIKIT_VERSION}"

# Plotting libraries and Jupyter add-ons.
conda install matplotlib plotly bokeh seaborn ipywidgets ipyvolume jupyter_contrib_nbextensions

# Native libraries and storage formats.
conda install libgcc opencv fastparquet h5py
#######################################
# Tells whether this node is the cluster master.
# Arguments: $1 - optional path to the instance metadata file
#                 (default: /mnt/var/lib/info/instance.json)
# Returns:   0 when the file contains the literal text `"isMaster": true`,
#            non-zero otherwise (including when the file cannot be read)
#######################################
is_master_node() {
  grep -Fq '"isMaster": true' "${1:-/mnt/var/lib/info/instance.json}"
}

#######################################
# Writes a Jupyter kernel spec that starts ipykernel with the PySpark
# environment variables set.
# Arguments: $1 - kernel directory (created if missing)
#            $2 - display name of the kernel
#            $3 - Python interpreter used both to launch the kernel and as
#                 PYSPARK_PYTHON
# Outputs:   writes <kernel directory>/kernel.json
#######################################
write_pyspark_kernel() {
  local kernel_dir="$1"
  local display_name="$2"
  local python_bin="$3"

  mkdir -p "$kernel_dir"
  # "{connection_file}" is a literal placeholder and is written out verbatim;
  # only the two shell variables are expanded.
  cat <<EOF > "${kernel_dir}/kernel.json"
{
  "argv": [
    "${python_bin}",
    "-m",
    "ipykernel_launcher",
    "-f",
    "{connection_file}"
  ],
  "display_name": "${display_name}",
  "language": "python",
  "env": {
    "SPARK_HOME": "/usr/lib/spark",
    "PYSPARK_PYTHON": "${python_bin}",
    "PYTHONPATH": "/usr/lib/spark/python:/usr/lib/spark/python/lib/py4j-0.10.4-src.zip",
    "PYTHONSTARTUP": "/usr/lib/spark/python/pyspark/shell.py",
    "PYSPARK_SUBMIT_ARGS": "--master yarn-client pyspark-shell"
  }
}
EOF
}

# Everything below runs on the master node only.
if is_master_node; then

  # Deep-learning packages, CPU and GPU builds.
  for pip_package in mxnet mxnet-cu91 tensorflow==1.5.0 tensorflow-gpu==1.5.0 keras; do
    pip install --upgrade --ignore-installed "$pip_package"
  done

  if [[ -z "${NOTEBOOK_DIR:-}" ]]; then
    # Behaviour is unchanged; this only makes the situation visible.
    echo "Warning: --notebook-dir was not given; the Jupyter service file will contain a 'chdir' stanza without a directory" >&2
  fi

  # Jupyter configuration. This is not using the defaults.
  # NOTE(review): an empty token together with ip = '*' appears to leave the
  # notebook server reachable without authentication - confirm that network
  # access to the master node is restricted.
  mkdir -p ~/.jupyter
  {
    echo "c.NotebookApp.token = ''"
    echo "c.NotebookApp.open_browser = False"
    echo "c.NotebookApp.ip = '*'"
    echo "c.NotebookApp.notebook_dir = '$NOTEBOOK_DIR'"
  } >> ~/.jupyter/jupyter_notebook_config.py
  # This will not work properly and there is no documentation for it
  # echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py

  # PySpark for Python 3
  write_pyspark_kernel /mnt/conda/share/jupyter/kernels/python3-pyspark \
    "Python 3 - PySpark" /mnt/conda/bin/python3.6

  # Install the kernel for Python 2. The environment is activated at the top
  # level of the script and stays active for the commands that follow.
  /mnt/conda/bin/conda create -n ipykernel_py2 python=2 ipykernel
  source activate ipykernel_py2
  python -m ipykernel install --user

  # Install the kernel for PySpark with Python 2
  write_pyspark_kernel /mnt/conda/share/jupyter/kernels/python2-pyspark \
    "Python 2 - PySpark" /mnt/conda/envs/ipykernel_py2/bin/python

  # Enable the notebook extensions.
  for nb_extension in \
    execute_time/ExecuteTime \
    freeze/main \
    hide_input/main \
    table_beautifier/main \
    spellchecker/main \
    python-markdown/main \
    scratchpad/main \
    varInspector/main; do
    jupyter nbextension enable "$nb_extension"
  done

  # Setup the Jupyter daemon. The job file is written to the home directory as
  # the current user (the redirection is performed by this shell, so running
  # `cat` under sudo would gain nothing) and then moved into /etc/init as root.
  cat <<EOF > ~/jupyter.conf
description "Jupyter"
author "Cosmin Catalin Sanda"
start on runlevel [2345]
stop on runlevel [016]
respawn
respawn limit 0 10
console output
chdir $NOTEBOOK_DIR
exec start-stop-daemon -v --start -c hadoop --exec /mnt/conda/bin/jupyter notebook
EOF

  sudo mv ~/jupyter.conf /etc/init/
  sudo chown root:root /etc/init/jupyter.conf

  sudo initctl reload-configuration

  # Start Jupyter daemon
  sudo initctl start jupyter

fi