Created
January 22, 2022 13:16
-
-
Save hizkifw/0316afcfefb1597ace72b164cb9ad828 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base image: Anaconda distribution (conda and Python under /opt/conda),
# pinned to a specific release tag so rebuilds start from the same base.
FROM continuumio/anaconda3:2021.11
# Fetch Java, Scala, and Spark.
#
# - The Spark tarball and its detached signature come from archive.apache.org:
#   the CDN/download mirrors only carry current releases, so a pinned older
#   version disappears from them over time, while the archive keeps it.
# - The KEYS file is downloaded to disk rather than piped into gpg, so a failed
#   download aborts the build (a pipe under /bin/sh hides wget's exit status).
# - Keys are imported into a throwaway GNUPGHOME that is deleted afterwards, so
#   no keyring ends up in the image; gnupg itself is purged in the same layer.
# - The verified archive is unpacked and renamed to /usr/local/spark.
RUN set -eux; \
    spark_version=3.2.0; \
    spark_pkg="spark-${spark_version}-bin-hadoop2.7"; \
    spark_url="https://archive.apache.org/dist/spark/spark-${spark_version}/${spark_pkg}.tgz"; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        default-jdk \
        default-jre \
        gnupg \
        scala; \
    wget -qO /tmp/spark.tgz "${spark_url}"; \
    wget -qO /tmp/spark.tgz.asc "${spark_url}.asc"; \
    wget -qO /tmp/spark.KEYS https://downloads.apache.org/spark/KEYS; \
    GNUPGHOME="$(mktemp -d)"; \
    export GNUPGHOME; \
    gpg --batch --import /tmp/spark.KEYS; \
    gpg --batch --verify /tmp/spark.tgz.asc /tmp/spark.tgz; \
    gpgconf --kill all; \
    rm -rf "${GNUPGHOME}" /tmp/spark.KEYS /tmp/spark.tgz.asc; \
    apt-get purge -y --auto-remove gnupg; \
    rm -rf /var/lib/apt/lists/*; \
    tar -xzf /tmp/spark.tgz -C /usr/local/; \
    rm -f /tmp/spark.tgz; \
    mv "/usr/local/${spark_pkg}" /usr/local/spark
# Install essential Python packages.
#
# - pyspark is pinned to 3.2.0 so the Python bindings match the Spark 3.2.0
#   distribution that SPARK_HOME points at; an unpinned install would drift to
#   newer releases that do not match those jars.
# - --no-cache-dir and `conda clean` drop the package caches in the same layer
#   that creates them, so they do not add to the image size.
RUN set -eux; \
    pip install --no-cache-dir \
        cairocffi \
        findspark \
        numpy \
        pandas \
        psutil \
        py4j \
        pyspark==3.2.0 \
        scikit-learn \
        tensorflow; \
    conda install jupyter -y --quiet; \
    conda clean --all --yes
# Create an unprivileged account named "user" (home directory, bash login
# shell, and a group of the same name), then make it the default user and
# start in its home directory so the container does not run as root.
RUN useradd --create-home --shell /bin/bash --user-group user
USER user
WORKDIR /home/user
# Runtime environment for Spark and PySpark.
# - JAVA_HOME uses Debian's /usr/lib/jvm/default-java symlink (shipped with the
#   default-jre/default-jdk packages) instead of a hard-coded
#   java-11-openjdk-amd64 directory, so it keeps resolving if the default JDK
#   version or the CPU architecture changes.
# - SPARK_HOME and HADOOP_HOME both point at the unpacked Spark distribution.
# - PySpark workers run the conda Python; the driver runs under IPython.
ENV JAVA_HOME=/usr/lib/jvm/default-java \
    SPARK_HOME=/usr/local/spark \
    HADOOP_HOME=/usr/local/spark \
    PYSPARK_PYTHON=/opt/conda/bin/python3 \
    PYSPARK_DRIVER_PYTHON=/opt/conda/bin/ipython3
# Start a Jupyter Notebook server by default. "--ip *" asks it to listen on all
# interfaces so it can be reached through a published container port; exec
# form passes the "*" literally (no shell globbing).
# No --port is given, so the server uses Jupyter's default port, 8888; EXPOSE
# only documents that port and does not publish it.
EXPOSE 8888
CMD ["jupyter", "notebook", "--ip", "*"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment