Last active
November 6, 2020 19:59
-
-
Save Voronenko/36b8ab5d2165612092a4ae54c069b60a to your computer and use it in GitHub Desktop.
Pairing session notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the image from the Dockerfile in the current directory and tag it
# `spark3:local`, the image name the compose services refer to.
docker build --tag spark3:local .
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Local Spark stack: one master, one worker and a Jupyter notebook server.
# All three services run the locally built `spark3:local` image and share the
# `spark` network plus the ./events directory mounted at /tmp/spark-events.
version: "3.7"

services:
  spark-master:
    # image: elopezdelara/miniconda3-spark:latest
    image: spark3:local
    expose:
      - "8080"
    ports:
      # Container port 8080 is published on host port 9080.
      - "9080:8080"
    command:
      - start-master.sh
    environment:
      # NOTE(review): presumably keeps the Spark launcher script in the
      # foreground so the container does not exit - confirm against Spark docs.
      - SPARK_NO_DAEMONIZE=1
      - SPARK_PUBLIC_DNS=localhost
    networks:
      - spark
    volumes:
      - ./events:/tmp/spark-events

  spark-worker:
    depends_on:
      - spark-master
    # image: elopezdelara/miniconda3-spark:latest
    image: spark3:local
    expose:
      - "8081"
    ports:
      - "8081:8081"
    command:
      - start-slave.sh
      # Master is addressed by its service name on the shared `spark` network.
      - spark://spark-master:7077
    environment:
      - SPARK_NO_DAEMONIZE=1
      - SPARK_PUBLIC_DNS=localhost
    networks:
      - spark
    volumes:
      - ./events:/tmp/spark-events

  jupyter-notebook:
    depends_on:
      - spark-master
    container_name: jupyter-notebook
    image: spark3:local
    expose:
      - "4040"
      - "8888"
    ports:
      - "4040:4040"
      - "8888:8888"
    networks:
      - spark
    volumes:
      - ./notebooks:/opt/notebooks
      - ./events:/tmp/spark-events
    # WARNING: token authentication is disabled and the server runs as root
    # with port 8888 published on the host; use on a trusted machine only.
    command:
      - jupyter
      - notebook
      # List-form commands are executed without a shell, so shell-style quotes
      # would reach Jupyter literally; pass the bare values instead.
      - "--ip=*"
      - --port=8888
      - --no-browser
      - --notebook-dir=/opt/notebooks
      - --allow-root
      - "--NotebookApp.token="

networks:
  spark:
    name: spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image with Miniconda, the PyData stack, Jupyter, OpenJDK 11 and a Spark 3.0.0
# distribution (plus GCS and MySQL connector jars) unpacked under /opt/spark.

# NOTE(review): the base image is untagged, so each rebuild may pull a different
# OS/conda/Python combination; pin a tag or digest once a known-good one is chosen.
FROM continuumio/miniconda3

# Make a RUN step fail when any command of a pipeline fails (see curl | tar below).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# BASE_DIR must be set in its own instruction: variables defined inside one ENV
# instruction are not visible to other values of that same instruction.
ENV BASE_DIR=/opt
ENV NOTEBOOKS_HOME=${BASE_DIR}/notebooks \
    SPARK_HOME=${BASE_DIR}/spark \
    PYSPARK_DRIVER_PYTHON=ipython
ENV PATH=${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${PATH}

# Python packages.
# `conda clean` lists its flags explicitly; the former `-s` (source cache) flag
# is not accepted by newer conda releases, which the preceding update installs.
# The extra directories are created here as well:
#   /tmp/spark-events   - mount point used for the shared events volume
#   /usr/share/man/man1 - NOTE(review): presumably a workaround for the OpenJDK
#                         package install on images without man directories.
RUN conda update -n base conda -y --quiet && \
    conda install -y --quiet \
        ipython \
        jupyter \
        matplotlib \
        numpy \
        pandas \
        pyspark=3.0.0 \
        scikit-learn && \
    conda clean --tarballs --index-cache --packages --yes && \
    mkdir -p ${NOTEBOOKS_HOME} /tmp/spark-events /usr/share/man/man1

# OpenJDK
RUN apt-get update --fix-missing && \
    apt-get install -y \
        curl \
        openjdk-11-jdk-headless \
        procps \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Spark
# -f makes curl exit non-zero on an HTTP error instead of piping an error page to tar.
RUN curl -fsSL https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz | tar xz -C /tmp && \
    mv /tmp/spark-3.0.0-bin-hadoop2.7 ${SPARK_HOME}

# Google Cloud Storage Connector
# NOTE(review): "latest" is a moving target; consider pinning a connector version.
RUN wget -q https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar -P ${SPARK_HOME}/jars

# MySQL JDBC driver
# Download, extract, install and clean up in a single layer so neither the
# tarball nor the extracted tree remains in the image.
RUN wget -q -O /tmp/mysql-connector-java-5.1.45.tar.gz \
        https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-java-5.1.45.tar.gz && \
    tar xf /tmp/mysql-connector-java-5.1.45.tar.gz -C /tmp && \
    mv /tmp/mysql-connector-java-5.1.45/mysql-connector-java-5.1.45-bin.jar ${SPARK_HOME}/jars/ && \
    rm -rf /tmp/mysql-connector-java-5.1.45 /tmp/mysql-connector-java-5.1.45.tar.gz

WORKDIR ${BASE_DIR}/work
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment