Skip to content

Instantly share code, notes, and snippets.

@j3speaks
Created May 17, 2019 20:28
Show Gist options
  • Save j3speaks/a276be63054a486b6f28ae6b6bc56fa3 to your computer and use it in GitHub Desktop.
Save j3speaks/a276be63054a486b6f28ae6b6bc56fa3 to your computer and use it in GitHub Desktop.
Dockerize Spark
# Docker Compose definition for a standalone Spark cluster: one master
# service and one worker service attached to a shared bridge network.
version: "3.7"

services:
  # Spark master. Builds the shared image from the local Dockerfile and
  # publishes the web UI (8080) and the master port (7077) on the host.
  spark-master:
    image: j3/spark:latest
    container_name: spark-master
    hostname: spark-master
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8080:8080"
      - "7077:7077"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER_HOST=spark-master"
      - "SPARK_MASTER_PORT=7077"
      - "SPARK_MASTER_WEBUI_PORT=8080"
    command: "/start-master.sh"

  # Spark worker. Uses the same image tag as the master and is started
  # after it; it reaches the master via the spark-master hostname.
  spark-worker:
    image: j3/spark:latest
    depends_on:
      - spark-master
    ports:
      # Container port only: Docker assigns a free host port, so the
      # worker UI never clashes with the master's fixed 8080 mapping.
      - "8080"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER_URL=spark://spark-master:7077"
      - "SPARK_WORKER_WEBUI_PORT=8080"
    command: "/start-worker.sh"

networks:
  spark-network:
    driver: bridge
    ipam:
      driver: default
# Image for a standalone Spark 2.4.2 node (master or worker) on Alpine + JDK 8.
# The role is selected at run time by invoking /start-master.sh or
# /start-worker.sh, which read their settings from environment variables.
FROM openjdk:8-alpine

# --no-cache avoids leaving the apk index in the image; ca-certificates lets
# wget validate the HTTPS download below.
RUN apk add --no-cache wget tar bash python ca-certificates

# Fetch from the Apache archive (mirrors drop superseded releases) and
# download, unpack and clean up in a single layer so the tarball is not
# retained in the image history.
RUN wget https://archive.apache.org/dist/spark/spark-2.4.2/spark-2.4.2-bin-hadoop2.7.tgz \
    && tar -xzf spark-2.4.2-bin-hadoop2.7.tgz \
    && mv spark-2.4.2-bin-hadoop2.7 spark \
    && rm spark-2.4.2-bin-hadoop2.7.tgz

# Launcher for the master. `exec` replaces the shell with the JVM so that
# signals sent to the container (e.g. by `docker stop`) reach Spark directly.
# Reads SPARK_MASTER_HOST, SPARK_MASTER_PORT and SPARK_MASTER_WEBUI_PORT.
RUN printf '#!/bin/sh\nexec /spark/bin/spark-class org.apache.spark.deploy.master.Master --host "$SPARK_MASTER_HOST" --port "$SPARK_MASTER_PORT" --webui-port "$SPARK_MASTER_WEBUI_PORT"\n' > /start-master.sh \
    && chmod +x /start-master.sh

# Launcher for a worker. Reads SPARK_WORKER_WEBUI_PORT and SPARK_MASTER_URL.
RUN printf '#!/bin/sh\nexec /spark/bin/spark-class org.apache.spark.deploy.worker.Worker --webui-port "$SPARK_WORKER_WEBUI_PORT" "$SPARK_MASTER_URL"\n' > /start-worker.sh \
    && chmod +x /start-worker.sh

# Sample data and example script copied from the build context.
COPY cloudera-10k.txt /cloudera-10k.txt
COPY employee.txt /spark/examples/src/main/scala/org/apache/spark/examples/sql/employee.txt
COPY employee.json /spark/examples/src/main/scala/org/apache/spark/examples/sql/employee.json
COPY spark-example.scala /spark-example.scala
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment