-
-
Save mtavkhelidze/681fd576b699ba8fb16cc7b1525525dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Alpine-based image that bundles Apache Spark with the hadoop-aws / AWS SDK
# jars and runs app.py through spark-submit.
FROM alpine:3.10

# Component versions; override at build time with --build-arg.
ARG SPARK_VERSION=3.0.0-preview
ARG HADOOP_VERSION_SHORT=3.2
ARG HADOOP_VERSION=3.2.0
ARG AWS_SDK_VERSION=1.11.375

# Runtime dependencies: bash (Spark launcher scripts), a JRE, and Python 3 for PySpark.
RUN apk add --no-cache bash openjdk8-jre python3

# With pipefail, a failing wget in the `wget | tar` pipeline below aborts the
# build instead of the pipeline reporting only tar's exit status.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]

# Download and extract Spark.
# archive.apache.org retains all past releases, whereas mirror hosts only
# carry current ones, so the archive URL keeps working for older versions.
RUN wget -qO- "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION_SHORT}.tgz" \
      | tar zx -C /opt \
    && mv "/opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION_SHORT}" /opt/spark

# Configure Spark to respect IAM role given to container
RUN echo spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper > /opt/spark/conf/spark-defaults.conf

# Add hadoop-aws and aws-sdk
RUN wget "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar" -P /opt/spark/jars/ \
    && wget "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_VERSION}/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar" -P /opt/spark/jars/

# Put the Spark launchers on PATH and make PySpark use python3.
ENV PATH="/opt/spark/bin:${PATH}" \
    PYSPARK_PYTHON=python3

COPY app.py /

# Setting proper hostname before running spark, see https://stackoverflow.com/a/55652399/7098262
# The append to /etc/hosts happens at container start and needs root, which is
# why no USER directive is set. `exec` replaces the wrapper shell so that
# spark-submit receives signals (e.g. SIGTERM from `docker stop`) directly.
ENTRYPOINT ["/bin/sh", "-c", "echo 127.0.0.1 \"$HOSTNAME\" >> /etc/hosts; exec spark-submit app.py"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment