Created
December 9, 2019 17:08
-
-
Save Mister-Meeseeks/1ebf875b6e1262449cbc45c5342f592a to your computer and use it in GitHub Desktop.
Build Spark from source and install in system paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Dockerfile downloads the latest Spark source from GitHub, builds it
# at /usr/local/src/spark/, and installs the Spark commands and libraries to
# system paths under the /usr/local/src/ directory.
# Base image. Pinned to a named Debian release instead of the moving
# `stable` tag: openjdk-11-jdk-headless (installed below) is not packaged in
# newer Debian releases, so `stable-slim` stops building once `stable`
# advances past a release that ships it.
FROM debian:bullseye-slim
# Install the tools used to fetch and build Spark (download utilities, git,
# Maven, a headless JDK 11 and Scala).
# The man1 directory is created before installing; presumably this is the
# usual workaround for JDK post-install steps failing on slim images that
# strip man pages -- confirm before removing.
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env, and
# the apt package index is removed in the same layer so it is not shipped in
# the image.
RUN mkdir -p /usr/share/man/man1 && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        curl \
        git \
        man \
        maven \
        openjdk-11-jdk-headless \
        scala \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Fetch the Spark sources into /usr/local/src/spark.
# SPARK_REF selects the branch or tag to check out. It defaults to master
# (the latest source); pass `--build-arg SPARK_REF=<ref>` to build a fixed
# ref reproducibly.
# The clone is shallow so the full project history is not stored in the
# image layer.
ARG SPARK_REF=master
RUN git clone --depth 1 --branch "${SPARK_REF}" \
        https://github.com/apache/spark.git /usr/local/src/spark
# Build Spark with its bundled Maven wrapper, enabling the hive-2.3,
# hadoop-3.2 and hadoop-cloud profiles and skipping the tests.
# The -Phive-2.3 profile is required whenever -Phadoop-3.2 is used
# (see SPARK-30029).
RUN cd /usr/local/src/spark \
    && ./build/mvn \
        -Phive-2.3 \
        -Phadoop-3.2 \
        -Phadoop-cloud \
        -DskipTests \
        clean package
# Download the AWS SDK for Java and copy its jars into Spark's assembly jar
# directory.
# Only *.jar entries are extracted from the archive, and any jar whose path
# contains "third-party" is excluded from the copy.
# The copy uses `find -exec cp` instead of generating `cp` commands with sed
# and piping them to bash: the default /bin/sh has no pipefail, so a failure
# earlier in that pipeline was masked, and file names were re-parsed by a
# second shell. With `-exec ... {} +` a failing cp makes find, and therefore
# this build step, fail.
RUN cd /usr/local/src/ && \
    wget https://sdk-for-java.amazonwebservices.com/latest/aws-java-sdk.zip && \
    unzip aws-java-sdk.zip '*.jar' && \
    rm aws-java-sdk.zip && \
    find aws-java-*/ -name '*.jar' ! -path '*third-party*' \
        -exec cp -t spark/assembly/target/scala-2.12/jars/ {} +
# Download the spark-installer script into the Spark source directory, make
# it executable and run it from there.
# curl flags: -f exits non-zero on an HTTP error instead of saving the error
# page as install.sh and trying to execute it; -sS hides the progress meter
# but still prints errors; -L follows redirects; -o writes the file directly.
# NOTE(review): the script comes from the moving `master` branch with no
# checksum -- consider pinning a commit for reproducible builds.
RUN cd /usr/local/src/spark && \
    curl -fsSL https://raw.githubusercontent.com/Mister-Meeseeks/spark-installer/master/install-spark.sh -o install.sh && \
    chmod u+x install.sh && \
    ./install.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment