Set up environment variables for Hadoop.
# Hadoop release to install and the locations derived from it.
HADOOP_VERSION=2.8.5
HADOOP_HOME="${HOME}/hadoop-${HADOOP_VERSION}"
HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
# Prepend Hadoop's bin directory so its commands are found first on PATH.
PATH="${HADOOP_HOME}/bin:${PATH}"
export HADOOP_VERSION HADOOP_HOME HADOOP_CONF_DIR PATH
Download Hadoop files.
# Fetch the Hadoop release tarball from the Apache archive, unpack it under
# /tmp, move the unpacked tree to $HADOOP_HOME, and delete the bundled docs.
#   curl -f    fail on HTTP errors instead of piping an error page into tar
#   curl -sS   stay quiet, but still print a message when the transfer fails
#   https://   do not fetch the archive over plain HTTP
#   tar -z -f - decompress in tar and read the archive from stdin explicitly
#              (the default archive device is not stdin on every tar)
# ${HADOOP_HOME:?} makes the rm refuse to run with an empty variable, so it
# can never expand to "rm -rf /share/doc".
curl -fsSL --retry 3 \
  "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \
  | tar -xzf - -C /tmp/ \
  && mv "/tmp/hadoop-${HADOOP_VERSION}" "${HADOOP_HOME}" \
  && rm -rf -- "${HADOOP_HOME:?}/share/doc"
Set up environment variables for Spark.
# Spark release to install and the locations derived from it.
export SPARK_VERSION=2.4.0
export SPARK_PACKAGE="spark-${SPARK_VERSION}-bin-without-hadoop"
export SPARK_HOME="${HOME}/spark-${SPARK_VERSION}"
# SPARK_DIST_CLASSPATH is taken from the output of `hadoop classpath`.
# Assign first and export afterwards: `export VAR=$(cmd)` always reports
# success, which would hide a missing or failing hadoop command.
SPARK_DIST_CLASSPATH=$(hadoop classpath) \
  || printf 'warning: "hadoop classpath" failed; SPARK_DIST_CLASSPATH may be incomplete\n' >&2
export SPARK_DIST_CLASSPATH
# Prepend Spark's bin directory so its commands are found first on PATH.
export PATH="${SPARK_HOME}/bin:${PATH}"
Download the Spark distribution built without bundled Hadoop binaries.
# Fetch the "without-hadoop" Spark tarball from the Apache archive, unpack it
# under /tmp, and move the unpacked tree to $SPARK_HOME.
#   curl -f    fail on HTTP errors instead of piping an error page into tar
#   curl -sS   stay quiet, but still print a message when the transfer fails
#   tar -z -f - decompress in tar and read the archive from stdin explicitly
#              (the default archive device is not stdin on every tar)
curl -fsSL --retry 3 \
  "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
  | tar -xzf - -C /tmp/ \
  && mv "/tmp/${SPARK_PACKAGE}" "${SPARK_HOME}"