- Build a Mesos cluster on DigitalOcean: https://digitalocean.mesosphere.com/clusters/new/custom
- Official documentation for Spark on Mesos: http://spark.apache.org/docs/latest/running-on-mesos.html
- A step-by-step (but dated) tutorial: https://mesosphere.com/docs/tutorials/run-spark-on-mesos/
- Create an Ad Hoc Spark Cluster in 15 Minutes: http://www.infolace.com/blog/2015/02/27/create-an-ad-hoc-spark-cluster/
How to set up a Spark cluster on DigitalOcean
# Join the cluster VPN (the .ovpn config file comes from the Mesosphere setup)
sudo openvpn --config *.ovpn

apt-get update
apt-get install vim

# Download Spark and stage the tarball in HDFS so Mesos slaves can fetch it
wget http://d3kbcqa49mib13.cloudfront.net/spark-1.3.0-bin-hadoop2.4.tgz
tar zxf spark-1.3.0-bin-hadoop2.4.tgz
hadoop fs -mkdir /spark
hadoop fs -put spark-1.3.0-bin-hadoop2.4.tgz /spark
hadoop fs -du -h /spark

# Configure Spark: append these three exports to spark-env.sh
# (the executor URI must point at the NameNode RPC endpoint on its
# default port, not the 50070 web UI port)
cd spark-1.3.0-bin-hadoop2.4/conf
cp spark-env.sh.template spark-env.sh
export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
export SPARK_EXECUTOR_URI=hdfs://10.132.70.244/spark/spark-1.3.0-bin-hadoop2.4.tgz
export MASTER=mesos://10.132.70.244:5050

# Smoke-test from the PySpark shell (Python 2, hence xrange)
cd ..
./bin/pyspark --master mesos://10.132.70.244:5050
a = sc.parallelize(xrange(1, 100000, 2))
b = a.collect()
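If b comes back with the odd numbers, executors are launching through Mesos. A slightly heavier smoke test is to submit the bundled SparkPi example with spark-submit; a minimal sketch, assuming the prebuilt distribution's examples jar under lib/ (verify the exact jar name with `ls lib/`):

cd spark-1.3.0-bin-hadoop2.4
# jar name assumed from the prebuilt 1.3.0/hadoop2.4 distribution;
# the final argument is SparkPi's partition count
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master mesos://10.132.70.244:5050 \
  lib/spark-examples-1.3.0-hadoop2.4.0.jar 10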
#!/bin/bash
# Scripted version of the steps above; run on the Mesos master with the
# master's IP as the first argument. Set IPYTHON=yes in the environment
# to also start a notebook server.
set -x

MASTER_IP=$1
SPARK_MASTER=mesos://zk://${MASTER_IP}:2181/mesos
SPARK_DIR=spark-1.3.0-bin-hadoop2.4
SPARK_FILE=spark-1.3.0-bin-hadoop2.4.tgz

# Optional packages
# apt-get -y install tmux
# apt-get -y install mosh

# Get Spark and put the tarball in HDFS
wget http://d3kbcqa49mib13.cloudfront.net/${SPARK_FILE}
tar zxf ${SPARK_FILE}
hdfs dfs -mkdir -p /tmp
hdfs dfs -put ${SPARK_FILE} /tmp
rm ${SPARK_FILE}

# Configure Spark
cd ${SPARK_DIR}/conf
cp spark-env.sh.template spark-env.sh
cat >> spark-env.sh <<EOF
export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
export SPARK_EXECUTOR_URI=hdfs://${MASTER_IP}/tmp/${SPARK_FILE}
export MASTER=${SPARK_MASTER}
export SPARK_LOCAL_IP=${MASTER_IP}
export SPARK_PUBLIC_DNS=${MASTER_IP}
EOF
cp spark-defaults.conf.template spark-defaults.conf
cat >> spark-defaults.conf <<EOF
spark.executor.uri hdfs://${MASTER_IP}/tmp/${SPARK_FILE}
EOF
# Quiet the console: raise the log4j root level from INFO to WARN
sed 's/log4j.rootCategory=INFO/log4j.rootCategory=WARN/' < log4j.properties.template > log4j.properties
cd ..

# Update the login environment; ${PWD} is now the Spark directory, so the
# PATH entry written to ~/.profile is absolute and works from anywhere
cat >> ~/.profile <<EOF
export PATH=${PWD}/bin:${PATH}
export SPARK_MASTER=${SPARK_MASTER}
EOF

# Run an IPython notebook server on demand
if [ ! -z "${IPYTHON+x}" ]
then
  apt-get -y install python-dev
  pip install pyzmq
  pip install "ipython[notebook]"
  mkdir -p ~/notebook
  IPYTHON_OPTS="notebook --ip=0.0.0.0 --no-browser --notebook-dir=${HOME}/notebook --no-stdout --no-stderr" bin/pyspark --master ${SPARK_MASTER} &
fi
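Assuming the script is saved on the Mesos master as setup-spark.sh (a hypothetical name), a typical invocation passes the master's IP, with IPYTHON set when the notebook server is wanted:

chmod +x setup-spark.sh
./setup-spark.sh 10.132.70.244
IPYTHON=yes ./setup-spark.sh 10.132.70.244   # also starts the notebook server

With IPYTHON set, the notebook listens on 0.0.0.0, so it should be reachable at the master's IP on IPython's default port, 8888.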