Example to submit a PySpark interactive job on a Torque (PBS) cluster.
#!/bin/bash -f
#PBS -N apache-spark
#PBS -l nodes=2:ppn=8
#PBS -l walltime=00:01:00:00
#PBS -A <specify account>

# Define variables
SPARK_MASTER="spark://$HOSTNAME:7077"
SPARK_HOME=<specify where Spark is installed>
VENV=<specify path to the Python virtualenv>

# Launch the master daemon on the head node
$SPARK_HOME/sbin/start-master.sh

# Launch the driver: activate the virtualenv, load Spark's environment,
# and start an IPython notebook server in the background
source $VENV/bin/activate
source $SPARK_HOME/sbin/spark-config.sh
ipython notebook --no-browser --ip='*' &

# Define slave initialization
cat << EOF > $PBS_O_WORKDIR/slave_init.sh
#!/bin/bash -l
#module load apps/python/2.7.5
source $VENV/bin/activate
$SPARK_HOME/sbin/start-slave.sh $SPARK_MASTER
EOF
chmod u+x $PBS_O_WORKDIR/slave_init.sh

# Launch one slave daemon per allocated node
pbsdsh -ou $PBS_O_WORKDIR/slave_init.sh

# Keep the job alive until the walltime expires or the notebook server exits;
# without this, the batch script would return and Torque would kill the notebook.
wait
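To use the script, submit it with qsub, find the head node of the allocation, and tunnel the notebook port back to your workstation. A minimal sketch, assuming the script above is saved as pyspark-interactive.pbs and that the notebook server listens on IPython's default port 8888 (the file name, port, and placeholder names are illustrative):

# Submit the job to Torque; qsub prints the job id.
qsub pyspark-interactive.pbs

# Once the job is running, list the nodes allocated to it;
# the first node shown is the head node running the master and the notebook.
qstat -n <job id>

# From your workstation, tunnel the notebook port through the login node.
ssh -L 8888:<head node>:8888 <login node>

With the tunnel open, browse to http://localhost:8888 and create a SparkContext against the master URL, spark://<head node>:7077, to run PySpark code on the allocated workers.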