Last active
July 10, 2018 18:35
-
-
Save brevans/168ab4b0c48d3d58aa979660fdf7500d to your computer and use it in GitHub Desktop.
slurm submit script for running cryoSPARC on Yale HPC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Submit a SLURM batch job that starts a cryoSPARC server on one GPU node,
# e-mails you when the node is ready, waits up to ONHOLD for a cryoSPARC job
# to appear, keeps the allocation while cryoSPARC jobs run, then stops the
# server and removes your files from the node's /tmp.
#
# Usage: edit the settings below, then run this script on a host with sbatch.

### Edit these to your needs
ONHOLD=00:15                          # Time to wait for cryoSPARC jobs to start (format HH:MM)
EMAIL=netid@example.edu               # Send notification email when node is ready (placeholder, set yours)
TUNNEL_LOGIN=netid@cluster-login-host # user@host of the login node shown in the ssh tunnel hint (placeholder)
SLURM_PARTITION=gpu                   # Which SLURM partition to use
NUM_GPUS=4                            # How many GPUs
JOB_MEM=121G                          # How much memory for the job?
WALLTIME=1-00:00:00                   # Walltime request

#### Don't edit below here. Unless you know what you are doing

#######################################
# Convert an HH:MM duration to seconds.
# Arguments: $1 - duration formatted as HH:MM
# Outputs:   the number of seconds on stdout; a diagnostic on stderr on error
# Returns:   0 on success, 1 if $1 is not of the form digits:digits
#######################################
onhold_to_seconds() {
  local hhmm=$1
  if [[ ! ${hhmm} =~ ^([0-9]+):([0-9]+)$ ]]; then
    printf 'ONHOLD must be formatted as HH:MM, got "%s"\n' "${hhmm}" >&2
    return 1
  fi
  # 10# forces base 10: without it "08" and "09" are rejected as invalid octal.
  printf '%d\n' "$(( 10#${BASH_REMATCH[1]} * 3600 + 10#${BASH_REMATCH[2]} * 60 ))"
}

#######################################
# Print the batch script that runs on the compute node.
# Unescaped expansions are filled in now (submit time); expansions written
# with a leading backslash are left for the job to evaluate on the node.
# Globals:   SLURM_PARTITION, NUM_GPUS, JOB_MEM, WALLTIME, ONHOLD, EMAIL,
#            TUNNEL_LOGIN (all read)
# Arguments: $1 - on hold time in seconds
# Outputs:   the batch script on stdout
#######################################
render_job_script() {
  local onhold_sec=$1
  cat <<EOF
#!/bin/bash
#SBATCH -p ${SLURM_PARTITION}
#SBATCH -N 1
#SBATCH --gres=gpu:1080ti:${NUM_GPUS}
#SBATCH --gres-flags=enforce-binding
#SBATCH --mem=${JOB_MEM}
#SBATCH --cpus-per-task=$((NUM_GPUS * 2))
#SBATCH -t ${WALLTIME}
#SBATCH -J cryoSPARC

# Find path of cryoSPARC (we expect it to be on your path); give up early
# instead of holding the GPUs for a server that can never start.
CRYOSPARC_BIN=\$(command -v cryosparc) || { echo "cryosparc not found on PATH" >&2; exit 1; }
CRYOSPARC_PATH=\$(dirname "\$(dirname "\${CRYOSPARC_BIN}")")
# Command line pattern that identifies a running cryoSPARC compute job
SPARCJOB_PATTERN="python \${CRYOSPARC_PATH}/cryosparc-compute/sparcjob.py"

cryosparc start

# On hold time in seconds (ONHOLD=${ONHOLD}, converted at submit time)
ONHOLD_SEC=${onhold_sec}
# Set on hold count to zero
ONHOLD_COUNT=0
# Find on hold end time
ONHOLD_TIME=\$(date +%H:%M --date="\${ONHOLD_SEC} seconds")

# Send notification email
echo "After establishing a tunnel (e.g. ssh -N -L 38000:\$(hostname):38000 ${TUNNEL_LOGIN}), Please go to http://localhost:38000 on your computer and start your cryoSPARC job before \${ONHOLD_TIME}." | mail -s "\$(hostname) is ready for your cryoSPARC job" "${EMAIL}"

# Let the server start and print some info
sleep 15
cryosparc status
cryosparc configure gpu list
echo "\$(date +%H:%M:%S) ON HOLD for ${ONHOLD} (HH:MM)"

# Count on hold time one second at a time; stop early once a job shows up
while (( ONHOLD_COUNT < ONHOLD_SEC )); do
  sleep 1
  ONHOLD_COUNT=\$(( ONHOLD_COUNT + 1 ))
  if pgrep -f "\${SPARCJOB_PATTERN}" >/dev/null; then
    echo "\$(date +%H:%M:%S) JOB STARTED"
    break
  fi
done

# Keep slurm allocation until all cryoSPARC jobs are done, checking every 5 minutes
while pgrep -f "\${SPARCJOB_PATTERN}" >/dev/null; do
  sleep 5m
  echo "\$(date +%H:%M:%S) JOB STILL RUNNING"
done

# Stop the server
cryosparc stop
echo "\$(date +%H:%M:%S) Job is done, cleaning up:"
# grep -v exits 1 when it prints nothing (the normal case for -delete);
# "|| true" keeps that from becoming the exit status of the whole job.
find /tmp -user "\$(id -un)" 2>&1 | grep -v "Permission denied" || true
find /tmp -user "\$(id -un)" -delete 2>&1 | grep -v "Permission denied" || true
EOF
}

#######################################
# Validate the settings and hand the rendered batch script to sbatch.
# Globals:   ONHOLD (read)
# Outputs:   whatever sbatch prints; diagnostics on stderr
# Returns:   1 if ONHOLD is malformed or sbatch is unavailable, otherwise
#            the exit status of sbatch
#######################################
main() {
  local onhold_sec
  onhold_sec=$(onhold_to_seconds "${ONHOLD}") || return 1
  if ! command -v sbatch >/dev/null 2>&1; then
    echo "sbatch not found on PATH; run this script on a SLURM submit host" >&2
    return 1
  fi
  # sbatch reads the batch script from stdin when no file name is given.
  render_job_script "${onhold_sec}" | sbatch
}

# Last command of the script, so its status is the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment