Skip to content

Instantly share code, notes, and snippets.

@Wildcarde
Last active February 13, 2017 16:24
Show Gist options
  • Save Wildcarde/d0ace6f234d9e7f14f67 to your computer and use it in GitHub Desktop.
Python PyStone Slurm Demo
#!/bin/bash
# Creates the Python environment used for this demo on the spock cluster.
# A similar environment can be constructed on rondo.
#
# Load the anaconda python module; this adds a Python 3.4 install to the PATH.
# That stock install doesn't have the tools we need, so we only use it as the
# base for building a custom conda environment for this demonstration.
module load anacondapy/3.4
# List the conda environments you have already made. The module load provides
# two stock environments; any number more can be made with 'conda create'.
conda info -e
# Create a new environment named 'pybench33' with Python 3.3, the full
# anaconda package set, and pip. Likely overkill, but it'll work for what we need.
conda create --name pybench33 python=3.3 anaconda pip
# Activate the new environment so it is used when running commands.
. activate pybench33
function [] = matlabdemoscript(con_input)
%MATLABDEMOSCRIPT SLURM demo: print the supplied job id and MATLAB's cwd.
%   con_input - job id string passed in by the submit script ($SLURM_JOB_ID)
%ingest the input 'con_input' and display it as the job id
disp(['Job ID: ' con_input])
%display the current working directory for matlab
disp(['Current folder: ' pwd])
#!/usr/bin/env bash
# SLURM submit script: run matlabdemoscript.m in batch mode.
# Name the job 'matlabdemo', write output to output.out, use partition 'all',
# run for 60 minutes and use 2GB of RAM.
#SBATCH -J 'matlabdemo'
#SBATCH -o output.out
#SBATCH -p all
#SBATCH -t 60
#SBATCH --mem 2000
# Run matlab from the command line as part of a submitted job.
module load matlab/R2013a
# These calls assume a file 'matlabdemoscript.m' is located in the same
# directory as this job and contains a function named the same.
# Terse form — silently swallows any exception:
#matlab -nosplash -nojvm -nodisplay -nodesktop -r "try;matlabdemoscript('$SLURM_JOB_ID');catch;exit;end;exit;"
# Verbose form — captures the output of an exception and prints its identifier
# and message to the command line (and by proxy the slurm output file).
# NOTE(review): the original script left BOTH invocations active, running the
# function twice per job; only the verbose one is active now.
matlab -nosplash -nojvm -nodisplay -nodesktop -r "try; matlabdemoscript('$SLURM_JOB_ID'); catch me; fprintf('%s / %s\n',me.identifier,me.message); end; exit"
# Alternatively you can run the script with console redirection like this, but
# with much less flexibility or debugging capacity:
#matlab -nojvm -nodisplay -nodesktop < matlabdemoscript.m
user@spock-login ~/test
% sbatch nodeinfo.sh !259
Submitted batch job 33425
user@spock-login ~/test
% ls !260
nodeinfo.sh slurm-33425.out staged_submit.sh test.sh
user@spock-login ~/test
% cat slurm-33425.out !263
In the directory: /usr/people/gmcgrath/test <- working directory
As the user: user <- user to submit the job
on host: spock-c1-16 <- node the job ran on
With access to cpu id(s):
Cpus_allowed_list: 8 <- id number of CPU core the job was allowed to run on
#!/usr/bin/env bash
# Dumps some basic information about the execution environment to an output
# file when submitted via sbatch.
# Name the job 'nodeinfo' and place its output in a file named slurm-<jobid>.out.
# Set partition to 'all'; this isn't strictly necessary but it's good practice.
# Set time to 5 minutes so jobs get killed if something weird happens.
#SBATCH -J 'nodeinfo'
#SBATCH -o slurm-%j.out
#SBATCH -p all
#SBATCH -t 5
echo "In the directory: $(pwd) "
echo "As the user: $(whoami) "
echo "on host: $(hostname) "
echo "With access to cpu id(s): "
# Read this shell's allowed-CPU list straight from /proc (no need for cat | grep).
grep Cpus_allowed_list "/proc/$$/status"
# These variables are only set when the job is submitted with --array=...
echo "Array Allocation Number: $SLURM_ARRAY_JOB_ID"
echo "Array Index: $SLURM_ARRAY_TASK_ID"
#!/bin/env python
"""Simple multiprocessing test.pystones benchmark.

Anders Wallin 2008Jun15 anders.e.e.wallin (at) gmail.com
Adapted by Garrett McGrath gmcgrath815 (at) gmail.com to work with
python 3 and anaconda python.
"""
from test import pystone
import multiprocessing as mp
import time

# pystone.LOOPS defaults to 50,000; each worker process runs 10x that.
STONES_PER_PROCESS = 10 * pystone.LOOPS


def f(q):
    """Run the pystone benchmark in this process and push the result onto q."""
    t = pystone.pystones(STONES_PER_PROCESS)
    q.put(t, block=True)


if __name__ == '__main__':
    # Quick info dump about the system you are running on.
    print('multiprocessing test.pystones() benchmark')
    print('You have ' + str(mp.cpu_count()) + ' CPU(s)')
    print('Processes,Pystones,Wall time,pystones/s')
    # Run the test on 1 to cpu_count()+2 processes consecutively.
    # (An unused module-level mp.Queue() named 'results' in the original was
    # dead code — it was immediately shadowed inside the loop — and is removed.)
    for N in range(1, mp.cpu_count() + 3):
        p = []
        q = mp.Queue()
        results = []
        # Launch the N worker processes.
        for m in range(1, N + 1):
            p.append(mp.Process(target=f, args=(q,)))
        # Measure wall-clock time from first start to last collected result.
        start = time.time()
        for pr in p:
            pr.start()
        for r in p:
            results.append(q.get())
        stop = time.time()
        walltime = stop - start  # wall-clock seconds (not CPU time)
        # Print a CSV row: processes, total pystones, wall time, pystones/s.
        print(str(N) + ',' + str(N * STONES_PER_PROCESS)
              + ',' + str(walltime) + ',' + str(N * STONES_PER_PROCESS / walltime))
#!/usr/bin/env bash
# SLURM submit script for the pybench33 python benchmark.
# Name the job 'pybench33' and place its output in a file named slurm-<jobid>.out.
# Allow 40 minutes to run (it should not take 40 minutes, however).
# Set partition to 'all' so it runs on any available node on the cluster.
#SBATCH -J 'pybench33'
#SBATCH -o slurm-%j.out
#SBATCH -p all
#SBATCH -t 40
module load anacondapy/3.4
. activate pybench33
./pybench33.py
#!/usr/bin/env bash
# This isn't meant to be run; it just holds some commands you can use at the
# command line.
# Run nodeinfo with the default resources provided by the 'all' partition:
sbatch nodeinfo.sh
# Request multiple cpus for a single task:
sbatch -c 2 nodeinfo.sh
# Run an array of tasks (indices 0-2) simultaneously:
sbatch --array=[0-2] nodeinfo.sh
# More complicated example:
sbatch runbench.sh
# and
sbatch -c 4 runbench.sh
# The two commands above produce output in slurm-x.out files displaying the
# relative performance of asking for 1 or 4 cpu cores.
#!/usr/bin/env bash
# Quick script to demonstrate two jobs, with the second dependent on the
# first: SLURM only starts the second job if the first exits cleanly (afterok).
# Use modern $(( )) arithmetic; the original used the deprecated $[ ] form.
SEED=$(( (RANDOM % 1000) + 1 ))
echo "random seed: $SEED"
# sbatch prints "Submitted batch job <id>"; the job id is the last
# whitespace-separated field of that line.
jid=$(sbatch -J "stest+$SEED" ~/stest.sh | awk '{print $NF}')
echo "$jid"
sbatch --dependency=afterok:"$jid" -J "stest2+$SEED" ~/stest2.sh
#!/usr/bin/env bash
# Quick script to submit jobs for unpacking tar files on spock.
# All this does is get a list of .tar.gz files in the current folder and
# submit an sbatch job (unpack.sh) for each one to unpack it.
# Assumption: there are .tar.gz files in the folder this command is run from.
for file in *.tar.gz
do
  # With no matching files the glob stays unexpanded; skip the literal pattern
  # instead of submitting a job for the nonexistent file '*.tar.gz'.
  [ -e "$file" ] || continue
  # Quote the filename so names containing spaces survive word-splitting.
  sbatch ./unpack.sh "$file"
done
# If you wanted to start a new job for each directory instead, change the loop
# to something like:
#for obj in *
#do
#  if [ -d "$obj" ]; then   # check whether the object is a directory
#    echo "Issue sbatch for work on directory"
#  fi
#done
#!/usr/bin/env bash
# Runs a tar extraction on whatever file it is given as $1.
# This is called from 'tarloop.sh' via the sbatch command.
# All it does is write out the file it has been asked to unpack and run
# 'tar xvf' against it. There is no error checking / sanity checking / good
# practice here; this is just a basic demo script.
#SBATCH -J 'unpack'
#SBATCH -o unpack-%j.out
#SBATCH -p all
#SBATCH -t 40
echo "Unpacking: $1"
# Quote the argument and add '--' so filenames containing spaces or starting
# with '-' are handled safely.
tar xvf -- "$1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment