Skip to content

Instantly share code, notes, and snippets.

@bmcfee
Created November 13, 2014 23:27
Show Gist options
  • Save bmcfee/f74bccff36ae2b233bb5 to your computer and use it in GitHub Desktop.
Save bmcfee/f74bccff36ae2b233bb5 to your computer and use it in GitHub Desktop.
FWGrid job dispatcher
#!/usr/bin/env python
import scipy.io
import pprint
import sys, os, subprocess
import getopt
import time
import stat
# Module-level configuration. main() rebinds these from the command-line
# options, and createJob() reads them when generating the job scripts.

# Maximum number of jobs to run in parallel (option -n).
NumNodes = 20
# Path to the .mat file containing the 'parameters' array (option -p).
Parameters = None
# Path to the .mat file containing 'folds' (option -d).
Data = None
# Name of this experiment (option -t); used in file names and status messages.
Jobname = None
# Directory the generated launchers cd into before starting MATLAB (option -b).
Basepath = os.getcwd()
# Parent directory for the per-experiment output directory (option -o).
Outpath = '%s/scratchbig' % os.environ['HOME']
# Command line used by the generated launchers to start MATLAB.
MATLAB = "/apps/matlabR2008b/bin/matlab -nosplash -nodesktop"
def split(a, n):
    """Partition the sequence `a` into at most `n` contiguous chunks.

    Chunk sizes differ by at most one element; when len(a) is not a
    multiple of the chunk count, the leading chunks receive the extras.

    Parameters:
        a: a sliceable sequence (e.g. a list).
        n: the maximum number of chunks to produce.

    Returns:
        A list of slices of `a`.  It holds min(n, len(a)) chunks, so no
        chunk is ever empty.  An empty `a` or a non-positive `n` yields [].
    """
    n = min(n, len(a))
    if n <= 0:
        # Nothing to distribute (or nowhere to put it); avoids dividing by 0.
        return []
    # divmod floors on integers under both Python 2 and Python 3, whereas a
    # plain "/" would produce a float (and invalid slice indices) on Python 3.
    k, m = divmod(len(a), n)
    return [a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]
def createJob(jobs, x, numJobs):
    """Write the MATLAB script and shell launcher for one batch of experiments.

    Two files are created inside the module-level Outpath directory:
    'jobXX.m', which loads the parameter and data files and calls
    performExperiment for every index from min(jobs) to max(jobs), and
    '<Jobname>-XX', an executable shell script that changes to Basepath
    and runs that MATLAB script.

    Parameters:
        jobs: the parameter indices assigned to this batch; only the
            smallest and largest are used, as the bounds of the MATLAB loop.
        x: zero-based index of this batch, used in file names and messages.
        numJobs: unused; kept so existing callers keep working.

    Returns:
        The path of the launcher script.
    """
    # The launcher changes directory before MATLAB starts, so every path
    # embedded in the generated files is made absolute here; a relative
    # path would otherwise be resolved against Basepath rather than the
    # directory the dispatcher was started from.
    fields = {
        'name': Jobname,
        'index': x,
        'stamp': time.asctime(),
        'outpath': os.path.abspath(Outpath),
        'parameters': os.path.abspath(Parameters),
        'data': os.path.abspath(Data),
        'first': min(jobs),
        'last': max(jobs),
    }

    # "%%" is a literal "%", i.e. a MATLAB comment marker.
    matlabProgram = """
%% --- %(name)s-%(index)d ---
%% Automatically generated by dispatch.py: %(stamp)s
addpath(genpath('~/codebase'));
OUTPATH = '%(outpath)s';
load('%(parameters)s');
load('%(data)s');
%% Tweet starting up
system('/home/bmcfee/bin/qataki u "JOB START: %(name)s-%(index)d"');
try
for j = %(first)d:%(last)d
performExperiment(parameters(j), folds, j, OUTPATH);
end
%% Tweet finished
system('/home/bmcfee/bin/qataki u "JOB END: %(name)s-%(index)d"');
catch
%% Tweet error
system('/home/bmcfee/bin/qataki u "JOB FAIL: %(name)s-%(index)d"');
end
%% ---
""" % fields
    with open('%s/job%02d.m' % (Outpath, x), 'w') as f:
        f.write(matlabProgram)

    launchProgram = """#!/bin/sh
cd %s
%s -r "addpath('%s'); job%02d; exit;"
""" % (os.path.abspath(Basepath), MATLAB, fields['outpath'], x)
    launcher = '%s/%s-%02d' % (Outpath, Jobname, x)
    with open(launcher, 'w') as f:
        f.write(launchProgram)

    # rwxr-xr-x: the launcher must be executable to be run by the scheduler.
    os.chmod(launcher, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    return launcher
def usage():
    """Print the command-line help text to standard output."""
    # A parenthesised single-argument print emits the same text whether it
    # is parsed as a Python 2 print statement or a Python 3 function call.
    print("""
Usage:
dispatch.py OPTIONS
where OPTIONS are the following:
-h This help
-p PARAMETERS.mat Path to the parameters file (contains 'parameters')
-d DATA.mat Path to the data file (contains 'folds')
-t JOBNAME Name for this experiment
-n #NODES Maximum nodes to run in parallel (default: 20)
-b BASEDIRECTORY Path to the experiment code (default: CWD)
-o OUTDIRECTORY Path to where the output should go (default: ~/scratchbig)
""")
def main():
    """Parse the command line, generate the per-node jobs and submit them.

    Steps:
        1. Read the options from sys.argv into the module-level settings.
        2. Load the parameter file and count the parameter settings.
        3. Partition the 1-based parameter indices into at most NumNodes
           contiguous batches.
        4. Create a timestamped output directory below Outpath.
        5. Write one MATLAB script and launcher per batch (createJob).
        6. Submit every launcher with qsub.

    Raises:
        SystemExit: on -h/--help (status 0), on an unparsable command line
            (status 1), or with an explanatory message when an option
            value is invalid or a required file is missing.
    """
    global NumNodes
    global Parameters
    global Data
    global Jobname
    global Basepath
    global Outpath

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hp:d:n:t:b:o:", ["help"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o == "-p":
            Parameters = a
        elif o == "-d":
            Data = a
        elif o == "-n":
            try:
                NumNodes = int(a)
            except ValueError:
                sys.exit("Invalid node setting: %s" % a)
            if NumNodes < 1 or NumNodes > 20:
                sys.exit("Invalid node setting: %d" % NumNodes)
        elif o == "-t":
            Jobname = a
        elif o == "-b":
            Basepath = a
            if not os.path.exists(Basepath):
                sys.exit("Path doesn't exist! %s" % Basepath)
        elif o == "-o":
            Outpath = a
            if not os.path.exists(Outpath):
                sys.exit("Path doesn't exist! %s" % Outpath)
        else:
            sys.exit("Unsupported option: %s" % o)

    # Explicit checks instead of assert: asserts vanish under "python -O".
    missing = [flag for flag, value in (("-p", Parameters), ("-d", Data))
               if value is None]
    if missing:
        sys.exit("Missing required option(s): %s" % ", ".join(missing))
    if not os.path.exists(Data):
        sys.exit("%s does not exist!" % Data)
    if not os.path.exists(Parameters):
        sys.exit("%s does not exist!" % Parameters)

    # 1. Load the parameter array
    P = scipy.io.loadmat(Parameters)
    # loadmat returns MATLAB arrays as 2-D arrays, so len() would count only
    # the rows (1 for a 1xN array).  The generated MATLAB code indexes
    # parameters(j) linearly, hence the total element count is what we need.
    numJobs = P['parameters'].size
    if numJobs < 1:
        sys.exit("No parameter settings found in %s" % Parameters)

    # 2. Partition into (NumNodes) jobs; MATLAB indices are 1-based
    jobs = split(list(range(1, 1 + numJobs)), NumNodes)

    # 2.5. Create a timestamp and directory for this experiment
    Outpath = '%s/%s_%s' % (Outpath, Jobname, time.strftime('%Y%m%d_%H%M%S'))
    os.mkdir(Outpath)

    # 3. Generate matlab scripts and launchers
    launchscripts = [createJob(job, x, len(jobs)) for x, job in enumerate(jobs)]

    # 4. Launch
    for script in launchscripts:
        launch = ['qsub', '-l', 'matlab=1',   # We need nodes with matlab
                  # '-l', 'arch=lx26-x86',    # We need x86 architecture for mex
                  '-S', '/bin/bash',          # We need bash for matlab's system() to work properly
                  '-e', Outpath,
                  '-o', Outpath,
                  script]
        status = subprocess.call(launch)
        if status != 0:
            # Keep submitting the remaining batches, but do not fail silently.
            sys.stderr.write("qsub exited with status %d for %s\n" % (status, script))
# Run the dispatcher only when this file is executed as a script, so that
# importing the module does not trigger any job submission.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment