Skip to content

Instantly share code, notes, and snippets.

@arq5x
Created November 10, 2016 20:39
Show Gist options
  • Save arq5x/b0b8f28e90fee6d69ab525f7c85e2262 to your computer and use it in GitHub Desktop.
Save arq5x/b0b8f28e90fee6d69ab525f7c85e2262 to your computer and use it in GitHub Desktop.
python batch submission
# STEP 1: for all fams, print each SGS region (chrom, start, end)
import sys
import subprocess as sub
def run_jobs(commands):
    """
    Write one batch of shell commands to ``rungemini.sh`` and run it.

    The script file is overwritten on every call, so it always holds only
    the most recent batch.

    NOTE: this is currently a dry run. The real ``rj`` submission is
    commented out below and the generated script is only echoed with
    ``cat`` so it can be inspected.

    commands: a single string holding the newline-terminated shell
              commands that make up the batch.
    """
    # "with" closes (and therefore flushes) the script even if the write
    # fails, so the handle never leaks and "cat" sees the full contents.
    with open('rungemini.sh', 'w') as script:
        script.write(commands)
    # execute rj for the current version of rungemini.sh
    # sub.call("/uufs/chpc.utah.edu/common/home/u6000294/bin/rj -c rungemini.sh", shell=True)
    # A fixed argv list needs no shell to interpret it.
    sub.call(["cat", "rungemini.sh"])
max_work = 2  # TODO: increase this


def build_command(fam, chrom, start, end):
    """
    Return the shell snippet that runs one GEMINI query.

    The query selects variants inside the region chrom:start-end whose
    impact_severity is not 'LOW', together with the gt_types columns of
    samples matching ``phenotype==2 and family_id == fam``. Output is
    redirected to ``<fam>.<chrom>.<start>.<end>.txt``.

    All four arguments are strings and are pasted into the command as-is.
    The snippet ends with "&\\nwait\\n": the query is backgrounded and the
    next script line waits for it.
    """
    # The earlier query text ended with an extra ")" after
    # impact_severity !='LOW' that had no matching "(", which makes the
    # SQL unparseable; the parentheses are balanced here.
    # NOTE(review): chrom is inserted unquoted. If the regions file holds
    # names such as chr1 the SQL probably needs chrom = 'chr1' -- confirm
    # against the input data before changing.
    query = (
        "SELECT *, (gt_types).(phenotype==2 and family_id == {fam}) "
        "FROM variants WHERE (chrom = {chrom} and start >= {start} "
        "and end <= {end}) AND impact_severity !='LOW'"
    ).format(fam=fam, chrom=chrom, start=start, end=end)
    return (
        "gemini query --header -q \"{query}\" "
        "/uufs/chpc.utah.edu/common/home/u6000771/Data/simons.db"
        " > {fam}.{chrom}.{start}.{end}.txt &\nwait\n"
    ).format(query=query, fam=fam, chrom=chrom, start=start, end=end)


def iter_rows(path):
    """
    Yield the tab-separated fields of every non-blank line in path.

    Each line is stripped of surrounding whitespace before splitting.
    Blank lines are skipped so that a trailing empty line in an input
    file does not produce an empty family id or break region unpacking.
    """
    with open(path) as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                yield stripped.split('\t')


def main(argv):
    """
    Build one query per (family, region) pair and run them in batches.

    argv[1]: file whose first tab-separated column is a family id.
    argv[2]: file with three tab-separated columns: chrom, start, end.

    Commands are handed to run_jobs in groups of max_work; a final,
    smaller group is flushed at the end if any commands remain.
    """
    fam_path, regions_path = argv[1], argv[2]
    # The regions are the same for every family, so parse them only once.
    regions = list(iter_rows(regions_path))

    work_size = 0
    batch = []
    for fam_fields in iter_rows(fam_path):
        # split always makes a list even if there is just one element;
        # the family id is its first (0th) element
        fam = fam_fields[0]
        # unpacking raises ValueError if a region line is not 3 columns
        for chrom, start, end in regions:
            batch.append(build_command(fam, chrom, start, end))
            work_size += 1
            # a full batch of max_work commands has been collected
            if work_size % max_work == 0:
                run_jobs("".join(batch))
                batch = []
    # run the last, partial batch; skip it when nothing is left so that
    # an empty script is never submitted
    if batch:
        run_jobs("".join(batch))


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment