Created
November 10, 2016 20:39
-
-
Save arq5x/b0b8f28e90fee6d69ab525f7c85e2262 to your computer and use it in GitHub Desktop.
python batch submission
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STEP 1: for all families, build a gemini query for each SGS region (chrom, start, end)
import sys | |
import subprocess as sub | |
def run_jobs(commands):
    """
    Write one batch of shell commands to ``rungemini.sh`` and display it.

    commands: a single string holding the shell commands for this batch
        (the caller accumulates up to max_work commands before calling).

    The script file is overwritten on every call.  Submission through
    ``rj`` is currently disabled (see the commented-out call below);
    instead the script is echoed to stdout with ``cat`` so each batch
    can be inspected.

    Returns None.
    """
    # the with-block closes the file even if the write fails, so the
    # script is completely on disk before anything tries to read it
    with open('rungemini.sh', 'w') as script:
        script.write(commands)
    # real submission of the current rungemini.sh, disabled for now:
    # sub.call("/uufs/chpc.utah.edu/common/home/u6000294/bin/rj -c rungemini.sh", shell=True)
    # stand-in for rj: show the script that would have been submitted.
    # An argument list is used so no shell is needed for a fixed command.
    sub.call(["cat", "rungemini.sh"])
max_work = 2  # number of commands per submitted batch. TODO: increase this

# gemini database every query is run against
GEMINI_DB = "/uufs/chpc.utah.edu/common/home/u6000771/Data/simons.db"

# One shell command per (family, region).  The parentheses in the SQL are
# balanced: the WHERE clause closes exactly the one parenthesis it opens.
# NOTE(review): family and chrom values are inserted unquoted, so the input
# files must already carry any quoting the SQL needs -- confirm.
# NOTE(review): every command is backgrounded and immediately followed by
# its own `wait`, so commands inside one script run one after another --
# confirm this is what rj expects.
COMMAND_TEMPLATE = (
    'gemini query --header -q "SELECT *, '
    "(gt_types).(phenotype==2 and family_id == {fam}) "
    "FROM variants WHERE "
    "(chrom = {chrom} and start >= {start} and end <= {end}) "
    "AND impact_severity !='LOW'\" {db}"
    " > {fam}.{chrom}.{start}.{end}.txt &\nwait\n"
)


def build_command(fam, chrom, start, end):
    """Return the gemini shell command for one family and one region.

    All four arguments are strings taken verbatim from the input files.
    Output is redirected to ``<fam>.<chrom>.<start>.<end>.txt``.
    """
    return COMMAND_TEMPLATE.format(
        fam=fam, chrom=chrom, start=start, end=end, db=GEMINI_DB
    )


def read_families(path):
    """Return the first tab-separated column of every non-blank line."""
    with open(path) as handle:
        # split always returns a list, so [0] is safe even for one column
        return [row.strip().split('\t')[0] for row in handle if row.strip()]


def read_regions(path):
    """Return a list of (chrom, start, end) string tuples from *path*.

    Blank lines are skipped.  Any other line that does not have exactly
    three tab-separated columns raises ValueError.
    """
    regions = []
    with open(path) as handle:
        for row in handle:
            row = row.strip()
            if not row:
                continue
            chrom, start, end = row.split('\t')
            regions.append((chrom, start, end))
    return regions


def batch_commands(families, regions, batch_size):
    """Yield scripts holding up to *batch_size* commands each.

    Commands are generated family by family, region by region, and
    concatenated into one string per batch.  The final batch may be
    shorter; an empty batch is never yielded.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    pending = []
    for fam in families:
        for chrom, start, end in regions:
            pending.append(build_command(fam, chrom, start, end))
            if len(pending) == batch_size:
                yield "".join(pending)
                pending = []
    # leftover commands that did not fill a whole batch
    if pending:
        yield "".join(pending)


def main(argv):
    """Build every query and hand the batches to run_jobs.

    argv[1] is the family file and argv[2] is the region file; the region
    file is parsed once and reused for every family.
    """
    if len(argv) < 3:
        sys.exit("usage: {0} <families.tsv> <regions.tsv>".format(argv[0]))
    families = read_families(argv[1])
    regions = read_regions(argv[2])
    for script in batch_commands(families, regions, max_work):
        run_jobs(script)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment