Last active
December 23, 2015 15:09
-
-
Save brevans/6653392 to your computer and use it in GitHub Desktop.
bowtie 2 -> sorted bams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
'''
example:
./gen_bowtie2_coms.py fastq_dir ref bin_dir out_dir [bowtie2_args]
-all sample fastq files must be in one directory
-ref is handed to bowtie2 as its -x argument
-the bin directory should have samtools and bowtie2
-the outdir must already exist
-bowtie2_args is optional (default: "--no-unal --very-sensitive-local")
This script prints the commands to screen. You can save them to a file with
./gen_bowtie2_coms.py fastq_dir ref bin_dir out_dir > jobs.txt
'''
import sys | |
import re | |
from os import path | |
from collections import defaultdict as dd | |
from glob import glob | |
def abs_join(a, b):
    '''Return ``b`` joined onto the absolute form of ``a``.

    ``a`` is made absolute with ``path.abspath`` first; ``b`` is then
    appended with ``path.join``.
    '''
    absolute_base = path.abspath(a)
    return path.join(absolute_base, b)
def get_sample_fqs(dir):
    '''Group the ``*.fastq.gz`` files in a directory by sample and mate.

    File names are matched against
    ``<sample>_L<3 digits>_R<digit>_<3 digits>.fastq.gz``. Files whose name
    does not match, or whose read number is not 1 or 2, are skipped with a
    warning on stderr (stdout is left alone because the script prints its
    generated commands there).

    Args:
        dir: directory holding the fastq files (relative or absolute).

    Returns:
        A defaultdict keyed by sample name. Each value is a dict with
        ``'1'`` and ``'2'`` (lists of absolute file paths for each mate, in
        sorted path order) and ``'rg_id_string'`` (the bowtie2 read-group
        options for that sample).
    '''
    # Compiled once, outside the loop; raw string so the escapes reach re.
    name_pattern = re.compile(r'([\w\-_]*)_L\d\d\d_R(\d)_\d\d\d\.fastq\.gz')
    samples = dd(lambda: {'1': [], '2': []})
    # glob returns files in arbitrary order; sorting keeps the '1' and '2'
    # lists of a sample in the same lane/chunk order so mates stay paired.
    for fi in sorted(glob(path.join(path.abspath(dir), '*.fastq.gz'))):
        # for each file, find its sample name and its read orientation
        mat = name_pattern.match(path.basename(fi))
        pair_num = mat.group(2) if mat else None
        if pair_num not in ('1', '2'):
            sys.stderr.write(
                'skipping {0}: not a recognised R1/R2 fastq name\n'.format(fi))
            continue
        s_name = mat.group(1)
        # add the file to the samples dictionary
        entry = samples[s_name]
        entry[pair_num].append(fi)
        entry['rg_id_string'] = (
            '--rg-id {0} --rg SM:{0} --rg LB:{0} --rg PL:ILLUMINA'.format(s_name))
    return samples
def generate_bowtie2_commands(samples, ref, bin, out, bowtie_args):
    '''Build one shell pipeline per sample: bowtie2 -> sorted, indexed BAM.

    Args:
        samples: mapping of sample name to a dict with ``'1'`` and ``'2'``
            (lists of mate fastq paths) and ``'rg_id_string'`` (read-group
            options), as produced by ``get_sample_fqs``.
        ref: value placed after bowtie2's ``-x`` option.
        bin: directory containing the ``bowtie2`` and ``samtools`` binaries.
        out: output directory; each sample's output prefix is ``out/<sample>``.
        bowtie_args: extra bowtie2 options, inserted verbatim.

    Returns:
        A list of command strings, ordered by sample name so repeated runs
        produce the same job file.

    Raises:
        ValueError: if a sample has no mate-1 files or a different number
            of mate-1 and mate-2 files; the ``-1 ... -2 ...`` command would
            be unusable in that case.
    '''
    # Resolve the directories once instead of once per sample.
    bin_dir = path.abspath(bin)
    out_dir = path.abspath(out)
    bowtie2 = path.join(bin_dir, 'bowtie2')
    samtools = path.join(bin_dir, 'samtools')
    # NOTE(review): `samtools sort - <prefix>` looks like the legacy samtools
    # calling convention -- confirm against the samtools version in `bin`.
    template = ('{0} {1} {2} -x {3} -1 {4} -2 {5} | {6} view -Su - | '
                '{6} sort - {7}; {6} index {7}.bam;')
    coms = []
    for sample in sorted(samples):
        info = samples[sample]
        mates1 = info['1']
        mates2 = info['2']
        if not mates1 or len(mates1) != len(mates2):
            raise ValueError(
                'sample {0!r} has {1} R1 file(s) but {2} R2 file(s); '
                'paired-end input needs matching mates'.format(
                    sample, len(mates1), len(mates2)))
        coms.append(template.format(
            bowtie2,
            info['rg_id_string'],
            bowtie_args,
            ref,
            ','.join(mates1),
            ','.join(mates2),
            samtools,
            path.join(out_dir, sample)))
    return coms
def main():
    '''Read the command line, build the bowtie2 commands and print them.

    Positional arguments (from ``sys.argv``): fastq_dir, ref, bin_dir,
    out_dir and an optional bowtie2 argument string, which defaults to
    ``'--no-unal --very-sensitive-local'``. The commands are written to
    stdout, one per line.

    Exits with a usage message on stderr when fewer than four positional
    arguments are given.
    '''
    if len(sys.argv) < 5:
        prog = sys.argv[0] if sys.argv else 'gen_bowtie2_coms.py'
        sys.exit('usage: {0} fastq_dir ref bin_dir out_dir [bowtie2_args]'.format(prog))
    samples = get_sample_fqs(sys.argv[1])
    ref, bindir, outdir = sys.argv[2:5]
    if len(sys.argv) > 5:
        bowtie2_args = sys.argv[5]
    else:
        bowtie2_args = '--no-unal --very-sensitive-local'
    commands = generate_bowtie2_commands(samples, ref, bindir, outdir, bowtie2_args)
    # Parenthesised single-argument form runs on both Python 2 and Python 3.
    print('\n'.join(commands))
# Run the command generator only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment