Last active
August 29, 2015 14:08
-
-
Save brevans/7a57c38e3db78d2ba161 to your computer and use it in GitHub Desktop.
Dumbly concatenate paired-end Illumina sequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import gzip
import re
import sys
from glob import glob
from itertools import zip_longest
from os.path import abspath, basename, exists

from Bio import SeqIO
def gopen(fi, *args, **kwargs):
    """Open *fi*, using gzip when the name ends in ``.gz``.

    Any extra positional and keyword arguments (mode, encoding, ...) are
    forwarded unchanged to ``gzip.open`` or to the builtin ``open``,
    whichever is selected by the file name.
    """
    opener = gzip.open if fi.endswith('.gz') else open
    return opener(fi, *args, **kwargs)
def cat_reads(r1_fn, r2_fn, concat=False):
    """Write each R1 read joined to the reverse complement of its R2 mate.

    Records are read in lockstep from the two FASTQ files (plain or
    gzip-compressed, as decided by ``gopen``) and written as FASTQ to a
    file in the current working directory whose name is the basename of
    ``r1_fn`` with every ``_R1`` removed.

    Args:
        r1_fn: Path to the forward-read FASTQ file.
        r2_fn: Path to the reverse-read FASTQ file.
        concat: Unused; accepted only so existing callers keep working.

    Raises:
        ValueError: If the derived output path is one of the input files,
            which would truncate that input before it could be read.
    """
    out_fn = re.sub('_R1', '', basename(r1_fn))
    # When r1_fn has no "_R1" and lives in the working directory, the output
    # name equals the input name; opening it for writing would wipe the input.
    if abspath(out_fn) in (abspath(r1_fn), abspath(r2_fn)):
        raise ValueError(
            "output file %r would overwrite an input file" % out_fn)

    # Context managers guarantee the output is flushed and every handle is
    # closed, even if parsing fails part-way through.
    with gopen(r1_fn, 'rt') as r1_in, \
            gopen(r2_fn, 'rt') as r2_in, \
            gopen(out_fn, 'wt') as cat_out:
        mates = zip_longest(SeqIO.parse(r1_in, "fastq"),
                            SeqIO.parse(r2_in, "fastq"))
        for r1, r2 in mates:
            if r1 is None or r2 is None:
                # One file ran out first: keep what was paired so far, but
                # say so instead of dropping the remaining reads silently.
                sys.stderr.write(
                    "warning: %s and %s contain different numbers of reads; "
                    "unpaired trailing reads were skipped\n" % (r1_fn, r2_fn))
                break
            merged = r1 + r2.reverse_complement()
            # reverse_complement() does not carry over id/name/description by
            # default, and SeqRecord addition keeps them only when both sides
            # agree, so restore R1's identity on the joined read.
            merged.id = r1.id
            merged.name = r1.name
            merged.description = r1.description
            SeqIO.write(merged, cat_out, 'fastq')
if __name__ == '__main__':
    # Pair files by name, not by position in the glob result: glob returns
    # matches in arbitrary order, and any extra .fq file in the directory
    # (for example the output of a previous run) would shift every pair.
    for r1 in sorted(glob('./*_R1*.fq')):
        r2 = r1.replace('_R1', '_R2')
        if not exists(r2):
            sys.stderr.write(
                "warning: no R2 mate found for %s; skipping\n" % r1)
            continue
        cat_reads(r1, r2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment