Skip to content

Instantly share code, notes, and snippets.

@brevans
Last active August 29, 2015 14:08
Show Gist options
  • Save brevans/7a57c38e3db78d2ba161 to your computer and use it in GitHub Desktop.
Save brevans/7a57c38e3db78d2ba161 to your computer and use it in GitHub Desktop.
Dumbly concatenate paired-end Illumina sequences.
#!/usr/bin/env python
import gzip
import sys
from glob import glob
from os.path import basename
import re
from Bio import SeqIO
def gopen(fi, *args, **kwargs):
    """Open *fi*, transparently using gzip when the name ends in ``.gz``.

    Args:
        fi: Path of the file to open. May be a ``str``, ``bytes`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).
        *args: Positional arguments forwarded unchanged to the opener
            (typically the mode, e.g. ``'rt'`` or ``'wt'``).
        **kwargs: Keyword arguments forwarded unchanged to the opener.

    Returns:
        The file object returned by ``gzip.open`` for ``.gz`` names, or by
        the builtin ``open`` otherwise.

    Note:
        Pass the mode explicitly: with no mode, ``gzip.open`` defaults to
        binary (``'rb'``) while ``open`` defaults to text (``'r'``).
    """
    # Function-scope import: the module only imports names from os.path.
    from os import fspath

    path = fspath(fi)
    # str.endswith / bytes.endswith each require a suffix of their own type.
    suffix = b'.gz' if isinstance(path, bytes) else '.gz'
    opener = gzip.open if path.endswith(suffix) else open
    return opener(path, *args, **kwargs)
def cat_reads(r1_fn, r2_fn, concat=False):
    """Join every R1 read to the reverse complement of its R2 mate.

    Records are read in lockstep from the two FASTQ files; each pair is
    combined as ``r1 + r2.reverse_complement()`` and written as FASTQ. The
    output file is created in the current working directory and is named
    after the basename of *r1_fn* with every ``_R1`` removed.

    Args:
        r1_fn: Path to the forward-read FASTQ file (``.gz`` is handled by
            ``gopen``).
        r2_fn: Path to the reverse-read FASTQ file.
        concat: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If the derived output path is the same file as one of
            the inputs (e.g. *r1_fn* has no ``_R1`` in its name and lives in
            the working directory); opening it for writing would truncate
            the input before it is read.

    Note:
        ``zip`` stops at the shorter input, so surplus reads in the longer
        file are dropped without warning.
    """
    # Function-scope import: the module only imports ``basename`` from os.path.
    from os.path import abspath

    out_fn = re.sub('_R1', '', basename(r1_fn))
    if abspath(out_fn) in (abspath(r1_fn), abspath(r2_fn)):
        raise ValueError(
            'output file %r would overwrite an input file' % out_fn)

    # Inputs are opened first so a missing input does not leave an empty
    # output behind; the context manager closes (and flushes) all three
    # handles even if parsing fails part-way through.
    with gopen(r1_fn, 'rt') as r1_in, \
            gopen(r2_fn, 'rt') as r2_in, \
            gopen(out_fn, 'wt') as cat_out:
        mates = zip(SeqIO.parse(r1_in, "fastq"), SeqIO.parse(r2_in, "fastq"))
        joined = (r1 + r2.reverse_complement() for r1, r2 in mates)
        SeqIO.write(joined, cat_out, 'fastq')
if __name__ == '__main__':
    # glob() returns matches in arbitrary order, so sort them to make the
    # even/odd pairing below deterministic.
    # NOTE(review): this still assumes each R1 file sorts immediately before
    # its R2 mate (e.g. sample_R1.fq, sample_R2.fq) -- confirm naming.
    filenames = sorted(glob('./*.fq'))
    # Consecutive files form a pair; with an odd count the last file is
    # left unprocessed because zip() stops at the shorter slice.
    for r1, r2 in zip(filenames[0::2], filenames[1::2]):
        cat_reads(r1, r2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment