Skip to content

Instantly share code, notes, and snippets.

@gregcaporaso
Created July 10, 2012 17:07
Show Gist options
  • Save gregcaporaso/3084751 to your computer and use it in GitHub Desktop.
Save gregcaporaso/3084751 to your computer and use it in GitHub Desktop.
Script for summarizing barcode error in the 'ultra-deep' ANL sequencing run
#!/usr/bin/env python
# File created on 03 Jul 2012
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.5.0-dev"
__maintainer__ = "Greg Caporaso"
__email__ = "[email protected]"
__status__ = "Development"
from glob import glob
from os.path import split, join
from qiime.golay import decode
from qiime.parse import MinimalFastqParser
from qiime.util import parse_command_line_parameters, make_option, gzip_open
script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""
script_info['script_usage'] = [("","","")]
script_info['output_description']= ""
script_info['required_options'] = [
make_option('-i','--input_glob',help='wildcard string to match input barcode file(s)'),
make_option('-o','--output_dir',type="new_dirpath",help='the output directory'),
make_option('-b','--correct_barcode',help="the expected barcode sequence")
]
script_info['optional_options'] = []
script_info['version'] = __version__
def main():
option_parser, opts, args =\
parse_command_line_parameters(**script_info)
correct_barcode = opts.correct_barcode
results = {}
fps = glob("%s" % opts.input_glob)
for fp in fps:
lane_id = split(fp)[1].split('_')[0]
for _, seq, _ in MinimalFastqParser(gzip_open(fp)):
try:
results[seq] += 1
except KeyError:
results[seq] = 1
all_f = open(join(opts.output_dir,'barcode_counts.txt'),'w')
summary_f = open(join(opts.output_dir,'result_summary.txt'),'w')
summary_f.write('# %s\n' % ' '.join(fps))
data = [(v,k) for k, v in results.items()]
data.sort()
summary = {}
summary[True] = {0:0,1:0,2:0,3:0,4:0,-1:0}
summary[False] = {0:0,1:0,2:0,3:0,4:0,-1:0}
for count,bc in data[::-1]:
try:
new, diffs = decode(bc)
except KeyError:
new, diffs = "Can't decode", -1
all_f.write('%s\t%s\t%d\t%d\t%s\t%s\n' % (lane_id,bc,count,diffs,new,str(new==correct_barcode)))
summary[new==correct_barcode][diffs] += count
summary_f.write('%s\tCorrect Barcode\t%d\t%d\t%d\t%d\t%d\t%d\n' %\
(lane_id,summary[True][0],summary[True][1],summary[True][2],summary[True][3],summary[True][4],summary[True][-1]))
summary_f.write('%s\tIncorrect Barcode\t%d\t%d\t%d\t%d\t%d\t%d\n' %\
(lane_id,summary[False][0],summary[False][1],summary[False][2],summary[False][3],summary[False][4],summary[False][-1]))
all_f.close()
summary_f.close()
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment