Created
July 10, 2012 17:07
-
-
Save gregcaporaso/3084751 to your computer and use it in GitHub Desktop.
Script for summarizing barcode error in the 'ultra-deep' ANL sequencing run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# File created on 03 Jul 2012 | |
from __future__ import division | |
__author__ = "Greg Caporaso" | |
__copyright__ = "Copyright 2011, The QIIME project" | |
__credits__ = ["Greg Caporaso"] | |
__license__ = "GPL" | |
__version__ = "1.5.0-dev" | |
__maintainer__ = "Greg Caporaso" | |
__email__ = "[email protected]" | |
__status__ = "Development" | |
from glob import glob | |
from os.path import split, join | |
from qiime.golay import decode | |
from qiime.parse import MinimalFastqParser | |
from qiime.util import parse_command_line_parameters, make_option, gzip_open | |
script_info = {} | |
script_info['brief_description'] = "" | |
script_info['script_description'] = "" | |
script_info['script_usage'] = [("","","")] | |
script_info['output_description']= "" | |
script_info['required_options'] = [ | |
make_option('-i','--input_glob',help='wildcard string to match input barcode file(s)'), | |
make_option('-o','--output_dir',type="new_dirpath",help='the output directory'), | |
make_option('-b','--correct_barcode',help="the expected barcode sequence") | |
] | |
script_info['optional_options'] = [] | |
script_info['version'] = __version__ | |
def main(): | |
option_parser, opts, args =\ | |
parse_command_line_parameters(**script_info) | |
correct_barcode = opts.correct_barcode | |
results = {} | |
fps = glob("%s" % opts.input_glob) | |
for fp in fps: | |
lane_id = split(fp)[1].split('_')[0] | |
for _, seq, _ in MinimalFastqParser(gzip_open(fp)): | |
try: | |
results[seq] += 1 | |
except KeyError: | |
results[seq] = 1 | |
all_f = open(join(opts.output_dir,'barcode_counts.txt'),'w') | |
summary_f = open(join(opts.output_dir,'result_summary.txt'),'w') | |
summary_f.write('# %s\n' % ' '.join(fps)) | |
data = [(v,k) for k, v in results.items()] | |
data.sort() | |
summary = {} | |
summary[True] = {0:0,1:0,2:0,3:0,4:0,-1:0} | |
summary[False] = {0:0,1:0,2:0,3:0,4:0,-1:0} | |
for count,bc in data[::-1]: | |
try: | |
new, diffs = decode(bc) | |
except KeyError: | |
new, diffs = "Can't decode", -1 | |
all_f.write('%s\t%s\t%d\t%d\t%s\t%s\n' % (lane_id,bc,count,diffs,new,str(new==correct_barcode))) | |
summary[new==correct_barcode][diffs] += count | |
summary_f.write('%s\tCorrect Barcode\t%d\t%d\t%d\t%d\t%d\t%d\n' %\ | |
(lane_id,summary[True][0],summary[True][1],summary[True][2],summary[True][3],summary[True][4],summary[True][-1])) | |
summary_f.write('%s\tIncorrect Barcode\t%d\t%d\t%d\t%d\t%d\t%d\n' %\ | |
(lane_id,summary[False][0],summary[False][1],summary[False][2],summary[False][3],summary[False][4],summary[False][-1])) | |
all_f.close() | |
summary_f.close() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment