Last active
December 26, 2015 14:19
-
-
Save walterst/7164604 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" Usage | |
python extract_bcs_from_fastq_ends.py X Y Z A B C D E | |
Where | |
X: input fastq file with barcodes at the beginning and ends of the reads | |
Y: output fastq barcodes file | |
Z: output reads fastq file (with barcodes removed) | |
A: length of forward barcode (int) | |
B: length of reverse barcode (int) | |
C: True/False flag to reverse complement the forward barcode | |
D: True/False flag to reverse complement the reverse barcode | |
E: True/False switch order of barcodes (False-order is barcode at start of | |
the read followed by barcode at end of read, True-order is barcode at end of | |
read followed by barcode at the beginning of the read). | |
""" | |
from sys import argv | |
from cogent.parse.fastq import MinimalFastqParser | |
from cogent import DNA | |
f = open(argv[1], "U") | |
bc_out = open(argv[2], "w") | |
read_out = open(argv[3], "w") | |
f_barcode_size = int(argv[4]) | |
r_barcode_size = int(argv[5]) | |
rc_f_bc = argv[6] | |
if rc_f_bc == "True": | |
rc_f_bc = True | |
elif rc_f_bc == "False": | |
rc_f_bc = False | |
else: | |
raise ValueError,("Fifth parameter must either be True or False") | |
rc_r_bc = argv[7] | |
if rc_r_bc == "True": | |
rc_r_bc = True | |
elif rc_r_bc == "False": | |
rc_r_bc = False | |
else: | |
raise ValueError,("Sixth parameter must either be True or False") | |
switch_bc_order = argv[8] | |
if switch_bc_order == "True": | |
switch_bc_order = True | |
elif switch_bc_order == "False": | |
switch_bc_order = False | |
else: | |
raise ValueError,("Seventh parameter must either be True or False") | |
for data in MinimalFastqParser(f, strict=False): | |
curr_label = data[0].strip() | |
curr_f_bc_read = data[1].strip()[0:f_barcode_size] | |
curr_read = data[1].strip()[f_barcode_size:-r_barcode_size] | |
curr_r_bc_read = data[1].strip()[-r_barcode_size:] | |
if rc_f_bc: | |
curr_f_bc_read = DNA.rc(curr_f_bc_read) | |
if rc_r_bc: | |
curr_r_bc_read = DNA.rc(curr_r_bc_read) | |
curr_f_bc_qual = data[2].strip()[0:f_barcode_size] | |
curr_qual = data[2].strip()[f_barcode_size:-r_barcode_size] | |
curr_r_bc_qual = data[2].strip()[-r_barcode_size:] | |
if rc_f_bc: | |
curr_f_bc_qual = curr_f_bc_qual[::-1] | |
if rc_r_bc: | |
curr_r_bc_qual = curr_r_bc_qual[::-1] | |
bc_out.write("@%s\n" % curr_label) | |
if switch_bc_order: | |
final_bc_read = curr_r_bc_read + curr_f_bc_read | |
final_bc_qual = curr_r_bc_qual + curr_f_bc_qual | |
else: | |
final_bc_read = curr_f_bc_read + curr_r_bc_read | |
final_bc_qual = curr_f_bc_qual + curr_r_bc_qual | |
bc_out.write("%s\n" % final_bc_read) | |
bc_out.write("+\n") | |
bc_out.write("%s\n" % final_bc_qual) | |
read_out.write("@%s\n" % curr_label) | |
read_out.write("%s\n" % curr_read) | |
read_out.write("+\n") | |
read_out.write("%s\n" % curr_qual) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment