walterst · December 22, 2015 16:18
diff --git a/parse_bc_from_read_end.py b/parse_bc_from_read_end.py
 #!/usr/bin/env python

 from sys import argv

 from cogent.parse.fasta import MinimalFastaParser
 from cogent import DNA

 """
 Requires PyCogent installation. This should be present if you have QIIME 
 installed. If you are using MacQIIME, initialize the environment by calling
 macqiime before using this script.

 Usage:
 python parse_bc_from_read_end.py X Y A B T/F
 Where:
 X is the input fasta file
 Y is the output fasta file
 A is the length of the forward barcode
 B is the length of the reverse barcode
 T/F: pass "T" to reverse complement the reverse barcode, "F" to leave it as is
 Example:
 python parse_bc_from_read_end.py seqs.fna parsed_seqs.fna 8 8 T

 This script will remove the last B bases of each read (assumed to be reverse
 barcode) and put them immediately after the initial barcode (length is 
 specified by the A parameter above). The final barcodes (combo of the forward
 and reverse barcodes) can be listed as such in the mapping file for use with
 split_libraries.py.

 This script is designed for fasta reads, and assumes that the beginning and end
 of the read are the barcodes. This script will not work if the barcodes are not
 in these positions.
 """

 input_fasta = open(argv[1], "U")
 output_fasta = open(argv[2], "w")
 f_bc_len = int(argv[3])
 r_bc_len = int(argv[4])
 if argv[5] == "T":
    rc = True
 else:
    rc = False
    
 for label, seq in MinimalFastaParser(input_fasta):
    curr_f_bc = seq[0:f_bc_len]
    if rc:
        curr_r_bc = DNA.rc(seq[-r_bc_len:])
    else:
        curr_r_bc = seq[-r_bc_len:]
    rest_of_seq = seq[f_bc_len:-r_bc_len]
    
    final_seq = curr_f_bc + curr_r_bc + rest_of_seq
    
    output_fasta.write(">%s\n%s\n" % (label, final_seq))
	#!/usr/bin/env python

	from sys import argv

	from cogent.parse.fasta import MinimalFastaParser
	from cogent import DNA

	"""
	Requires PyCogent installation. This should be present if you have QIIME
	installed. If you are using MacQIIME, initialize the environment by calling
	macqiime before using this script.

	Usage:
	python parse_bc_from_read_end.py X Y A B T/F
	Where:
	X is the input fasta file
	Y is the output fasta file
	A is the length of the forward barcode
	B is the length of the reverse barcode
	T/F: pass "T" to reverse complement the reverse barcode, "F" to leave it as is
	Example:
	python parse_bc_from_read_end.py seqs.fna parsed_seqs.fna 8 8 T

	This script will remove the last B bases of each read (assumed to be reverse
	barcode) and put them immediately after the initial barcode (length is
	specified by the A parameter above). The final barcodes (combo of the forward
	and reverse barcodes) can be listed as such in the mapping file for use with
	split_libraries.py.

	This script is designed for fasta reads, and assumes that the beginning and end
	of the read are the barcodes. This script will not work if the barcodes are not
	in these positions.
	"""

	input_fasta = open(argv[1], "U")
	output_fasta = open(argv[2], "w")
	f_bc_len = int(argv[3])
	r_bc_len = int(argv[4])
	if argv[5] == "T":
	rc = True
	else:
	rc = False

	for label, seq in MinimalFastaParser(input_fasta):
	curr_f_bc = seq[0:f_bc_len]
	if rc:
	curr_r_bc = DNA.rc(seq[-r_bc_len:])
	else:
	curr_r_bc = seq[-r_bc_len:]
	rest_of_seq = seq[f_bc_len:-r_bc_len]

	final_seq = curr_f_bc + curr_r_bc + rest_of_seq

	output_fasta.write(">%s\n%s\n" % (label, final_seq))