genomewalker · August 4, 2024 06:29
diff --git a/get-rna.py b/get-rna.py
 import argparse
 import gzip
 from Bio import SeqIO
 from Bio.SeqFeature import SeqFeature, FeatureLocation
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord

 def extract_rrna_trna_features(input_file, output_file):
    """Write every rRNA and tRNA feature of a GenBank file as FASTA records.

    Each record of ``input_file`` is parsed as GenBank. For every feature
    whose type is ``rRNA`` or ``tRNA`` the feature sequence is extracted from
    the record and written to ``output_file`` under a header of the form
    ``locus_tag|type|product``.

    Args:
        input_file: Path of the GenBank file. A name ending in ``.gz`` is
            opened through ``gzip`` in text mode.
        output_file: Path of the FASTA file to create. A name ending in
            ``.gz`` is written through ``gzip`` in text mode.

    Exceptions raised while opening, parsing or writing propagate to the
    caller; both files are closed before they do.
    """
    # Choose the opener from the file extension; both are used in text mode.
    open_input = gzip.open if input_file.endswith(".gz") else open
    open_output = gzip.open if output_file.endswith(".gz") else open

    # The with-statement closes both handles even when parsing or writing
    # raises. The input is still opened first, so a missing input file never
    # creates an empty output file.
    with open_input(input_file, "rt") as input_handle, \
            open_output(output_file, "wt") as output_handle:
        for record in SeqIO.parse(input_handle, "genbank"):
            for feature in record.features:
                if feature.type not in ('rRNA', 'tRNA'):
                    continue
                # Missing qualifiers fall back to a placeholder so the header
                # always carries three fields.
                locus_tag = feature.qualifiers.get('locus_tag', ['unknown locus'])[0]
                product = feature.qualifiers.get('product', ['unknown product'])[0]
                feature_record = SeqRecord(
                    feature.extract(record.seq),
                    id=f"{locus_tag}|{feature.type}|{product}",
                    description="",
                )
                SeqIO.write(feature_record, output_handle, "fasta")

 def main():
    """Parse the command line and run the rRNA/tRNA extraction.

    Both ``--input``/``-i`` and ``--output``/``-o`` are required; their values
    are handed to ``extract_rrna_trna_features`` unchanged.
    """
    cli = argparse.ArgumentParser(
        description="Extract rRNA and tRNA features from a GenBank file"
    )
    # Declared in the same order as they appear in the generated help text.
    option_specs = (
        (('--input', '-i'), 'Input GenBank file (can be gzipped)'),
        (('--output', '-o'), 'Output file to store rRNA and tRNA features (can be gzipped)'),
    )
    for flags, help_text in option_specs:
        cli.add_argument(*flags, required=True, help=help_text)

    options = cli.parse_args()
    extract_rrna_trna_features(options.input, options.output)

 # Run the command-line interface only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()

diff --git a/get-xRNA.md b/get-xRNA.md
	import argparse
	import gzip
	from Bio import SeqIO
	from Bio.SeqFeature import SeqFeature, FeatureLocation
	from Bio.Seq import Seq
	from Bio.SeqRecord import SeqRecord

	def extract_rrna_trna_features(input_file, output_file):
	    """Write every rRNA and tRNA feature of a GenBank file as FASTA records.

	    Each record of ``input_file`` is parsed as GenBank. For every feature
	    whose type is ``rRNA`` or ``tRNA`` the feature sequence is extracted from
	    the record and written to ``output_file`` under a header of the form
	    ``locus_tag|type|product``.

	    Args:
	        input_file: Path of the GenBank file. A name ending in ``.gz`` is
	            opened through ``gzip`` in text mode.
	        output_file: Path of the FASTA file to create. A name ending in
	            ``.gz`` is written through ``gzip`` in text mode.

	    Exceptions raised while opening, parsing or writing propagate to the
	    caller; both files are closed before they do.
	    """
	    # Choose the opener from the file extension; both are used in text mode.
	    open_input = gzip.open if input_file.endswith(".gz") else open
	    open_output = gzip.open if output_file.endswith(".gz") else open

	    # The with-statement closes both handles even when parsing or writing
	    # raises. The input is opened first, so a missing input file never
	    # creates an empty output file.
	    with open_input(input_file, "rt") as input_handle, \
	            open_output(output_file, "wt") as output_handle:
	        for record in SeqIO.parse(input_handle, "genbank"):
	            for feature in record.features:
	                if feature.type not in ('rRNA', 'tRNA'):
	                    continue
	                # Missing qualifiers fall back to a placeholder so the header
	                # always carries three fields.
	                locus_tag = feature.qualifiers.get('locus_tag', ['unknown locus'])[0]
	                product = feature.qualifiers.get('product', ['unknown product'])[0]
	                # Plain '|' separators: a markdown-escaped '\|' inside the
	                # f-string would put literal backslashes into every header.
	                feature_record = SeqRecord(
	                    feature.extract(record.seq),
	                    id=f"{locus_tag}|{feature.type}|{product}",
	                    description="",
	                )
	                SeqIO.write(feature_record, output_handle, "fasta")

	def main():
	    """Parse the command line and run the rRNA/tRNA extraction.

	    Both ``--input``/``-i`` and ``--output``/``-o`` are required; their values
	    are handed to ``extract_rrna_trna_features`` unchanged.
	    """
	    parser = argparse.ArgumentParser(description="Extract rRNA and tRNA features from a GenBank file")
	    parser.add_argument('--input', '-i', required=True, help='Input GenBank file (can be gzipped)')
	    parser.add_argument('--output', '-o', required=True, help='Output file to store rRNA and tRNA features (can be gzipped)')

	    args = parser.parse_args()

	    extract_rrna_trna_features(args.input, args.output)

	# Run the command-line interface only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()