Last active
September 16, 2024 12:25
-
-
Save peterk87/5422267 to your computer and use it in GitHub Desktop.
Python: Parse Genbank file using BioPython
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.SeqRecord import SeqRecord

# Example script: load a GenBank file with Biopython, then print the number
# of sequence records, each record's annotations, and every CDS feature.

# Get all sequence records for the specified GenBank file. SeqIO.parse is
# consumed into a list because the records are iterated more than once below.
recs = list(SeqIO.parse("genbank_file.gbk", "genbank"))

# Print the number of sequence records that were extracted.
# NOTE: print(x) with a single argument gives the same output on Python 2
# and Python 3, so the script runs under either interpreter.
print(len(recs))

# Print the annotations for each sequence record.
for rec in recs:
    print(rec.annotations)

# Print the CDS sequence feature summary information for each feature in each
# sequence record.
for rec in recs:
    feats = [feat for feat in rec.features if feat.type == "CDS"]
    for feat in feats:
        print(feat)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment