Created
May 29, 2013 21:04
-
-
Save ivan-krukov/5673834 to your computer and use it in GitHub Desktop.
Different ways of parsing fasta files. Last one is my favorite - generators and named tuples.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load everything in memory
#split
def split_fasta(input_file):
    """Parse a FASTA file by reading it fully into memory and splitting it.

    Args:
        input_file: Path of the FASTA file to read.

    Returns:
        A list of ``(header, sequence)`` tuples in file order. ``header`` is
        the text of the header line without its leading ">"; ``sequence`` is
        the rest of the record with all newlines removed. A header with no
        sequence lines yields an empty sequence string. Any text before the
        first header line is discarded.
    """
    with open(input_file) as fasta_file:
        text = fasta_file.read()
    # A record starts only where ">" is the first character of a line.
    # Prepending a newline lets the very first header match the same
    # "\n>" separator, and keeps a ">" inside a description from being
    # mistaken for the start of a new record.
    records = ("\n" + text).split("\n>")[1:]
    data = []
    for entry in records:
        # partition() never raises, so a header-only record at the end of
        # the file (no trailing newline) gives an empty sequence instead of
        # a ValueError from tuple unpacking.
        header, _, sequence = entry.partition("\n")
        data.append((header, sequence.replace("\n", "")))
    return data
#same using regular expressions
from re import findall, split, sub, MULTILINE | |
def re_fasta(input_file):
    """Parse a FASTA file held fully in memory using regular expressions.

    Args:
        input_file: Path of the FASTA file to read.

    Returns:
        A list of ``(header, sequence)`` tuples in file order. Unlike
        ``split_fasta``, ``header`` here keeps its leading ">" because the
        whole header line is part of the regex match. ``sequence`` is the
        remainder of the record with all newlines removed; it is empty for a
        header that has no sequence lines.
    """
    with open(input_file) as fasta_file:
        text = fasta_file.read()
    # Group 1: a header line, i.e. ">" at the start of a line (MULTILINE
    #          makes "^" match after every newline) up to the end of line.
    # Group 2: every following line that does not itself start with ">".
    # Anchoring on line starts keeps a ">" inside a description from
    # splitting the record, and capturing both parts avoids unpacking a
    # split() result that has only one element for header-only records.
    records = findall(r"^(>[^\n]*)((?:\n(?!>)[^\n]*)*)", text, MULTILINE)
    return [(header, sub("\n", "", body)) for header, body in records]
#using named tuples
from collections import namedtuple | |
# One FASTA record: the header text and the concatenated sequence.
Entry = namedtuple("Entry", ("header", "seq"))


def nt_fasta(input_file):
    """Parse a FASTA file fully in memory into a list of ``Entry`` tuples.

    Args:
        input_file: Path of the FASTA file to read.

    Returns:
        A list of ``Entry(header, seq)`` named tuples in file order.
        ``header`` is the header line without its leading ">"; ``seq`` is
        the rest of the record with all newlines removed, or an empty string
        when the header has no sequence lines. Any text before the first
        header line is discarded.
    """
    with open(input_file) as fasta_file:
        text = fasta_file.read()
    # Only a ">" at the start of a line opens a record; the prepended
    # newline lets the first header match the same "\n>" separator.
    records = ("\n" + text).split("\n>")[1:]
    data = []
    for entry in records:
        # partition() tolerates a header-only record with no newline, where
        # unpacking the result of split("\n", 1) would raise ValueError.
        header, _, sequence = entry.partition("\n")
        data.append(Entry(header, seq=sequence.replace("\n", "")))
    return data
#line by line with generators
def gen_fasta(input_file):
    """Lazily yield FASTA records from a file, reading it line by line.

    Only the record currently being assembled is buffered, so the whole
    file is never held in memory at once.

    Args:
        input_file: Path of the FASTA file to read.

    Yields:
        ``Entry(header, seq)`` named tuples in file order. ``header`` is the
        stripped header line including its leading ">"; ``seq`` is the
        concatenation of the stripped lines that follow it, or an empty
        string when there are none. Lines that appear before the first
        header are ignored, and an empty file yields nothing.
    """
    Entry = namedtuple("Entry", ("header", "seq"))
    # None until the first header line has been seen. Tracking the header,
    # rather than whether the buffer is non-empty, keeps records with an
    # empty sequence and avoids touching an unassigned name when the file
    # is empty or starts with blank/stray lines.
    header = None
    seq_buffer = []
    with open(input_file) as fasta_file:
        for line in fasta_file:
            line = line.strip()
            if line.startswith(">"):
                if header is not None:
                    # A new header closes the record collected so far.
                    yield Entry(header, "".join(seq_buffer))
                header = line
                seq_buffer = []
            elif header is not None:
                seq_buffer.append(line)
    # Flush the final record, which has no following header to close it.
    if header is not None:
        yield Entry(header, "".join(seq_buffer))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment