Created
December 2, 2016 00:04
-
-
Save carlomazzaferro/c80e01a5dcd22f36836652259a4d1840 to your computer and use it in GitHub Desktop.
Parsing fasta file to python lists: protein ID, peptide sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_separate_lists(fasta_file):
    """
    Creates 2 parallel lists from a fasta file.

    The file is read in a single pass. Every line starting with '>' opens a
    new record; the following lines (until the next '>' line or end of file)
    are right-stripped and concatenated into that record's sequence.

    The two returned lists always have the same length, so ``ids[n]`` belongs
    to ``peptides[n]``. A header that is not followed by any sequence line
    yields an empty string in the peptide list. Blank lines are ignored, and
    any text that appears before the first header is skipped because there is
    no ID to pair it with.

    :param fasta_file: path of the fasta file to read
    :return: one list for the IDs in the file (header lines, leading '>'
             kept, trailing whitespace removed) and one list for the
             proteins/peptides in it
    """
    ids = []
    # One list of sequence chunks per record; kept in lockstep with ``ids`` so
    # the two outputs can never drift out of alignment.
    chunks_per_record = []

    with open(fasta_file) as infile:
        # Iterate the file lazily instead of loading every line up front.
        for line in infile:
            text = line.rstrip()
            if line.startswith('>'):
                ids.append(text)
                chunks_per_record.append([])
            elif text and chunks_per_record:
                # Non-blank sequence line belonging to the current record.
                chunks_per_record[-1].append(text)

    # Join once per record rather than growing a string with repeated +=.
    peptides = ["".join(chunks) for chunks in chunks_per_record]
    return ids, peptides
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment