Last active
October 13, 2024 21:24
-
-
Save MaggieMoss/dd55cac2d8ce7c7fe00c6911d195120a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio.Seq import Seq | |
from Bio import Entrez | |
from Bio import pairwise2 | |
from Bio.Align import substitution_matrices | |
from Bio.pairwise2 import format_alignment | |
# Contact address sent along with every Entrez request made below.
# NOTE(review): the value looks like a redacted placeholder — replace it with
# a real e-mail address before running.
Entrez.email = "<hidden @ mail>"
# BLOSUM62 substitution matrix, loaded once and used to score the alignment.
blosum62 = substitution_matrices.load("BLOSUM62")
def get_protein_sequence(id: str) -> str:
    """Fetch a protein record from NCBI Entrez and return its bare sequence.

    The record is requested in FASTA format; the first line of the response
    (the FASTA header) is discarded and the remaining lines are joined into
    one string with no line breaks.

    Args:
        id: Identifier passed to ``Entrez.efetch`` for the ``protein``
            database. The name shadows the builtin ``id`` but is kept so
            existing keyword callers continue to work.

    Returns:
        The sequence as a single string without newlines.

    Raises:
        IndexError: If the response contains no lines at all.
    """
    # "text" is the documented E-utilities retmode for plain-text FASTA.
    fasta = Entrez.efetch(db="protein", id=id,
                          rettype="fasta", retmode="text")
    try:
        fasta_list = fasta.readlines()
    finally:
        # Always release the underlying HTTP connection, even if reading fails.
        fasta.close()
    # Drop the FASTA header line; everything after it is sequence data.
    fasta_list.pop(0)
    return "".join(fasta_list).strip().replace("\n", "")
# Protein identifiers of the two sequences to compare.
human_id = "CAA35621.1"
fly_id = "NP_476761.3"

human_sequence = get_protein_sequence(human_id)
fly_sequence = get_protein_sequence(fly_id)

# Only the last ten residues of each protein take part in the alignment.
human_tail = Seq(human_sequence[-10:])
fly_tail = Seq(fly_sequence[-10:])

# Local alignment scored with BLOSUM62. Gap costs (open -11, extend -1) are
# taken from the defaults on the BLAST algorithm page for BLOSUM62.
# NOTE(review): BLAST and pairwise2 may count the first gap position
# differently (see pairwise2's penalize_extend_when_opening) — confirm if
# exact BLAST-equivalent gap scoring matters.
alignments = pairwise2.align.localds(human_tail, fly_tail, blosum62, -11, -1)

for alignment in alignments:
    print(format_alignment(*alignment))
# output:
# TSCAQNWIYE
# |..|..||||
# TPAAKDWIYE
#   Score=37
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment