Skip to content

Instantly share code, notes, and snippets.

@MaggieMoss
Last active October 13, 2024 21:24
Show Gist options
  • Save MaggieMoss/dd55cac2d8ce7c7fe00c6911d195120a to your computer and use it in GitHub Desktop.
Save MaggieMoss/dd55cac2d8ce7c7fe00c6911d195120a to your computer and use it in GitHub Desktop.
from Bio.Seq import Seq
from Bio import Entrez
from Bio import pairwise2
from Bio.Align import substitution_matrices
from Bio.pairwise2 import format_alignment
# Contact address attached to the Bio.Entrez module; the efetch call in
# get_protein_sequence goes through this module.
# NOTE(review): NCBI asks API users to identify themselves with a real
# address — replace the placeholder before running.
Entrez.email = "<hidden @ mail>"
# BLOSUM62 substitution matrix, loaded once and passed to the pairwise
# alignment call further down as the residue scoring table.
blosum62 = substitution_matrices.load("BLOSUM62")
def get_protein_sequence(id: str) -> str:
    """Fetch a protein record from Entrez and return its bare residue string.

    The record is requested from the ``protein`` database in FASTA format.
    The first line of the response (the FASTA header) is dropped and the
    remaining lines are concatenated into a single string with no line
    breaks or surrounding whitespace.

    Args:
        id: Identifier passed straight through to ``Entrez.efetch``. The
            name shadows the ``id`` builtin but is kept so existing
            keyword callers continue to work.

    Returns:
        The sequence as one uninterrupted string.

    Raises:
        IndexError: If the response contains no lines at all, i.e. there
            is no header line to discard.
    """
    handle = Entrez.efetch(db="protein", id=id, rettype="fasta", retmode="txt")
    try:
        lines = handle.readlines()
    finally:
        # Release the underlying connection even if reading fails.
        handle.close()

    if not lines:
        raise IndexError(f"empty FASTA response for protein id {id!r}")

    # lines[0] is the FASTA header; stripping each remaining line removes
    # "\n" as well as any "\r" or padding the server may have sent.
    return "".join(line.strip() for line in lines[1:])
human_id = "CAA35621.1"
fly_id = "NP_476761.3"
human_sequence = get_protein_sequence(human_id)
fly_sequence = get_protein_sequence(fly_id)

# Only the last TAIL_LENGTH residues of each protein are compared.
TAIL_LENGTH = 10
# Gap costs taken from the defaults on the BLAST algorithm page for
# BLOSUM62 (existence 11, extension 1).
GAP_OPEN = -11
GAP_EXTEND = -1

# NOTE(review): Bio.pairwise2 is deprecated in recent Biopython releases in
# favour of Bio.Align.PairwiseAligner — consider migrating.
alignments = pairwise2.align.localds(
    Seq(human_sequence[-TAIL_LENGTH:]),
    Seq(fly_sequence[-TAIL_LENGTH:]),
    blosum62,
    GAP_OPEN,
    GAP_EXTEND,
    # BLAST charges existence + extension for a gap of length 1, whereas
    # pairwise2 by default charges only the open penalty for the first
    # gap position. This flag makes the two cost models agree.
    penalize_extend_when_opening=True,
)
for a in alignments:
    print(format_alignment(*a))
# output:
# TSCAQNWIYE
# |..|..||||
# TPAAKDWIYE
# Score=37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment