Skip to content

Instantly share code, notes, and snippets.

@lmiller1990
Created July 24, 2024 10:10
Show Gist options
  • Save lmiller1990/a19e9534ffb5beb0a4e3be51816e1d0b to your computer and use it in GitHub Desktop.
Save lmiller1990/a19e9534ffb5beb0a4e3be51816e1d0b to your computer and use it in GitHub Desktop.
Binf7000
import random
standardTranslation = {
"TTT": "F",
"TCT": "S",
"TAT": "Y",
"TGT": "C",
"TTC": "F",
"TCC": "S",
"TAC": "Y",
"TGC": "C",
"TTA": "L",
"TCA": "S",
"TAA": "*",
"TGA": "*",
"TTG": "L",
"TCG": "S",
"TAG": "*",
"TGG": "W",
"CTT": "L",
"CCT": "P",
"CAT": "H",
"CGT": "R",
"CTC": "L",
"CCC": "P",
"CAC": "H",
"CGC": "R",
"CTA": "L",
"CCA": "P",
"CAA": "Q",
"CGA": "R",
"CTG": "L",
"CCG": "P",
"CAG": "Q",
"CGG": "R",
"ATT": "I",
"ACT": "T",
"AAT": "N",
"AGT": "S",
"ATC": "I",
"ACC": "T",
"AAC": "N",
"AGC": "S",
"ATA": "I",
"ACA": "T",
"AAA": "K",
"AGA": "R",
"ATG": "M",
"ACG": "T",
"AAG": "K",
"AGG": "R",
"GTT": "V",
"GCT": "A",
"GAT": "D",
"GGT": "G",
"GTC": "V",
"GCC": "A",
"GAC": "D",
"GGC": "G",
"GTA": "V",
"GCA": "A",
"GAA": "E",
"GGA": "G",
"GTG": "V",
"GCG": "A",
"GAG": "E",
"GGG": "G",
"---": "-",
}
reverseComplement = {"A": "T", "T": "A", "C": "G", "G": "C"}
aaseqs = ["WFINDMENKE", "SPTKFINDME", "IHFINDMEQS", "LRTFINDMEI", "FINDMEIKQS"]
def translate(dna: str, offset=0):
aa = ""
for i in range(offset, len(dna), 3):
codon = dna[i : i + 3]
if len(codon) == 3:
aa += standardTranslation[codon]
return aa
def reverse(dna: str):
# dna = "AATGTTCTCGTAGAA"
# p = translate(dna, 1)
#
# rc = reverse(dna)
# print("reverse", rc)
#
# p2 = translate(rc)
# print("reverse protein", p2)
rc = ""
for i in range(len(dna)):
rc += reverseComplement[dna[len(dna) - i - 1]]
return rc
""" get a codon for a given aa. If multiple are available, pick one randomly """
def getCodon(aa: str):
# print(getCodon("S"))
# serine is S
# many codons - UCU, UCC, UCA, UCG, AGU and AGC
acids = standardTranslation.items()
opts = []
for [codon, code] in acids:
if aa is code:
opts.append(codon)
r = random.randint(0, len(opts) - 1)
return opts[r]
DNA_Alphabet = "ACGT"
class Distrib:
def __init__(self, alpha) -> None:
self.alpha = alpha
self.counts = {}
self.total = len(alpha)
for x in alpha:
if x not in self.counts:
self.counts[x] = 0
self.counts[x] = self.counts[x] + 1
def __str__(self):
"""Return a readable representation of the distribution"""
str = "< "
for s in self.alpha:
str += s + ("=%4.2f " % (self.counts[s] / self.total))
return str + " >"
def observe(self, char: str):
"""Add an additional observation to the model"""
self.total += 1
self.counts[char] += 1
def __getitem__(self, sym):
return self.counts[sym] / self.total
def generate(self):
"""random symbol propotional to frequency of occurrence
Eg: if the distribution was A=0.25, B=0.75, we will get A approx 25% of the time.
"""
p = random.random() # between 0.0 and 1.0. Eg 0.65, 0.99, etc.
q = 0.0
for sym in self.alpha:
q = q + self[sym]
if p < q:
return sym
raise RuntimeError("WTF")
p1 = Distrib(DNA_Alphabet)
seq1 = "CTGTCCGATATTGCAGCGTCTTAGTGTGAGAACGCTCGCTGTATATCCGCAGGGAGATCCCCCTTCTTATTTCCTTGGAAAGCATTTACGACAGCAGTCC"
for s in seq1:
p1.observe(s)
print(p1)
seq2 = ""
niter = 100000 # try also 1000, and 10000
for i in range(niter):
seq2 = seq2 + str(p1.generate()) # predict a random event/observation
p2 = Distrib(DNA_Alphabet)
for s in seq2:
p2.observe(s)
print(p2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment