Created
July 24, 2024 10:10
-
-
Save lmiller1990/a19e9534ffb5beb0a4e3be51816e1d0b to your computer and use it in GitHub Desktop.
Binf7000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
standardTranslation = { | |
"TTT": "F", | |
"TCT": "S", | |
"TAT": "Y", | |
"TGT": "C", | |
"TTC": "F", | |
"TCC": "S", | |
"TAC": "Y", | |
"TGC": "C", | |
"TTA": "L", | |
"TCA": "S", | |
"TAA": "*", | |
"TGA": "*", | |
"TTG": "L", | |
"TCG": "S", | |
"TAG": "*", | |
"TGG": "W", | |
"CTT": "L", | |
"CCT": "P", | |
"CAT": "H", | |
"CGT": "R", | |
"CTC": "L", | |
"CCC": "P", | |
"CAC": "H", | |
"CGC": "R", | |
"CTA": "L", | |
"CCA": "P", | |
"CAA": "Q", | |
"CGA": "R", | |
"CTG": "L", | |
"CCG": "P", | |
"CAG": "Q", | |
"CGG": "R", | |
"ATT": "I", | |
"ACT": "T", | |
"AAT": "N", | |
"AGT": "S", | |
"ATC": "I", | |
"ACC": "T", | |
"AAC": "N", | |
"AGC": "S", | |
"ATA": "I", | |
"ACA": "T", | |
"AAA": "K", | |
"AGA": "R", | |
"ATG": "M", | |
"ACG": "T", | |
"AAG": "K", | |
"AGG": "R", | |
"GTT": "V", | |
"GCT": "A", | |
"GAT": "D", | |
"GGT": "G", | |
"GTC": "V", | |
"GCC": "A", | |
"GAC": "D", | |
"GGC": "G", | |
"GTA": "V", | |
"GCA": "A", | |
"GAA": "E", | |
"GGA": "G", | |
"GTG": "V", | |
"GCG": "A", | |
"GAG": "E", | |
"GGG": "G", | |
"---": "-", | |
} | |
reverseComplement = {"A": "T", "T": "A", "C": "G", "G": "C"} | |
aaseqs = ["WFINDMENKE", "SPTKFINDME", "IHFINDMEQS", "LRTFINDMEI", "FINDMEIKQS"] | |
def translate(dna: str, offset=0): | |
aa = "" | |
for i in range(offset, len(dna), 3): | |
codon = dna[i : i + 3] | |
if len(codon) == 3: | |
aa += standardTranslation[codon] | |
return aa | |
def reverse(dna: str): | |
# dna = "AATGTTCTCGTAGAA" | |
# p = translate(dna, 1) | |
# | |
# rc = reverse(dna) | |
# print("reverse", rc) | |
# | |
# p2 = translate(rc) | |
# print("reverse protein", p2) | |
rc = "" | |
for i in range(len(dna)): | |
rc += reverseComplement[dna[len(dna) - i - 1]] | |
return rc | |
""" get a codon for a given aa. If multiple are available, pick one randomly """ | |
def getCodon(aa: str): | |
# print(getCodon("S")) | |
# serine is S | |
# many codons - UCU, UCC, UCA, UCG, AGU and AGC | |
acids = standardTranslation.items() | |
opts = [] | |
for [codon, code] in acids: | |
if aa is code: | |
opts.append(codon) | |
r = random.randint(0, len(opts) - 1) | |
return opts[r] | |
DNA_Alphabet = "ACGT" | |
class Distrib: | |
def __init__(self, alpha) -> None: | |
self.alpha = alpha | |
self.counts = {} | |
self.total = len(alpha) | |
for x in alpha: | |
if x not in self.counts: | |
self.counts[x] = 0 | |
self.counts[x] = self.counts[x] + 1 | |
def __str__(self): | |
"""Return a readable representation of the distribution""" | |
str = "< " | |
for s in self.alpha: | |
str += s + ("=%4.2f " % (self.counts[s] / self.total)) | |
return str + " >" | |
def observe(self, char: str): | |
"""Add an additional observation to the model""" | |
self.total += 1 | |
self.counts[char] += 1 | |
def __getitem__(self, sym): | |
return self.counts[sym] / self.total | |
def generate(self): | |
"""random symbol propotional to frequency of occurrence | |
Eg: if the distribution was A=0.25, B=0.75, we will get A approx 25% of the time. | |
""" | |
p = random.random() # between 0.0 and 1.0. Eg 0.65, 0.99, etc. | |
q = 0.0 | |
for sym in self.alpha: | |
q = q + self[sym] | |
if p < q: | |
return sym | |
raise RuntimeError("WTF") | |
p1 = Distrib(DNA_Alphabet) | |
seq1 = "CTGTCCGATATTGCAGCGTCTTAGTGTGAGAACGCTCGCTGTATATCCGCAGGGAGATCCCCCTTCTTATTTCCTTGGAAAGCATTTACGACAGCAGTCC" | |
for s in seq1: | |
p1.observe(s) | |
print(p1) | |
seq2 = "" | |
niter = 100000 # try also 1000, and 10000 | |
for i in range(niter): | |
seq2 = seq2 + str(p1.generate()) # predict a random event/observation | |
p2 = Distrib(DNA_Alphabet) | |
for s in seq2: | |
p2.observe(s) | |
print(p2) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment