Created
September 15, 2017 12:59
-
-
Save SwadicalRag/8473bb62993ca458d35ca11c71158261 to your computer and use it in GitHub Desktop.
Quick genome disassembler I made for a quiz.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Quick genome "disassembler": locates the first protein expressed by a
-- DNA/RNA sequence and translates it into one-letter amino-acid codes.
local AADisasm = {}

--- Codons that may initiate translation, tried in list order.
AADisasm.StartCodons = {
    "AUG",
}

--- Set of codons that terminate translation (codon -> true).
AADisasm.TerminationCodons = {
    UAA = true,
    UAG = true,
    UGA = true,
}

--- Ribosome binding site motifs, tried in list order.
-- Entries are used as Lua patterns when searching the sequence.
AADisasm.RibosomeBindingSites = {
    "AAGGAG",
}

--- RNA codon -> one-letter amino-acid code; "*" marks a stop codon.
AADisasm.AminoAcidLookup = {
    UUU = "F",
    UUC = "F",
    UUA = "L",
    UUG = "L",
    CUU = "L",
    CUC = "L",
    CUA = "L",
    CUG = "L",
    AUU = "I",
    AUC = "I",
    AUA = "I",
    AUG = "M",
    GUU = "V",
    GUC = "V",
    GUA = "V",
    GUG = "V",
    UCU = "S",
    UCC = "S",
    UCA = "S",
    UCG = "S",
    AGU = "S",
    AGC = "S",
    CCU = "P",
    CCC = "P",
    CCA = "P",
    CCG = "P",
    ACU = "T",
    ACC = "T",
    ACA = "T",
    ACG = "T",
    GCU = "A",
    GCC = "A",
    GCA = "A",
    GCG = "A",
    UAU = "Y",
    UAC = "Y",
    UAA = "*",
    UAG = "*",
    UGA = "*",
    CAU = "H",
    CAC = "H",
    CAA = "Q",
    CAG = "Q",
    AAU = "N",
    AAC = "N",
    AAA = "K",
    AAG = "K",
    GAU = "D",
    GAC = "D",
    GAA = "E",
    GAG = "E",
    UGU = "C",
    UGC = "C",
    UGG = "W",
    CGU = "R",
    CGC = "R",
    CGA = "R",
    CGG = "R",
    AGA = "R",
    AGG = "R",
    GGU = "G",
    GGC = "G",
    GGA = "G",
    GGG = "G",
}

-- Translate in-frame codons of `rna` beginning at index `firstCodonPos`
-- (the base right after the start codon) until a termination codon is hit.
-- Returns codons, aminoacids on success, or nil when the sequence runs out
-- before any termination codon appears.
local function translateFrom(rna, firstCodonPos, startCodon)
    local codons = { startCodon }
    -- The initiator residue is recorded as "M" whatever the start codon is.
    local aminoacids = { "M" }
    -- Stop at #rna - 2 so a trailing partial codon (1-2 bases) is ignored.
    for pos = firstCodonPos, #rna - 2, 3 do
        local codon = rna:sub(pos, pos + 2)
        -- Appended before the stop check, so the list ends with "*".
        aminoacids[#aminoacids + 1] = AADisasm.AminoAcidLookup[codon]
        if AADisasm.TerminationCodons[codon] then
            return codons, aminoacids
        end
        codons[#codons + 1] = codon
    end
    return nil
end

--- Find and translate the first expressed protein in a nucleotide sequence.
--
-- The input is standardised first: upper-cased, every character other than
-- A/U/T/G/C removed, and T replaced by U. The returned position indexes this
-- standardised string, not the raw input.
--
-- Each ribosome binding site is tried in list order; downstream of the first
-- occurrence of a site, each start codon is tried in list order, and the
-- first one followed by an in-frame termination codon wins.
--
-- @tparam string sequence DNA or RNA text (any case, may contain junk)
-- @treturn[1] number 1-based index of the start codon's first base
-- @treturn[1] table codons: the start codon followed by every codon up to,
--   but not including, the termination codon
-- @treturn[1] table aminoacids: one-letter codes beginning with "M" and
--   ending with "*" for the termination codon
-- @treturn[2] nil when no ribosome binding site occurs in the sequence
-- @raise "unterminated amino acid sequence!" when a binding site was found
--   but no start codon downstream of it leads to an in-frame stop codon
function AADisasm:FindFirstExpressedProtein(sequence)
    if type(sequence) ~= "string" then
        error("sequence must be a string, got " .. type(sequence), 2)
    end

    -- Single-target assignments discard gsub's second result (the count).
    local rna = sequence:upper():gsub("[^AUTGC]", "")
    rna = rna:gsub("T", "U")

    local sawBindingSite = false
    for _, site in ipairs(AADisasm.RibosomeBindingSites) do
        local _, siteEnd = rna:find(site)
        if siteEnd then
            sawBindingSite = true
            for _, startCodon in ipairs(AADisasm.StartCodons) do
                -- Search strictly downstream of the binding site.
                local startPos, startEnd = rna:find(startCodon, siteEnd + 1)
                if startPos then
                    local codons, aminoacids =
                        translateFrom(rna, startEnd + 1, startCodon)
                    if codons then
                        return startPos, codons, aminoacids
                    end
                end
            end
        end
    end

    -- Only complain once every site / start-codon combination has failed.
    if sawBindingSite then
        error("unterminated amino acid sequence!")
    end
    return nil
end
-- Export the module table to whoever require()s this file.
return AADisasm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment