Skip to content

Instantly share code, notes, and snippets.

@amonshiz
Created June 17, 2015 04:04
Show Gist options
  • Save amonshiz/e59a588802d0ee204d1e to your computer and use it in GitHub Desktop.
Save amonshiz/e59a588802d0ee204d1e to your computer and use it in GitHub Desktop.
-- | Amino acids and helpers for translating RNA into amino-acid sequences.
--
-- Exports the 'AminoAcid' type with all of its constructors, the 'Protein'
-- alias, the codon lookup table, a table of monoisotopic masses, and
-- functions for translating RNA and extracting an open reading frame.
module Bioinformatics.Protein (
AminoAcid (..),
Protein,
charToAminoAcid,
rnaCodonTable,
rnaToAminoAcids,
rnaToProtein,
monoisotopicMasses,
openReadingFrame
) where
import Data.Char
import qualified Data.Map as M
import qualified Data.Maybe as Mb
import qualified Bioinformatics.RNANucleotide as R
-- | Amino acids identified by their one-letter code, plus a 'Stop' marker
-- used for codons that map to no amino acid.
--
-- The constructor order matters: the derived 'Ord' instance follows it, so
-- 'Stop' compares greater than every letter constructor.
data AminoAcid
  = A
  | C
  | D
  | E
  | F
  | G
  | H
  | I
  | K
  | L
  | M
  | N
  | P
  | Q
  | R
  | S
  | T
  | V
  | W
  | Y
  | Stop
  deriving (Show, Eq, Ord, Read)
-- | A protein, represented as a plain list of 'AminoAcid' values.
-- Because 'Stop' is an 'AminoAcid' constructor, a 'Protein' may contain it.
type Protein = [AminoAcid]
-- | Convert a one-letter amino-acid code (case-insensitive) to an 'AminoAcid'.
--
-- The character is upper-cased and parsed with the derived 'Read' instance,
-- so only the single-letter constructors can be produced; 'Stop' cannot be
-- reached from a single character.
--
-- Calls 'error' when the character is not a valid code. The failure message
-- names the offending character instead of the bare
-- @Prelude.read: no parse@.
charToAminoAcid :: Char -> AminoAcid
charToAminoAcid c =
  case reads [toUpper c] of
    [(aminoAcid, "")] -> aminoAcid
    _ -> error ("charToAminoAcid: not a one-letter amino acid code: " ++ show c)
-- | Association list from RNA codons to the amino acid (or 'Stop') they
-- encode.
--
-- The table is written row-wise: each row fixes the second and third
-- nucleotide of the codon and lists the amino acids obtained when the first
-- nucleotide is U, C, A and G respectively.
codonMapping :: [([R.RNANucleotide], AminoAcid)]
codonMapping = concatMap expandRow codonRows
  where
    -- First-position nucleotides, in the column order used by 'codonRows'.
    firstPositions = [R.U, R.C, R.A, R.G]

    -- Turn one (second, third, amino acids) row into four codon entries.
    expandRow (second, third, aminoAcids) =
      zipWith
        (\first aminoAcid -> ([first, second, third], aminoAcid))
        firstPositions
        aminoAcids

    codonRows =
      [ (R.U, R.U, [F, L, I, V])
      , (R.U, R.C, [F, L, I, V])
      , (R.U, R.A, [L, L, I, V])
      , (R.U, R.G, [L, L, M, V])
      , (R.C, R.U, [S, P, T, A])
      , (R.C, R.C, [S, P, T, A])
      , (R.C, R.A, [S, P, T, A])
      , (R.C, R.G, [S, P, T, A])
      , (R.A, R.U, [Y, H, N, D])
      , (R.A, R.C, [Y, H, N, D])
      , (R.A, R.A, [Stop, Q, K, E])
      , (R.A, R.G, [Stop, Q, K, E])
      , (R.G, R.U, [C, R, S, G])
      , (R.G, R.C, [C, R, S, G])
      , (R.G, R.A, [Stop, R, R, G])
      , (R.G, R.G, [W, R, R, G])
      ]
-- | Lookup table from a codon (a list of RNA nucleotides) to the amino acid
-- it encodes, built from 'codonMapping'. Stop codons map to 'Stop'.
rnaCodonTable :: M.Map [R.RNANucleotide] AminoAcid
rnaCodonTable = M.fromList codonMapping
-- | Monoisotopic mass of each amino acid, keyed by 'AminoAcid'.
--
-- There is no entry for 'Stop', so looking it up yields 'Nothing'.
--
-- The value type is fixed to 'Double' explicitly; without a signature it was
-- only determined by the monomorphism restriction and numeric defaulting.
monoisotopicMasses :: M.Map AminoAcid Double
monoisotopicMasses =
  M.fromList
    [ (A, 71.03711)
    , (C, 103.00919)
    , (D, 115.02694)
    , (E, 129.04259)
    , (F, 147.06841)
    , (G, 57.02146)
    , (H, 137.05891)
    , (I, 113.08406)
    , (K, 128.09496)
    , (L, 113.08406)
    , (M, 131.04049)
    , (N, 114.04293)
    , (P, 97.05276)
    , (Q, 128.05858)
    , (R, 156.10111)
    , (S, 87.03203)
    , (T, 101.04768)
    , (V, 99.06841)
    , (W, 186.07931)
    , (Y, 163.06333)
    ]
-- | Translate a list of codons to amino acids via 'rnaCodonTable'.
--
-- Codons with no entry in the table (for example a chunk that is not exactly
-- three nucleotides long) are silently dropped, so the result may be shorter
-- than the input.
lookupProteins :: [[R.RNANucleotide]] -> [AminoAcid]
lookupProteins = Mb.mapMaybe (`M.lookup` rnaCodonTable)
-- | Translate an RNA strand into amino acids, keeping any 'Stop' markers.
--
-- The strand is first split by 'R.rnaStringToRNAChunks' (presumably into
-- codon-sized chunks -- confirm against that module), then each chunk is
-- looked up with 'lookupProteins'.
rnaToAminoAcids :: [R.RNANucleotide] -> [AminoAcid]
rnaToAminoAcids rna = lookupProteins (R.rnaStringToRNAChunks rna)
-- | Translate an RNA strand into a 'Protein' with every 'Stop' removed.
--
-- Translation does not end at the first 'Stop'; stop markers are filtered
-- out and the amino acids on either side of them are kept.
rnaToProtein :: [R.RNANucleotide] -> Protein
rnaToProtein rna =
  [ aminoAcid
  | aminoAcid <- lookupProteins (R.rnaStringToRNAChunks rna)
  , aminoAcid /= Stop
  ]
-- | Extract the open reading frame at the start of an amino-acid sequence.
--
-- Returns @Just frame@ when the sequence begins with 'M' and contains a
-- 'Stop'; @frame@ runs from that leading 'M' up to, but not including, the
-- first 'Stop'. Returns 'Nothing' when the sequence does not start with 'M'
-- or has no 'Stop' after it.
openReadingFrame :: Protein -> Maybe Protein
openReadingFrame p@(M : _) =
  case break (== Stop) p of
    -- 'break' leaves the remainder either empty or starting at the first
    -- 'Stop', so a non-empty remainder means the frame is terminated.
    (frame, _ : _) -> Just frame
    _ -> Nothing
openReadingFrame _ = Nothing
import qualified Bioinformatics.DNANucleotide as D
import qualified Bioinformatics.RNANucleotide as R
import qualified Bioinformatics.Utilities as U
import qualified Bioinformatics.Protein as P
import qualified Bioinformatics.FASTA as F
import qualified Data.Map as M
import Data.Maybe
import qualified Data.List as DL
import qualified Data.Set as DS
-- | Convert a DNA string to RNA nucleotides, one character at a time, by
-- applying 'D.charToDNANucleotide' and then 'U.dnaToRNA' to each character.
getRNA :: String -> [R.RNANucleotide]
getRNA dna = [U.dnaToRNA (D.charToDNANucleotide base) | base <- dna]
-- | Convert a DNA string to RNA nucleotides after passing each DNA
-- nucleotide through 'D.nucleotideComplement'. The input order is kept;
-- nothing is reversed here.
getComplementedRNA :: String -> [R.RNANucleotide]
getComplementedRNA dna =
  [ U.dnaToRNA (D.nucleotideComplement (D.charToDNANucleotide base))
  | base <- dna
  ]
-- | Translate a DNA string into amino acids (with 'P.Stop' markers kept) by
-- converting it with 'getRNA' and then applying 'P.rnaToAminoAcids'.
getProtein :: String -> P.Protein
getProtein dna = P.rnaToAminoAcids (getRNA dna)
-- | Translate the complement of a DNA string into amino acids (with
-- 'P.Stop' markers kept), using 'getComplementedRNA' followed by
-- 'P.rnaToAminoAcids'.
getComplementedProtein :: String -> P.Protein
getComplementedProtein dna = P.rnaToAminoAcids (getComplementedRNA dna)
-- | Collect the distinct open reading frames found in a DNA string.
--
-- Every suffix of the input is translated with 'getProtein', and every
-- suffix of the reversed input with 'getComplementedProtein'. Each
-- translation is passed to 'P.openReadingFrame', and the successful results
-- are de-duplicated through a 'DS.Set', so the output is in ascending set
-- order.
getReadingFrames :: String -> [P.Protein]
getReadingFrames cs = DS.toList (DS.fromList frames)
  where
    forward = map getProtein (DL.tails cs)
    backward = map getComplementedProtein (DL.tails (reverse cs))
    frames = mapMaybe P.openReadingFrame (forward ++ backward)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment