Created
June 17, 2015 04:04
-
-
Save amonshiz/e59a588802d0ee204d1e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Bioinformatics.Protein ( | |
AminoAcid (..), | |
Protein, | |
charToAminoAcid, | |
rnaCodonTable, | |
rnaToAminoAcids, | |
rnaToProtein, | |
monoisotopicMasses, | |
openReadingFrame | |
) where | |
import Data.Char | |
import qualified Data.Map as M | |
import qualified Data.Maybe as Mb | |
import qualified Bioinformatics.RNANucleotide as R | |
-- | One-letter amino-acid codes, plus a 'Stop' marker used for the codons
-- that 'rnaCodonTable' maps to translation termination.
--
-- Constructor order defines the derived 'Ord' instance, and the derived
-- 'Read' instance is what 'charToAminoAcid' relies on.
data AminoAcid
  = A | C | D | E | F
  | G | H | I | K | L
  | M | N | P | Q | R
  | S | T | V | W | Y
  | Stop
  deriving (Show, Eq, Ord, Read)
-- | A protein is represented as a plain list of 'AminoAcid' values. Because
-- this is only an alias, a 'Protein' may still contain 'Stop' markers.
type Protein = [AminoAcid]
-- | Convert a one-letter amino-acid code to its 'AminoAcid' constructor.
--
-- The character is upper-cased first, so @'a'@ and @'A'@ both yield 'A'.
-- A character that does not name a constructor (for example @'B'@ or a digit)
-- raises an 'error' that reports the offending character. The function stays
-- partial because its signature is @Char -> AminoAcid@; callers that cannot
-- guarantee valid input should validate before calling.
charToAminoAcid :: Char -> AminoAcid
charToAminoAcid c =
  case reads [toUpper c] of
    -- Accept only a complete, unambiguous parse of the single character.
    [(aminoAcid, "")] -> aminoAcid
    _ -> error ("charToAminoAcid: not a one-letter amino acid code: " ++ show c)
-- | Association list from RNA codons (three nucleotides) to the amino acid,
-- or 'Stop' marker, each one encodes.
--
-- Each row holds the four codons that share their second and third bases,
-- with the first base running U, C, A, G from left to right.
codonMapping :: [([R.RNANucleotide], AminoAcid)]
codonMapping =
  [ ([u, u, u], F),    ([c, u, u], L), ([a, u, u], I), ([g, u, u], V)
  , ([u, u, c], F),    ([c, u, c], L), ([a, u, c], I), ([g, u, c], V)
  , ([u, u, a], L),    ([c, u, a], L), ([a, u, a], I), ([g, u, a], V)
  , ([u, u, g], L),    ([c, u, g], L), ([a, u, g], M), ([g, u, g], V)
  , ([u, c, u], S),    ([c, c, u], P), ([a, c, u], T), ([g, c, u], A)
  , ([u, c, c], S),    ([c, c, c], P), ([a, c, c], T), ([g, c, c], A)
  , ([u, c, a], S),    ([c, c, a], P), ([a, c, a], T), ([g, c, a], A)
  , ([u, c, g], S),    ([c, c, g], P), ([a, c, g], T), ([g, c, g], A)
  , ([u, a, u], Y),    ([c, a, u], H), ([a, a, u], N), ([g, a, u], D)
  , ([u, a, c], Y),    ([c, a, c], H), ([a, a, c], N), ([g, a, c], D)
  , ([u, a, a], Stop), ([c, a, a], Q), ([a, a, a], K), ([g, a, a], E)
  , ([u, a, g], Stop), ([c, a, g], Q), ([a, a, g], K), ([g, a, g], E)
  , ([u, g, u], C),    ([c, g, u], R), ([a, g, u], S), ([g, g, u], G)
  , ([u, g, c], C),    ([c, g, c], R), ([a, g, c], S), ([g, g, c], G)
  , ([u, g, a], Stop), ([c, g, a], R), ([a, g, a], R), ([g, g, a], G)
  , ([u, g, g], W),    ([c, g, g], R), ([a, g, g], R), ([g, g, g], G)
  ]
  where
    -- Lowercase aliases for the nucleotide constructors keep the table
    -- readable and visually distinct from the uppercase amino-acid codes.
    u = R.U
    c = R.C
    a = R.A
    g = R.G
-- | Lookup table from a codon (a list of RNA nucleotides) to the amino acid
-- or 'Stop' marker it encodes, built from 'codonMapping'.
rnaCodonTable :: M.Map [R.RNANucleotide] AminoAcid
rnaCodonTable = M.fromList codonMapping
-- | Monoisotopic mass for each amino acid, keyed by its one-letter code.
--
-- 'Stop' has no entry, so looking it up yields 'Nothing'.
monoisotopicMasses :: M.Map AminoAcid Double
monoisotopicMasses =
  M.fromList
    [ (A, 71.03711)
    , (C, 103.00919)
    , (D, 115.02694)
    , (E, 129.04259)
    , (F, 147.06841)
    , (G, 57.02146)
    , (H, 137.05891)
    , (I, 113.08406)
    , (K, 128.09496)
    , (L, 113.08406)
    , (M, 131.04049)
    , (N, 114.04293)
    , (P, 97.05276)
    , (Q, 128.05858)
    , (R, 156.10111)
    , (S, 87.03203)
    , (T, 101.04768)
    , (V, 99.06841)
    , (W, 186.07931)
    , (Y, 163.06333)
    ]
-- | Translate every codon through 'rnaCodonTable'.
--
-- Chunks with no entry in the table are silently dropped rather than
-- reported, so the result may be shorter than the input. 'Stop' markers are
-- kept in the output.
lookupProteins :: [[R.RNANucleotide]] -> [AminoAcid]
lookupProteins = Mb.mapMaybe (`M.lookup` rnaCodonTable)
-- | Translate an RNA sequence into amino acids, keeping 'Stop' markers.
--
-- The sequence is first split by 'R.rnaStringToRNAChunks' (presumably into
-- three-nucleotide codons -- confirm against that module), and each chunk is
-- then looked up with 'lookupProteins'.
rnaToAminoAcids :: [R.RNANucleotide] -> [AminoAcid]
rnaToAminoAcids rna = lookupProteins codons
  where
    codons = R.rnaStringToRNAChunks rna
-- | Translate an RNA sequence into a 'Protein' with every 'Stop' marker
-- removed.
--
-- NOTE(review): 'Stop' markers are filtered out, not used as a terminator, so
-- any amino acids encoded after a stop codon remain in the result.
rnaToProtein :: [R.RNANucleotide] -> Protein
rnaToProtein rna =
  [ aminoAcid
  | aminoAcid <- lookupProteins (R.rnaStringToRNAChunks rna)
  , aminoAcid /= Stop
  ]
-- | Extract the open reading frame at the start of a translated sequence.
--
-- Returns @Just frame@ when the input begins with 'M' and contains a 'Stop'
-- somewhere after it; @frame@ is everything from that leading 'M' up to, but
-- not including, the first 'Stop'. Returns 'Nothing' when the input is empty,
-- does not start with 'M', or has no 'Stop'.
openReadingFrame :: Protein -> Maybe Protein
openReadingFrame p@(M : _) =
  case break (== Stop) p of
    -- A non-empty remainder from 'break' always starts with the 'Stop' that
    -- ended the frame, so matching on its shape is sufficient.
    (frame, _ : _) -> Just frame
    _              -> Nothing
openReadingFrame _ = Nothing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import qualified Bioinformatics.DNANucleotide as D | |
import qualified Bioinformatics.RNANucleotide as R | |
import qualified Bioinformatics.Utilities as U | |
import qualified Bioinformatics.Protein as P | |
import qualified Bioinformatics.FASTA as F | |
import qualified Data.Map as M | |
import Data.Maybe | |
import qualified Data.List as DL | |
import qualified Data.Set as DS | |
-- | Convert a DNA string to RNA one character at a time: each character is
-- parsed with 'D.charToDNANucleotide' and then mapped through 'U.dnaToRNA'.
--
-- NOTE(review): how invalid characters are handled depends on
-- 'D.charToDNANucleotide', which is not visible here.
getRNA :: String -> [R.RNANucleotide]
getRNA dna = [U.dnaToRNA (D.charToDNANucleotide base) | base <- dna]
-- | Like 'getRNA', but each DNA nucleotide is passed through
-- 'D.nucleotideComplement' before conversion to RNA.
--
-- The input order is preserved; callers wanting the reverse complement must
-- reverse the string themselves (as 'getReadingFrames' does).
getComplementedRNA :: String -> [R.RNANucleotide]
getComplementedRNA dna = map complementToRNA dna
  where
    complementToRNA base =
      U.dnaToRNA (D.nucleotideComplement (D.charToDNANucleotide base))
-- | Translate a DNA string into amino acids by converting it with 'getRNA'
-- and passing the result to 'P.rnaToAminoAcids'.
getProtein :: String -> P.Protein
getProtein dna = P.rnaToAminoAcids (getRNA dna)
-- | Translate the complement of a DNA string into amino acids by converting
-- it with 'getComplementedRNA' and passing the result to
-- 'P.rnaToAminoAcids'.
getComplementedProtein :: String -> P.Protein
getComplementedProtein dna = P.rnaToAminoAcids (getComplementedRNA dna)
-- | Collect every distinct open reading frame found in a DNA string.
--
-- Each suffix of the string, and each suffix of the complemented reversed
-- string, is translated and offered to 'P.openReadingFrame'. Suffixes that
-- yield 'Nothing' are dropped. The survivors are de-duplicated through a
-- 'DS.Set', so the result is in ascending order with no repeats.
getReadingFrames :: String -> [P.Protein]
getReadingFrames cs = DS.toList (DS.fromList frames)
  where
    -- Candidate translations starting at every offset of the given strand.
    forward = map getProtein (DL.tails cs)
    -- The same, for the reverse-complement strand.
    reverseComplement = map getComplementedProtein (DL.tails (reverse cs))
    frames = mapMaybe P.openReadingFrame (forward ++ reverseComplement)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment