Skip to content

Instantly share code, notes, and snippets.

@amonshiz
Created January 25, 2015 16:08
Show Gist options
  • Save amonshiz/3415700b3ff4db60b83c to your computer and use it in GitHub Desktop.
Save amonshiz/3415700b3ff4db60b83c to your computer and use it in GitHub Desktop.
module Bioinformatics.DNANucleotide (
DNANucleotide (..),
charToDNANucleotide,
nucleotideComplement,
calculateGCContent
) where
import Data.Char
import Data.List
-- | One of the four DNA bases.
--
-- The derived 'Ord' instance follows declaration order (A < C < G < T), and
-- the derived 'Read'/'Show' instances use the constructor names verbatim.
data DNANucleotide
  = A
  | C
  | G
  | T
  deriving (Eq, Ord, Show, Read)
-- | Convert a character to the corresponding 'DNANucleotide', ignoring case.
--
-- Accepts @A@, @C@, @G@ and @T@ in either case. Any other character is a
-- caller error and aborts via 'error' with a message naming the offending
-- character (previously this surfaced as an opaque @Prelude.read: no parse@).
charToDNANucleotide :: Char -> DNANucleotide
charToDNANucleotide c =
  case toUpper c of
    'A' -> A
    'C' -> C
    'G' -> G
    'T' -> T
    _ -> error ("charToDNANucleotide: not a DNA nucleotide: " ++ show c)
-- | Return the base that pairs with the given nucleotide:
-- A pairs with T, and C pairs with G.
nucleotideComplement :: DNANucleotide -> DNANucleotide
nucleotideComplement base =
  case base of
    A -> T
    T -> A
    C -> G
    G -> C
-- | Fraction of the nucleotides that are 'G' or 'C', in the range 0 to 1.
--
-- An empty sequence yields 0 rather than the NaN that dividing zero by
-- zero would produce.
calculateGCContent :: [DNANucleotide] -> Double
calculateGCContent [] = 0
calculateGCContent ns = fromIntegral gcCount / fromIntegral (length ns)
  where
    gcCount = length (filter isGC ns)
    isGC n = n == G || n == C
import Data.List (foldl')
import System.IO (isEOF)
import qualified Data.List.Split as S
import qualified Bioinformatics.DNANucleotide as D
-- | Read lines from standard input until a blank line or end of input.
--
-- Each line read is appended to @contents@ followed by a newline; the
-- terminating blank line itself is not included in the result.
getLines :: String -> IO String
getLines contents = collect []
  where
    -- Lines are gathered newest-first so each step is O(1); they are put
    -- back in input order once reading stops.
    collect acc = do
      eof <- isEOF
      if eof
        -- Stop cleanly at end of input instead of letting 'getLine' throw.
        then finish acc
        else do
          line <- getLine
          if null line
            then finish acc
            else collect (line : acc)
    finish acc = return (contents ++ unlines (reverse acc))
-- | A parsed FASTA record: the record's first line paired with the
-- nucleotides parsed from its remaining lines.
type FASTAFormatLine = (String, [D.DNANucleotide])
-- | Parse the text of a single record into a 'FASTAFormatLine'.
--
-- The first line becomes the record name; every character on the remaining
-- lines is converted with 'D.charToDNANucleotide'. Empty input gives an
-- empty name and an empty sequence.
fastaStringToFASTAFormat :: String -> FASTAFormatLine
fastaStringToFASTAFormat xs =
  case lines xs of
    [] -> ("", [])
    (header : sequenceLines) ->
      (header, map D.charToDNANucleotide (concat sequenceLines))
-- | Split raw text into records at each @>@ and parse every record.
--
-- Splitting text that begins with @>@ yields a leading empty chunk; empty
-- chunks are dropped so they do not show up as nameless, empty records.
buildFASTALines :: String -> [FASTAFormatLine]
buildFASTALines =
  map fastaStringToFASTAFormat . filter (not . null) . S.splitOn ">"
-- | Pair each record's name with the GC content of its sequence, as
-- computed by 'D.calculateGCContent'.
determineGCContent :: [FASTAFormatLine] -> [(String, Double)]
determineGCContent records = map score records
  where
    score (name, nucleotides) = (name, D.calculateGCContent nucleotides)
-- | Read records from standard input, then print the name of the record
-- with the highest GC content followed by that content as a percentage.
--
-- With no records, an empty name and @0.0@ are printed.
main :: IO ()
main = do
  theLines <- getLines ""
  let scored = determineGCContent (buildFASTALines theLines)
      -- Strict '>' keeps the earliest record when several tie.
      pickHigher best@(_, bestGC) candidate@(_, gc)
        | gc > bestGC = candidate
        | otherwise = best
      (winnerName, winnerGC) = foldl' pickHigher ("", 0) scored
  putStrLn winnerName
  print (100.0 * winnerGC)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment