Created
January 25, 2015 16:08
-
-
Save amonshiz/3415700b3ff4db60b83c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Bioinformatics.DNANucleotide ( | |
DNANucleotide (..), | |
charToDNANucleotide, | |
nucleotideComplement, | |
calculateGCContent | |
) where | |
import Data.Char | |
import Data.List | |
-- | The four DNA nucleotides.
--
-- Constructors are listed alphabetically, which is also the ordering the
-- derived 'Ord' instance provides. The derived 'Read' instance parses the
-- single-letter constructor names.
data DNANucleotide
  = A
  | C
  | G
  | T
  deriving (Show, Eq, Ord, Read)
-- | Convert a character to the nucleotide it names, ignoring case.
--
-- The character is upper-cased and parsed with the derived 'Read' instance,
-- so @a@, @c@, @g@ and @t@ are accepted in either case. Any other character
-- aborts with an error that names the offending character (plain 'read'
-- would only report "no parse").
charToDNANucleotide :: Char -> DNANucleotide
charToDNANucleotide c =
  case reads [toUpper c] of
    -- Exactly one parse that consumed the whole input.
    [(nucleotide, "")] -> nucleotide
    _ -> error ("charToDNANucleotide: not a DNA nucleotide: " ++ show c)
-- | Map a nucleotide to its complement: 'A' and 'T' swap, 'C' and 'G' swap.
nucleotideComplement :: DNANucleotide -> DNANucleotide
nucleotideComplement nucleotide =
  case nucleotide of
    A -> T
    T -> A
    C -> G
    G -> C
-- | Fraction of the nucleotides in a sequence that are 'G' or 'C'.
--
-- The result is a ratio (number of G/C entries divided by the sequence
-- length), not a percentage. An empty sequence yields 0 instead of the NaN
-- that dividing 0 by 0 would produce.
calculateGCContent :: [DNANucleotide] -> Double
calculateGCContent [] = 0
calculateGCContent ns = fromIntegral gcCount / fromIntegral (length ns)
  where
    gcCount = length (filter isGC ns)
    isGC n = n == G || n == C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.List (foldl')
import System.IO (isEOF)

import qualified Data.List.Split as S

import qualified Bioinformatics.DNANucleotide as D
-- | Read lines from standard input until an empty line or end of input.
--
-- Returns @contents@ followed by every line read, each terminated with a
-- newline. The terminating empty line itself is not included.
getLines :: String -> IO String
getLines contents = go []
  where
    -- Lines are accumulated in reverse and joined once at the end, which
    -- avoids re-copying the whole buffer for every line appended.
    go acc = do
      -- 'getLine' throws at end of input, so check first; this lets input
      -- that does not end with a blank line (e.g. a piped file) work.
      eof <- isEOF
      if eof
        then finish acc
        else do
          line <- getLine
          if null line
            then finish acc
            else go (line : acc)

    finish acc = return (contents ++ unlines (reverse acc))
-- | One FASTA record: its header line paired with its nucleotide sequence.
type FASTAFormatLine = (String, [D.DNANucleotide])

-- | Parse the text of a single FASTA record.
--
-- The first line of the input becomes the header; all remaining lines are
-- concatenated and converted character by character with
-- 'D.charToDNANucleotide'. Empty input yields @("", [])@.
fastaStringToFASTAFormat :: String -> FASTAFormatLine
fastaStringToFASTAFormat xs =
  -- Matching on the result of 'lines' avoids the partial 'head'/'tail'.
  case lines xs of
    [] -> ("", [])
    header : sequenceLines ->
      (header, concatMap (map D.charToDNANucleotide) sequenceLines)
-- | Split raw FASTA text into records and parse each one.
--
-- Records are delimited by @>@. 'S.splitOn' always yields a chunk for
-- whatever precedes the first delimiter (normally the empty string), so
-- empty chunks are dropped rather than parsed into a spurious @("", [])@
-- record.
buildFASTALines :: String -> [FASTAFormatLine]
buildFASTALines =
  map fastaStringToFASTAFormat . filter (not . null) . S.splitOn ">"
-- | Pair each record's header with the GC content of its sequence, as
-- computed by 'D.calculateGCContent'.
determineGCContent :: [FASTAFormatLine] -> [(String, Double)]
determineGCContent = map scoreRecord
  where
    scoreRecord (header, bases) = (header, D.calculateGCContent bases)
-- | Read FASTA records from standard input, then print the header of the
-- record with the greatest GC content followed by that GC content
-- multiplied by 100.
--
-- If no record has a GC content above 0, an empty header and 0 are printed.
main :: IO ()
main = do
  theLines <- getLines ""
  let (bestName, bestGC) =
        foldl' keepGreater ("", 0) . determineGCContent $ buildFASTALines theLines
  putStrLn bestName
  print (100.0 * bestGC)
  where
    -- Strict '>' keeps the earlier record on ties (and skips NaN scores).
    keepGreater current@(_, currentGC) candidate@(_, candidateGC)
      | candidateGC > currentGC = candidate
      | otherwise = current
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment