Created
April 28, 2021 18:36
-
-
Save dawedawe/bdfcc6285557ead6da102752dc704798 to your computer and use it in GitHub Desktop.
The (incorrect) central dogma of molecular biology (DNA -> RNA -> protein) modeled in F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Dogma | |
module DNA =

    /// The four bases that can occur in a DNA strand.
    type Nucleobases =
        | A // Adenine, a purine
        | C // Cytosine, a pyrimidine
        | G // Guanine, a purine
        | T // Thymine, a pyrimidine

    /// Three consecutive DNA bases.
    type Codon = Nucleobases * Nucleobases * Nucleobases

    /// Returns the pairing partner of a base: A pairs with T, C pairs with G.
    let complement nucleobase =
        match nucleobase with
        | A -> T
        | T -> A
        | C -> G
        | G -> C

    /// Applies `complement` to every base of a sequence (lazily, via Seq.map).
    let complementSeq sequence = sequence |> Seq.map complement

    /// Parses one character, ignoring case, into a base.
    /// Returns None for any character other than a, c, g or t.
    let parse c =
        let upper = System.Char.ToUpper c

        match upper with
        | 'A' -> Some A
        | 'C' -> Some C
        | 'G' -> Some G
        | 'T' -> Some T
        | _ -> None

    /// Parses a string into an array of bases.
    /// Characters that `parse` rejects are silently skipped.
    let parseSeq (s: string) =
        Array.choose parse (s.ToCharArray())
module RNA =

    /// The four bases that can occur in an RNA strand.
    type Nucleobases =
        | A // Adenine, a purine
        | C // Cytosine, a pyrimidine
        | G // Guanine, a purine
        | U // Uracil, a pyrimidine

    /// Three consecutive RNA bases.
    type Codon = Nucleobases * Nucleobases * Nucleobases

    /// Returns the pairing partner of a base: A pairs with U, C pairs with G.
    let complement (nucleobase: Nucleobases) =
        match nucleobase with
        | A -> U
        | U -> A
        | C -> G
        | G -> C
module Transcription =

    /// Maps a single DNA base to the complementary RNA base.
    /// RNA has no thymine, so uracil is the base produced opposite adenine.
    let dnaToRna dna =
        match dna with
        | DNA.T -> RNA.A
        | DNA.G -> RNA.C
        | DNA.C -> RNA.G
        | DNA.A -> RNA.U // RNA has Uracil in place of DNA Thymine

    /// Transcribes a whole DNA sequence base by base (lazily, via Seq.map).
    let dnaSeqToRnaSeq (dna: seq<DNA.Nucleobases>) =
        Seq.map dnaToRna dna
module Translation =

    /// Groups a sequence into consecutive, non-overlapping triples.
    /// The input is copied into an array as soon as this function is called;
    /// trailing elements that do not fill a complete triple are dropped.
    let seqToCodons (s: seq<'T>) =
        let items = Seq.toArray s

        Seq.init (items.Length / 3) (fun k ->
            let i = 3 * k
            (items.[i], items.[i + 1], items.[i + 2]))

    /// The twenty amino acids produced by the codon tables below.
    type AminoAcid =
        | Alanine
        | Arginine
        | Asparagine
        | AsparticAcid
        | Cysteine
        | Glutamine
        | GlutamicAcid
        | Glycine
        | Histidine
        | Isoleucine
        | Leucine
        | Lysine
        | Methionine
        | Phenylalanine
        | Proline
        | Serine
        | Threonine
        | Tryptophan
        | Tyrosine
        | Valine

    /// Returns the one-letter abbreviation of an amino acid as a string.
    let aminoAcidToOneLetter aminoAcid =
        match aminoAcid with
        | Alanine -> "A"
        | Arginine -> "R"
        | Asparagine -> "N"
        | AsparticAcid -> "D"
        | Cysteine -> "C"
        | Glutamine -> "Q"
        | GlutamicAcid -> "E"
        | Glycine -> "G"
        | Histidine -> "H"
        | Isoleucine -> "I"
        | Leucine -> "L"
        | Lysine -> "K"
        | Methionine -> "M"
        | Phenylalanine -> "F"
        | Proline -> "P"
        | Serine -> "S"
        | Threonine -> "T"
        | Tryptophan -> "W"
        | Tyrosine -> "Y"
        | Valine -> "V"

    /// What a single codon means to the translation machinery.
    type Signal =
        | StartOrAmino of AminoAcid // codon that may start translation or code for the amino acid
        | Stop
        | Amino of AminoAcid

    /// Looks up the signal encoded by an RNA codon.
    /// Rows are grouped by first and second base; the third base is spelled
    /// out only where it changes the result, and `_` means "any third base".
    let rnaCodonToSignal (codon: RNA.Codon) =
        match codon with
        // 1st base U
        | (RNA.U, RNA.U, (RNA.U | RNA.C)) -> Amino Phenylalanine
        | (RNA.U, RNA.U, RNA.A) -> Amino Leucine
        | (RNA.U, RNA.U, RNA.G) -> StartOrAmino Leucine
        | (RNA.U, RNA.C, _) -> Amino Serine
        | (RNA.U, RNA.A, (RNA.U | RNA.C)) -> Amino Tyrosine
        | (RNA.U, RNA.A, (RNA.A | RNA.G)) -> Stop
        | (RNA.U, RNA.G, (RNA.U | RNA.C)) -> Amino Cysteine
        | (RNA.U, RNA.G, RNA.A) -> Stop
        | (RNA.U, RNA.G, RNA.G) -> Amino Tryptophan
        // 1st base C
        | (RNA.C, RNA.U, _) -> Amino Leucine
        | (RNA.C, RNA.C, _) -> Amino Proline
        | (RNA.C, RNA.A, (RNA.U | RNA.C)) -> Amino Histidine
        | (RNA.C, RNA.A, (RNA.A | RNA.G)) -> Amino Glutamine
        | (RNA.C, RNA.G, _) -> Amino Arginine
        // 1st base A
        | (RNA.A, RNA.U, (RNA.U | RNA.C | RNA.A)) -> Amino Isoleucine
        | (RNA.A, RNA.U, RNA.G) -> StartOrAmino Methionine
        | (RNA.A, RNA.C, _) -> Amino Threonine
        | (RNA.A, RNA.A, (RNA.U | RNA.C)) -> Amino Asparagine
        | (RNA.A, RNA.A, (RNA.A | RNA.G)) -> Amino Lysine
        | (RNA.A, RNA.G, (RNA.U | RNA.C)) -> Amino Serine
        | (RNA.A, RNA.G, (RNA.A | RNA.G)) -> Amino Arginine
        // 1st base G
        | (RNA.G, RNA.U, (RNA.U | RNA.C | RNA.A)) -> Amino Valine
        | (RNA.G, RNA.U, RNA.G) -> StartOrAmino Valine
        | (RNA.G, RNA.C, _) -> Amino Alanine
        | (RNA.G, RNA.A, (RNA.U | RNA.C)) -> Amino AsparticAcid
        | (RNA.G, RNA.A, (RNA.A | RNA.G)) -> Amino GlutamicAcid
        | (RNA.G, RNA.G, _) -> Amino Glycine

    // Rewrites a DNA base as the RNA base with the same letter, with T
    // becoming U. This is NOT the complement used by Transcription.dnaToRna.
    let private dnaBaseToSameSenseRna dnaBase =
        match dnaBase with
        | DNA.A -> RNA.A
        | DNA.C -> RNA.C
        | DNA.G -> RNA.G
        | DNA.T -> RNA.U

    /// Looks up the signal encoded by a DNA codon.
    /// The codon is read letter for letter with T standing in for U, so the
    /// result always agrees with `rnaCodonToSignal` and there is only one
    /// codon table to maintain.
    let dnaCodonToSignal ((first, second, third): DNA.Codon) =
        rnaCodonToSignal (
            dnaBaseToSameSenseRna first,
            dnaBaseToSameSenseRna second,
            dnaBaseToSameSenseRna third
        )

    /// Maps every DNA codon of a sequence to its signal (lazily, via Seq.map).
    let dnaCodonsToSignals codons = Seq.map dnaCodonToSignal codons

    /// Maps every RNA codon of a sequence to its signal (lazily, via Seq.map).
    let rnaCodonsToSignals codons = Seq.map rnaCodonToSignal codons

    /// Splits an RNA base sequence into codons and maps each to its signal.
    /// Bases left over after the last complete codon are ignored.
    let rnaSeqToSignals rna =
        rnaCodonsToSignals (seqToCodons rna)
module Dogma =

    /// Runs the whole pipeline on a DNA base sequence: transcribes it to RNA
    /// with Transcription.dnaSeqToRnaSeq, then translates that RNA into
    /// signals with Translation.rnaSeqToSignals.
    let dogma dnaSeq =
        let rna = Transcription.dnaSeqToRnaSeq dnaSeq
        Translation.rnaSeqToSignals rna
module PrettyPrint =
    open System.Text
    open Translation

    /// Renders a sequence of signals as one string.
    /// Amino acids (including those tagged StartOrAmino) are written as their
    /// one-letter code; every Stop is written as the literal "-STOP-".
    let signalsToString (signals: seq<Translation.Signal>) =
        let render signal =
            match signal with
            | StartOrAmino acid
            | Amino acid -> aminoAcidToOneLetter acid
            | Stop -> "-STOP-"

        let buffer = StringBuilder()
        // Append returns the builder itself; the result is not needed here.
        signals |> Seq.iter (fun signal -> buffer.Append(render signal) |> ignore)
        buffer.ToString()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment