Last active
December 15, 2015 08:19
-
-
Save sir-deenicus/5230432 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open Prelude | |
open System.Text.RegularExpressions | |
open System.IO | |
open System.Windows | |
let sw = Diagnostics.Stopwatch() | |
///////////////////////////////////////////////////////////// | |
/// Tallies how many times each character occurs in `s`.
/// Returns a sequence of (character, occurrences as float) pairs,
/// ordered by character.
let countsymbols (s : string) =
    s
    |> Seq.countBy id
    |> Seq.map (fun (symbol, occurrences) -> symbol, float occurrences)
    |> Seq.sortBy fst
String.Join(" ", | |
countsymbols "ACCACATCATTGTATTATCAATATTGCTCCTAGGACAGCCTTAGGATTTGGCCTGAGGTAGCTCAATAGAGGTATGATCCATCCTAGTAAATGATCTGATCTAGAATGGGTCTCTTGTCTAACCTGTCGCTCCGCAGTTAGTATTACGCATAACCCTTACGAAGAATTGGTCCGGCTTAACGTAAACTCTTTCCACTTCGCGACTGTGTTATGAACAGTAGTGCCATGATGGGGGAATGGCCCTCATCGGTCTGCATATCTCAGGAGAATACCCGCGAGCGTTATGAGGGGGGAGTTCGTCTATCCGCACCTCTCGGTGCGGGTTTGAGGTAGGCAGAGCCGGGTGCGGGCGCGCGATTCACGCCCTTTTTGACTATGGGCGTGAGAAGGGAAAATGCCCAAACTAGTTACAGGCGAAAACTCCTATACGTATTAATTTATACCTACGGGAATGGTAGAATTATTACACGTTAGCCGCTTGGATTGACGCCAAAGGCGATTAGGCGTCCCCGCATGATCCGGGCATTAAAGTGTTAAGTCACACGGGCGATGGGATCCCCGTCCTTCTGTACTCCCGTAATCGGTAGTTGTCCACCTTGGATCGCCGAATGCTGAACATTGACAAACCTACTTCGATACTAAACATAATCTATACGCTTTATTTCCCGCATTGTGTTTTTCGGATGATTATTCGCGCGCATTTCAGAACGCATCTGCGCGCAGCTTTGGGGGCAATAAATCGAGTTGATAAGCGGGCAGGTGTGAAACGTTGGGCCCAACCTCTATGAAATATACCCGCGGATAATTGACGAGACTTGGATCCATTGCATATGTCTCTTTCGCACCGTACAGAGTGTATATTCGTGGGCAAACGATATGTTCTGTCCAAATTCGCTTTATTCGACGTTCCAGGCGATGATCAGGCCATAACAATAAGGTCGTGTTCAGCGGCCAACTCCGGCCTGGA" | |
|> Seq.map snd) | |
///////////////////////////////////////////////////////////// | |
/// Transcribes a DNA string to RNA by turning every 'T' into 'U'.
let transcribe (s:string) =
    s.Replace('T', 'U')
transcribe "GATGGAACTTGACTACGTAAATT" | |
transcribe "TGTGGGGTAGCCAGTGGATGCTCCGAAATATCAAGAAGCTGCGGCTACGCGTTTGGAAATACGCAATGGCTGGCGCAGGAGTCTTCATAAATGTGCTCTGTTCTAATTCGAGTCCTCCGGCCTGCAAACAGAAATCATAAAGATACCTTCATAATACTGTTGCAACTGCCCCTGGAACCCCAGGGAATGACACGACAATTTTCGCCGTCATGCGTGTAGTGGGAAGTTCTGTCCGTTGGCGTTCCGCATGGAGAGGCTTTCGTCCCACGCTCTAGTTTAACACGCTGCACACAGAGAGCCAACCTTTTGAAGCAGGCCACGTATGCCACCCGTTCATAGTCAACGTCACTCAAGTTGGTGATGAAACCAGTTTCCCGAAGGTAGCCAGCTTCTCGGGAATAACGTGTGCTTGGGTCGTAAAAACGACCTTGGAGTAATCTGTCTACACTGGTGACCTGGAAATCAGAAGTAGTAACACTACGATAGTGCGCTACAATGAATTCTAAGTGTCCCCGGTGGTGTGTAAGCTGGCACAACATGTGGCGGGCAGTTTCAATATTGAGAGGGTCATAATACGCCGGTTGCGTCAGGCTATTTAGCACCATGCCGGGCGAAACGAGGCGTATCGCCAGGGCGAACCGGCCCAACCACAGGTTTTAAATAAAAACGTTTATCAAAACAGGGGGGCGACTAATCCCAGCTAAGGTGGGCGAAATGACTGTAAATTTCGAGTGTTTCCAACCTCAACGTACTAGATATGACTAACAAGACAACCTAATGGCAACGCGCGCTAGGTCAATTTAGGACCGCCTGAGAGGCAGAGCCACCACCCAATCTATAGTATGGCTTCAAGGTCGCTTTTTTAGGCGGGCGGATCACCGCCTAGGACCATCTGGGTAAATATCATGCTGGTATTAGGACAATCCATAATAACCCCGGGTTTCGTCGGAGTTTGAACGGTTTGCCGCAGTCACATCCGGGCCCATAATAGGGG" | |
///////////////////////////////////////////////////////////// | |
/// Returns the reverse complement of a DNA string: the characters are
/// reversed and each base is swapped with its partner (A<->T, G<->C).
/// Any character other than A, C, G or T becomes a single space.
let rev_complement (s : string) =
    let complement = function
        | 'A' -> 'T'
        | 'T' -> 'A'
        | 'G' -> 'C'
        | 'C' -> 'G'
        | _ -> ' '
    let flipped = s.ToCharArray() |> Array.rev |> Array.map complement
    new String(flipped)
rev_complement "TTGTCTGAGGAACCCTTTGCTGCAAGAAAGGTACAGCGCAATAATATAGCTCGTATTGCCGCAAGCCCAGCAGCAGCCTACCAAGGCCTGTAGAAGTATGCGTAAGCATGTTCGCTATCGAAGTTCCAAGTCCGCGAAAGCGAGAGGTAGCGCTCTCTGCGAATTGGTGCGACAGAGTTTTTCTTTCATTTGATCGTCAAAAGCATTGCGGTCCGCACAAAGCATAACTCCTCACAGATTAGATTACGGTGTTTTTCGAATAGCCATACGCTGAATGCCGCACTTTGTCTTTCGCCCCGGCGTAACTCGCAGCACAGCTCATGCTTATTACCGCTCCGTGATGACTTTACCCAACTTATTAAACGTAGTGTATTCATCTTATCCTACGAGATGTGCGCAGGGTCCGGCACTAGACCTTGGGGGAGGACTGTTTAGGCTTTTGGTCTTCGTAATAGCGTATCGGTACCCGCCCGCTGTATGACCGCTTGCAACACTACTCACACCCCATCTAACTCTTTCGCCCTAGGGAGGCCGAAGGAAGTACCACGCGCTTTGCCCCCTGGGAAGCTAAAGAGGCGAAAGGTTAAGTTTTATTTCCCCATTAGTGGTGCCGACCCCGCATAAGGCCATTTAGGTCTCGGGACACACTAAAAGACATCTCTGGTTACCATTTATGCTTAAGCTCGCCATGCGAACGGCTGGATCCGCGACTGAGGGCCTCTCCTAACCCCGTTCAGAGCGTAAAAGTCCCCCAGATTGTGACAGGTCTCTCCGGACATAAATCCAGAAACGGATGACGTAAAGTTGAGGGTGGAGG" | |
///////////////////////////////////////////////////////////// | |
let hamm a b = HelperFunctions.hamming (a |> Prelude.charArr) (b |> Prelude.charArr) | |
hamm "TCTAAGTATCCCCTCACCCGACGATACAGGGAGGCATAAAGGTATCTAACCTAATTGACCGGTTCTCTACGCCTGGGTCAGCCTGCTCCGGTACCGTGGATTTCGCTCTTCCAATCGGTCAAGATACCCATATGTTAGGACCCCCTTCTCTAGGACCCTTTGTTCTTTTTAGCGGGGTTTATGGAAGAGAATTAAGTGTCCTGGGGTTAGGTGCCGTCGCTACTAGTAAGGTACATGAAGAGTCGCGATTCCACCGGTTTTGCGACGACGCAACACGTGGCCTTACTGTCTAGATAGGCATAGAACTTACTTATGAAAACCTTGTCGCGTGCCCTCTATCTGGCCAGGGGATAGCTGTGAAGGTCCTCGACGGTCGGGGCCTTTGTATGCTCATGTAAAAGACCTAGCGGTTATCAATCACTCCCTACTCACCCACTAGCTAATCCGGCTATGTTCGACCTCAGAGACCTCTTCCCAGTGTGTTTTAAACCACCGGTCAGTGAAGGGCACGACGATCATCGGCGGCTCCTTAAACACCCCCTCGTCGACCCACAGAAATAGTACTATATTATGTTACCGACAGCCCGAATGAGCTCCCCCACTCCGGAATGCTATCGTTTGGCGGAATCGGTAAATTCATTAAAACGTCTTTTTAATTCCAGCACCCTCATCTGCCGTGCCGCTTGCAGTGTAGGAGCATCATCTTCAGTCACCGATTCTGTCCACCACCAGGCCGCTACAGAAGTCCCACCCGCCGGCTATTCATCGCCAATGTCCAGTCCCCCCAGTCCTCATTAAATGTATTTTACTGGAACGAGCCAGCATATCAAGGCTAGACTAGTGGACACCGAACGCAATGTGACCTTAGCTACCATGCATTGCCGGCAACGTATCAGTTCGTTTATGATTCGTCACTGTGGATTGTGTATCATCGGATTAAGGTCGGCCTAATATGCTAATTAAC" | |
"CAAAAGGTTTGCTCCACATGTCGACAAAGGGCGAAAACGAAGTTTCTATGCCGAGTGGTCCGTCGTTTCCTTCGTGTCCAGCCTGCCCCGGGAATGAGGGATCCTCTCGTCGAGTGAGTCTTAGTGCCGCACAGCGACCAGCTACCTCTCTAACACGATGTAGTAATTTTTTAAATGTCTATGCTGCATAGCTGAATGCCGTAGGGTTAGAGGCGGCCACTAAATGTGCAATCCCGCGAGTGAACCGGATTAAAGGGTCTCATGCGACCGCTGGAGGAGGTTGGAGTATCATAGAAGTCAAGGTTGATACATATTTGAAAGTGCTCACGTCCCTTGTATGTTGTCATGGACTGGCCTTGCAGTGTCTCTACGGCTGGAGCGTTTCTTCGTTGTTGTGGAGAACGCAGTATTCGCCAATCGCCACATAATCACAGAAGCTCTCATCGTGCAAGTTTTGCCCTGGCTGACTCCTTCAAATTGCGCTATTCACTACCGGTGAGCGTATAACACAGACTTGATCCGGGCCGACTAAAACACTCGTCCCTTCAAACACAAAGGGAGGCTTGTAATTGCTTGACGAAGCCGCCAAACTGTCTAAGCGCTTTATCGAAGACCCTTGGTGCGTAATAATCGTCTGCACCCGTCGATCTAATTAAGCGAAGCGCCCTCACCGGTGGTGTCCTTCGCGAGTTAGGATCAGGATGTAGATTTATCGCTCTGGTCTCCCCAACAGGAGATACAATATTCTATCCTAACGGCTACTGATCGGCAAGCCGGTTCCCCCACATTCTGGCTTAACTGCCATTTCCTGGCATTTACCTGAAGAGCCAAGCAAAACTAGTGTTCACGATTGGAAATGGTGTATTACGCAACATAAACTCGCGGACAACAATCCCTTAATACAAGAGCTTTCAGTGTAGATGGTCTATCATTGCGTTGAAGTCCACCTAATCTGCCTACTAAC" | |
///////////////////////////////////////////////////////////// | |
// From: http://stackoverflow.com/questions/286427/calculating-permutations-in-f | |
// Much faster than anything else I've tested | |
let rec insertions x = function | |
| [] -> [[x]] | |
| (y :: ys) as l -> (x::l)::(List.map (fun x -> y::x) (insertions x ys)) | |
let rec permutations = function | |
| [] -> seq [ [] ] | |
| x :: xs -> Seq.concat (Seq.map (insertions x) (permutations xs)) | |
/// Renders every permutation of 1..n as text: the first line is the number
/// of permutations, followed by one space-separated permutation per line.
/// Every line, including the last, is terminated by "\n".
let perms n =
    // `permutations` yields a lazy sequence; materialise it once so that the
    // count and the rendering do not each regenerate every permutation.
    let pers = permutations [1..n] |> Seq.toList
    let lines = pers |> List.map (fun perm -> String.Join(" ", perm))
    // A single join replaces repeated `str + ...` concatenation, which copied
    // the ever-growing accumulator on every step.
    String.Join("\n", string pers.Length :: lines) + "\n"
File.WriteAllText(dir + "perms.txt", perms 7) | |
///////////////////////////////////////////////////////////// | |
let str = @">Rosalind_6404 | |
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC | |
TCCCACTAATAATTCTGAGG | |
>Rosalind_5959 | |
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT | |
ATATCCATTTGTCAGCAGACACGC | |
>Rosalind_0808 | |
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC | |
TGGGAACCTGCGGGCAGTAGGTGGAAT" | |
/// Computes the GC fraction of every record in a FASTA-formatted string.
/// Returns one (id, fraction) pair per record, where `id` is the header line
/// (the text between '>' and the first line break) and `fraction` is
/// (#G + #C) / (number of non-whitespace sequence characters), in [0, 1].
/// A record with an empty sequence gets 0.0.
let gc_content (s:string) =
    let dnas = s.Split('>')
    // dnas.[0] is whatever precedes the first '>' and is not a record.
    dnas.[1..]
    |> Array.map (fun dna ->
        // Split header from sequence at the first line break, so the header
        // does not have to end in a digit and "\r\n" line endings also work.
        let cut = dna.IndexOfAny([|'\r'; '\n'|])
        let id, body =
            if cut < 0 then dna, ""
            else dna.Substring(0, cut), dna.Substring(cut)
        let bases = Regex.Replace(body, "\s", "")
        // Count directly instead of indexing a symbol map: a sequence without
        // any G (or any C) must yield 0 for that base, not a missing-key error.
        let gc = bases |> Seq.filter (fun c -> c = 'G' || c = 'C') |> Seq.length
        id, (if bases.Length = 0 then 0. else float gc / float bases.Length))
let topgc s = | |
let id, gc = gc_content s |> Array.maxBy snd | |
sprintf "%s\n%f%%" id (gc * 100.) | |
topgc str | |
///////////////////////////////////////////////////////////// | |
/// Evaluates 2 * ((gc/2)^2 + ((1-gc)/2)^2) for the given value `gc`.
let pfromGc gc =
    let gcHalf = gc / 2.
    let restHalf = (1. - gc) / 2.
    2. * (gcHalf ** 2. + restHalf ** 2.)
String.Join(" ", "0.000 0.066 0.182 0.199 0.272 0.369 0.413 0.466 0.519 0.572 0.658 0.703 0.801 0.830 0.894 1.000".Split(' ') | |
|> Array.map (float >> pfromGc)) | |
///////////////////////////////////////////////// | |
/// RNA codon table: maps each three-letter codon (e.g. "AUG") to its
/// one-letter amino-acid code, or to the string "Stop" for stop codons.
let rnacodonTable =
    let table = @"UUU F CUU L AUU I GUU V
UUC F CUC L AUC I GUC V
UUA L CUA L AUA I GUA V
UUG L CUG L AUG M GUG V
UCU S CCU P ACU T GCU A
UCC S CCC P ACC T GCC A
UCA S CCA P ACA T GCA A
UCG S CCG P ACG T GCG A
UAU Y CAU H AAU N GAU D
UAC Y CAC H AAC N GAC D
UAA Stop CAA Q AAA K GAA E
UAG Stop CAG Q AAG K GAG E
UGU C CGU R AGU S GGU G
UGC C CGC R AGC S GGC G
UGA Stop CGA R AGA R GGA G
UGG W CGG R AGG R GGG G"
    // The text is a flat run of "codon value" pairs. Tokenising on any
    // whitespace and pairing neighbours makes the parse independent of how
    // many spaces (or which line endings) separate the columns.
    let tokens =
        table.Split([|' '; '\t'; '\r'; '\n'|], StringSplitOptions.RemoveEmptyEntries)
    [| for i in 0 .. 2 .. tokens.Length - 2 -> tokens.[i], tokens.[i + 1] |]
    |> Map.ofArray
/// Translates an RNA string codon by codon through `rnacodonTable` and
/// concatenates the looked-up values (stop codons contribute "Stop").
/// Codons start at offsets 0, 3, 6, ... up to and including rna.Length - 4,
/// so when the length is an exact multiple of three the final codon is not
/// translated. A codon missing from the table raises the map's lookup error.
let translate (rna : string) =
    let protein = System.Text.StringBuilder()
    let mutable offset = 0
    while offset <= rna.Length - 4 do
        protein.Append(rnacodonTable.[rna.Substring(offset, 3)]) |> ignore
        offset <- offset + 3
    protein.ToString()
translate "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA" | |
translate (File.ReadAllText(dir+"rosalind_prot.txt")) | |
/////////////////////////////////////////////////////// | |
/// Finds every (possibly overlapping) occurrence of `sub` in `s`.
/// Returns the 1-based start positions in ascending order.
/// An empty (or null) `sub` yields an empty list.
let motifs (s:string) (sub : string) =
    // An empty pattern matches at every index, which previously walked off
    // the end of `s` and raised ArgumentOutOfRangeException.
    if String.IsNullOrEmpty sub then []
    else
        let rec findall output (i:int) =
            // Ordinal comparison: the inputs are plain symbol strings, so
            // culture-specific matching rules must not apply.
            match s.IndexOf(sub, i, StringComparison.Ordinal) with
            | -1 -> output
            // Resume one past the hit so overlapping occurrences are found.
            | pos -> findall (pos + 1 :: output) (pos + 1)
        findall [] 0 |> List.rev
String.Join(" ", motifs | |
"CTTTAAACGACATGACCCCAACGACATTGAACGACATCAGCCGTCTCAACGACATTACGGCAACGACAAAACGACATCTCCTGCGAAACGACAAACGACAATAACGACACAACGACATCAACGACAAACGACACAAACGACAGAACGACAGAACGACACGATATTAAACGACAGGAAACGACATAACGACAGCGTCCTAGTTTACAGAACGACAAGAGTCGATTCAACGACATTAACGACACGAACGACATACTAAGACTAACGACAAACGACATAAACGACAGTTGTCAAACGACAAACGACAGACATCATAAACGACACTAACGACACGTAAGAACGACAAACAACGACACAAACGACAAACGACAGGTTAACGACATAGAACGACAAAACGACATCTTAACGACATAACGACACATTAACGACAATTCATGAACGACAATAATACTAACGACAAACGACACCAACGACATAACGACAAACGACATCTATACAACAACGACAGCAGCTAAACGACATTAACGACATAACGACAAACGACATAAACGACATAACGACATCCTAACGACAGTAACGACACCAACGACATCCTTGCAACGACACGTGCAACGACACTGAAACGACAAACGACATTCCTTGAACGACAAACGACAAACGACATCAACGACACCAACGACAAACGACAAACGACAAACGACATCGCAACGACAAACGACAAACGACATAACGACATTAAAAGGAACGACAACGCAACGACAGAACGACAAAACGACAGGAAACGACAGAACGACACCGCAGGATGCCGATAATGCCGGCAACGACAAACGACAAACGACACAACGACA" | |
"AACGACAAA") | |
//////////////// | |
let motifProb (r:int) n m gc = Math.Round((pfromGc gc) ** n * (m - n + 1.), r) | |
let dat = "0.000 0.105 0.165 0.219 0.257 0.310 0.388 0.412 0.465 0.518 0.576 0.640 0.680 0.747 0.789 0.854 0.921 1.000".Split(' ') |> Array.map float | |
String.Join(" ", dat |> Array.map (motifProb 3 8. 9093.)) | |
(* | |
Go through half the string. from both directions at once | |
add to a lstr and an rstr. compare lstr to rstr, if they are equal set a flag. if not equal continue | |
if the next char is not equal then end. else continue | |
*) | |
let borderLen (str:string) = | |
let rec findfail state lstr rstr i = | |
let lstr', rstr' = lstr + string str.[i], string str.[str.Length - 1 - i] + rstr | |
let state' = lstr' = rstr' | |
match state' with | |
| false when state = true -> lstr'.Length - 1 | |
| _ when i >= str.Length / 2 - 1 -> if state' then lstr'.Length else 0 | |
| _ -> findfail state' lstr' rstr' (i + 1) | |
findfail false "" "" 0 | |
/// Builds the KMP failure array of `s`: element k (0-based) is the length of
/// the longest proper prefix of s.[0..k] that is also a suffix of it.
let failureArray (s:string) =
    // border.[p] is the border length of the prefix of length p;
    // border.[0] = -1 is the sentinel that stops the fallback chain.
    let border = Array.create (s.Length + 1) 0
    border.[0] <- -1
    for pos in 1 .. s.Length do
        let mutable k = border.[pos - 1]
        // Fall back through shorter borders until one can be extended by
        // the character at pos - 1, or the sentinel is reached.
        while k <> -1 && s.[k] <> s.[pos - 1] do
            k <- border.[k]
        border.[pos] <- k + 1
    border.[1..]
"CAGTAAGCAGGGACTG" |> failureArray | |
let str1 = File.ReadAllText(dir+"rosalind_kmp.txt").Trim() | |
let res = String.Join ( " " ,failureArray str1) | |
File.WriteAllText(dir+"roskmpout.txt", res) | |
/////////////////////////////// | |
let sqstr = @"GATTACA | |
TAGACCA | |
ATACA" | |
let dats = sqstr.Split('\n') | |
let dats2 = [|"crayon"; "raygun"; "crayfish";"crapton"|] | |
let dats3 = [| "pinging"; "dinging"|] | |
let commonSubstrs (strings : string []) = | |
let start = HelperFunctions.longestCommonSubstring strings.[0] strings.[1] | |
strings.[2..] |> Array.fold (fun (fset) curword -> fset |> Set.map (fun w -> HelperFunctions.longestCommonSubstring w curword) | |
|> Set.unionMany) start | |
let str1 = File.ReadAllText(dir+"rosalind_lcs.txt").Trim() | |
let dats = str1.Split('\n') | |
let lcs = commonSubstrs dats | |
/////////// | |
//////////////////////////////////////////////// | |
let rec listPermutations = function | |
| [] -> Seq.singleton [] | |
| (l::ls) -> seq { for x in l do for xs in listPermutations ls do yield (x::xs) } | |
let lexic n (alpharaw:string) = | |
let alpha = alpharaw.Replace(" " , "") | |
let sq = listPermutations [for i in 0..n - 1 -> alpha] | |
|> Seq.toArray | |
|> Array.map (fun cl -> String.Join("", cl)) | |
let order (alph:string) = alph |> Seq.mapi (fun i c -> c, i) |> Map.ofSeq | |
let lookup (order:Map<char,int>) char = order.[char] | |
let lexorder = order alpha | |
sq |> Array.sortBy (fun (str : string) -> str |> Seq.map (lookup lexorder) |> Array.ofSeq) | |
let res = String.Join("\n", (lexic 3 "J T Y G D N")) | |
let lexic2 k n (alpharaw:string) = | |
let alpha = alpharaw.Replace(" " , "") | |
let sq = [for m in k..n -> listPermutations [for i in 0..m - 1 -> alpha] ] | |
|> Seq.concat | |
|> Seq.toArray | |
|> Array.map (fun cl -> String.Join("", cl)) | |
let order (alph:string) = alph |> Seq.mapi (fun i c -> c, i) |> Map.ofSeq | |
let lookup (order:Map<char,int>) char = order.[char] | |
let lexorder = order alpha | |
sq |> Array.sortBy (fun (str : string) -> str |> Seq.map (lookup lexorder) | |
|> List.ofSeq) | |
let res2 = String.Join("\n", (lexic2 1 4 "Q W U L P H X I S C G")) | |
Clipboard.SetText(res2) | |
////// | |
let codeexons (dna : string) (introns: string[]) = | |
introns |> Array.fold (fun (splicedDna:string) curintron -> | |
splicedDna.Replace(curintron, "")) dna | |
let dna = "ATGTGCGAACAAAATGGCGTAAAAGTCAAGGTTGTCGGCTCAGAGTGTCTATAACATTATCAAGTGATGCCGGATATTTGAATCTCGCAGCACCAGCAAGAATAATGGTTTTAGAGAATGAGACCTTGACGGCGCGGAATAGAGGGGTGACCCCCTCCATAGTCGTTCCAACAACGGGAATCATACTAGAGTCCTACCACGACGCCAATATTAACTCGATCCGGGGGCACGGACCATATGCAACGTCTCCGCCAATTCCCGCCAAGCTCAAGAGGGTAACCATGGTCCAGGAAGCGTCCTGGAACACCCAGCATCGAAGGTTGGAGCAGTGGAGGTTCACTTATACTAACAGACTCGGAGTGATAAAAGTCGACTGCGGTATTCTGGTATATACATCGCCTAGGTAGTGTTTTGATTCACCACCTTGAGGGAATCCGGATGTTCCGCGACCTCACGATTCTCAAATTAGAAACAATAGATCAGCAACGACTGTAGACACCGTGGTTGATATACAAAGGCTCTTAAGCTGCAAGCCTAAAACGTGTGTGTCCCGGCTTACAGGGACCTGGGCCCAGGCATGAGAAGATACAGAAGGCTAAGAGACCGGGAGCTTGACTTGGCTGCAAGTAGCCAAAGTGACTCCTCAACGTGTATACACTAGATGCAGTCGAATTGGGCCGCGGTACCGGGGGGGAAACACAATCCGAAATACTTCATCGAACCTTTACATATTTGATCAGAGACGCAGTGGATTCCCTGGTCTACGATTCATTGTCGGTATCCGAAATGGTCCCTACCAACTGGGCATCCACTACGTTTACATGACTGGTCTTTGGGCTCGTGCCAAGAACCGGAATAGGTATAGATGCGCTACGCTCCACGTGAAAGCACGTGGTATATCTGGAGTGAGCACCTCGCCCACGTCCGGAACGCTGCCGCATAA" | |
let introns = @"CTTTACATATTTGA | |
AGCTGCAAGC | |
TTAGAGAATGAGACCTTGACGGCGCGGAATAGAGGGGTGACCCCCTCCAT | |
GCTACGCTCCACGTGAAAGCACGTGG | |
CTAACAGACTCGGAGTGATAAAAGTCGACTGCGGTATTCTGGTAT | |
CACTACGTTTACATGACTGGTCTTTGGGCTCGT | |
ACTAGATGCAGTCGAATTGGGCCGCGGTACCGGGGGGGAAACACA | |
AAGGCTAAGAGACC | |
TCAGAGTGTCTATAACATTATCAAGTGATGCCGGA | |
CCCTGGTCTACGATTC | |
CGCGACCTCACGATTCTCAAATTAGA | |
CTACCACGACGCCA | |
TCAAGAGGGTAACCATGGTCCAGGAAG".Split('\n') | |
let res2 = (dna, introns) ||> codeexons |> transcribe |> translate | |
///////////////////// | |
let mmasstable = splitstr [|"\n"|] @"A 71.03711 | |
C 103.00919 | |
D 115.02694 | |
E 129.04259 | |
F 147.06841 | |
G 57.02146 | |
H 137.05891 | |
I 113.08406 | |
K 128.09496 | |
L 113.08406 | |
M 131.04049 | |
N 114.04293 | |
P 97.05276 | |
Q 128.05858 | |
R 156.10111 | |
S 87.03203 | |
T 101.04768 | |
V 99.06841 | |
W 186.07931 | |
Y 163.06333" |> Array.map (fun s -> let spl = splitstr [|" "|] s in char spl.[0], float spl.[1]) |> Map.ofArray | |
let lookupmass c = mmasstable.[c] | |
let proteinmass (protein:string) = protein |> Seq.fold (flip (lookupmass >> (+))) 0. | |
File.ReadAllText(dir+"rosalind_prtm.txt").Trim() |> proteinmass | |
///////////////// | |
let topermMap (ps : int []) = ps |> Array.mapi (fun i x -> i + 1, x) |> Map.ofArray | |
let inversePerm (p) = p |> Map.toArray |> Array.map swap |> Map.ofArray | |
let (<*>) (a:Map<int, int>) (b:Map<int,int>) = [| for i in 1..a.Count -> i, a.[b.[i]] |] |> Map.ofArray | |
let permAsline a = a |> Map.toArray |> Array.map snd | |
/// Counts the breakpoints of permutation `p` of 1..n: the number of adjacent
/// pairs in the extended sequence 0, p.[0], ..., p.[n-1], n+1 whose values
/// do not differ by exactly 1.
/// Returns (breakpoint count, p.Length); the second component is kept for
/// compatibility with callers that destructure the pair.
let breakPoints (p : int []) =
    let n = p.Length
    // Framing with 0 and n+1 lets one uniform pairwise scan cover both
    // boundaries and every interior adjacency. The previous fold never
    // compared p.[0] with p.[1], and its right-boundary term was written
    // `abs (y + 2) - x`, which applies abs to (y + 2) only.
    let extended = Array.concat [ [|0|]; p; [|n + 1|] ]
    let numPoints =
        extended
        |> Seq.pairwise
        |> Seq.filter (fun (a, b) -> abs (b - a) <> 1)
        |> Seq.length
    numPoints, n
/// Reverses the segment workarr.[i..j] (inclusive) and scores the result.
/// Returns (new array, breakpoint count of the new array). `workarr` itself
/// is not modified. Works for any array length; the upper bound was
/// previously hard-coded for ten elements.
let permuted (workarr:int[]) i j =
    let last = workarr.Length - 1
    let mid = workarr.[i..j] |> Array.rev
    let left = if i = 0 then Array.empty else workarr.[0..(i - 1)]
    let right = if j >= last then Array.empty else workarr.[j + 1..last]
    let n = Array.concat [left; mid; right]
    let score, _ = breakPoints n
    n, score
let rec search cnarr best i = function | |
| w when w <= (9 - i) -> | |
let obest, a = best | |
let na, sc = permuted cnarr i (i + w) | |
search cnarr (if sc = obest then | |
(sc, na :: a) | |
elif sc < obest then | |
(sc, [na]) | |
else best) i (w+1) | |
| _ -> best | |
let computePerm cnarr = | |
let rec intervs b = function | |
| i when i <= 8 -> intervs (search cnarr b i 1) (i+1) | |
| _ -> b | |
intervs (Int32.MaxValue, [[||]]) 0 | |
let rec pickDepth isroot depth (arrs : int [] list) = | |
match arrs with | |
| [x] -> if isroot then 0, x | |
else let sc, lst = computePerm x | |
if sc = 0 then | |
depth, lst.Head | |
else match lst with | |
| [x] -> depth, lst.Head | |
| x -> pickDepth false (depth + 1) lst | |
| x -> let sc = x |> List.map (fun d -> computePerm d, d) | |
let tsc, zz = sc |> List.minBy (fst >> fst) |> fst | |
if tsc = 0 then | |
depth, zz.Head | |
else let nn = match (sc |> List.filter (fun z -> z |> fst |> fst <= tsc )) with | |
| [p] -> depth, p |> fst |> snd |> List.head | |
| lsf -> lsf |> List.map (fun ((_,d),_) -> pickDepth false (depth + 1) d) | |
|> List.minBy fst | |
nn | |
let rec searchall t narr count = | |
if HelperFunctions.hamming t narr = 0 then printfn "%A" narr;printfn "%A" t; count | |
else | |
let _, bestArrs = computePerm narr | |
let ta, topn = pickDepth true 1 bestArrs | |
searchall t topn (count + 1 + ta) | |
let alls = @"9 3 10 7 2 5 6 4 1 8 | |
2 9 10 8 4 7 5 6 3 1".Split([|"\n"|] , StringSplitOptions.RemoveEmptyEntries) |> Array.map (fun (s:string) -> s.Split(' ') |> Array.map int) | |
sw.Restart() | |
let xuse = [| for i in 0..0 do | |
let pi = ((alls.[i + 1] |> topermMap |> inversePerm) <*> (alls.[i] |> topermMap)) |> permAsline | |
yield searchall [|1..10|] pi 0 |] | |
sw.Stop() | |
sw.Elapsed | |
// original depth search rev order min a b | |
//rr2 8 6 8 6 5 -> 8 5 8 5 5 -> 5 7 8 4 5 5 5 8 4 5 5 5 7 3 5 | |
//rr 7 5 6 8 4 -> 6 5 6 8 4 -> 7 5 7 8 7 6 5 6 8 4 6 5 6 7 4 | |
//site 9 4 5 7 0 -> 9 4 5 7 0 -> 9 4 6 7 0 9 4 5 7 0 9 4 5 7 0 | |
//rr5 N/A N/A N/A 7 7 5 8 5 7 6 5 7 5 | |
///////////////////////////// | |
let intToCodons = function 0 -> "A" | 1 -> "C" | 2 -> "G" | 3 -> "T" | _ -> "" | |
let consensus (dnas:string []) = | |
let len = dnas.[0].Length | |
let A = Array.create len 0 | |
let G = Array.create len 0 | |
let C = Array.create len 0 | |
let T = Array.create len 0 | |
let letterPairs = ['A', A; 'G',G;'C',C;'T',T] | |
let letterMap = letterPairs |> Map.ofList | |
dnas |> Array.iter (fun dna -> | |
dna |> String.iteri (fun i letter -> let pvec = letterMap.[letter] | |
pvec.[i] <- pvec.[i] + 1)) | |
[ for i in 0..len - 1 -> letterPairs |> List.maxBy (fun (_, dat) -> dat.[i]) |> fst ], | |
[|A;C;G;T|] | |
let d = File.ReadAllLines(dir+"rosalind_cons.txt") | |
let a,b = d |> consensus | |
Clipboard.SetText(String.Join("", a)) | |
String.Join("\n", [for i in 0..3 -> intToCodons i + ": " + String.Join (" ", b.[i])]) |> Clipboard.SetText | |
///////////////////////// | |
/// Parses FASTA-formatted text into an array of (id, sequence) pairs.
/// `id` is the header line (the text between '>' and the first line break);
/// `sequence` is the rest of the record with all whitespace removed.
/// Prints whatever precedes the first '>' with "%A", as before.
let readFASTA (s:string) =
    let dnas = s.Split('>')
    printfn "%A" dnas.[0]
    dnas.[1..]
    |> Array.map (fun dna ->
        // Cut at the first line break. The previous pattern
        // "(?<=\d)[\r\n|\n]" was a character class that also matched '|' and
        // required the header to end in a digit, so a header such as
        // "sp|P1|X" was split in the wrong place.
        let cut = dna.IndexOfAny([|'\r'; '\n'|])
        if cut < 0 then dna, ""
        else dna.Substring(0, cut), Regex.Replace(dna.Substring(cut), "\s", ""))
/// Builds the overlap edges of order `k` among the (id, dna) records.
/// For each unordered pair of distinct records, an edge a -> b is produced
/// when the last k characters of a equal the first k characters of b.
/// Each edge is ((fromId, toId), (direction, fromDna.Length - k)), where
/// direction is -1 if `from` precedes `to` in `data` and 1 otherwise.
/// Returns (edge list with the most recently found edge first, data.Length).
/// Records shorter than k take part in no edge.
let overlapGraph k (data : (string * string) []) =
    let suffix (s:string) = s.Substring(s.Length - k)
    let prefix (s:string) = s.Substring(0, k)
    data
    |> Array.fold (fun (edgelist, i) (id, dna) ->
        let edgelist' =
            data.[i + 1..]
            |> Array.fold (fun acc (idComp, dnaComp) ->
                if dna.Length < k || dnaComp.Length < k then acc
                else
                    // The two directions are tested independently: a pair can
                    // overlap both ways, and an if/elif chain lost the second
                    // edge in that case.
                    let acc =
                        if suffix dna = prefix dnaComp
                        then ((id, idComp), (-1, dna.Length - k)) :: acc
                        else acc
                    if suffix dnaComp = prefix dna
                    then ((idComp, id), (1, dnaComp.Length - k)) :: acc
                    else acc) edgelist
        edgelist', i + 1) ([], 0)
let dset = @">Rosalind_0498 | |
AAATAAA | |
>Rosalind_2391 | |
AAATTTT | |
>Rosalind_2323 | |
TTTTCCC | |
>Rosalind_0442 | |
AAATCCC | |
>Rosalind_5013 | |
GGGTGGG" |> readFASTA | |
let dnastrs = File.ReadAllText(dir+"rosalind_grph.txt").Trim() | |
let dset = dnastrs |> readFASTA | |
let dispList (elist : ((string * string)*(int*int)) list) = | |
elist |> List.rev | |
|> List.map (fun ((s1,s2),_) -> s1 + " " + s2) | |
String.Join("\n",overlapGraph 3 dset |> fst |> dispList) |> Clipboard.SetText | |
///////////////////// | |
let inverseRNATable = rnacodonTable |> Seq.groupBy keyValueToValue | |
|> Seq.map (fun (pcode, codons) -> pcode , codons |> Seq.length) | |
|> Map.ofSeq | |
let inferRNA n (protein : string) = | |
protein |> Seq.fold (fun count letter -> (inverseRNATable.[string letter] * count) % n) inverseRNATable.["Stop"] | |
"MQTIINFIPCHKGAGDVMEAPDIPNTNETSVPKVDLYTLGPRQDNSGREGPNKRFRVHTWRFMISFHMMMKQPKCLWNVHDLHHFWSHAENFKMRETITSCDYPADVGNNRLAREHKWTFTGGTHMYLCRKCSTQKYYLEASEDKRAAKITMCIIYLTIVVTFFTSGNRCTVHWSFFLYEATHYHPIAPWLQVSINCYGCAGWCNRQIVICIGRLFINKNCHNGLRHPIAHENMYQQAASEAVDFYLMMMDRWEDNGWWVTSIQIIPELDRRPYVSHVWYCWLCCDTCWWKHKNRNPRKNNKASAKFNTGNTKNPLMNVMKSNFWMVTQRAISTHNYNWEWGEDRYCVCKTTPFRRSNKQDRGNRDIVAEWKHTEYNPWKQMPHKKWVWRFRDCHGYRRSNHCQMFSLSMWIELPVMLDTPMDMIDVCGGNKRLFPPRVWWWLMTSGMGQSQGMMCRRMPDHCSRFTYGLKHWEPSAFMWKAEHWKGHSSLFDLFLAKPRGAVYQDIVEYMHWPYIKISEGKWWTGPIKEYDSNQLAVFVLDPWSFMEQKEDKMVKQGTKCQSNRTGWGFVCVWNLKEWQPGWETWQPQLFTMAKMHGSNHNCQQPGVKTCCNKCSRLHIHPKLVFFDPYLQSYMGNPKVCNPYCQVPKRNQEDKWYCGMQLHDHTRSRSRMLDDVIWSSVYAKHTWKCVMLYMPPGYKVLESGLDMSMDESHSACPLHQNQPFVNQFQHYGRWGHWVNNFQHPELGIFMWIRRNSWHNQHWELVECHFDKVPQRHFFADVTKAYQSGCVGMVMRNSSMTWAKKRLFFRAIQDRLNMAISCWKSVPEHQNMGNFPWTWAGVSMPKTTLMVWVVGMMQEQFMFTADHMYLTAQAGPKFRDNNCTVWHNADFDHTRGDSWTMLALEIKEVIGEGDIEYSRYGDNTPPVNHSNEEFFWMTRQGMIVGDPILTFGIDQKEYMHEEWHERWIHQTQYFIPDCCQTNARCEIQCDTIRMFPWTY" | |
|> inferRNA 1000000 | |
//////////////////// | |
let joinstr (sep:string) (a:'a seq) = String.Join(sep, a) | |
/// Greedily matches the characters of `comp`, in order, against `dna`,
/// scanning `dna` left to right and taking the earliest possible position
/// for each character. Returns the 1-based positions of the matched
/// characters, most recent match first; scanning stops once all of `comp`
/// is matched or `dna` is exhausted.
let splicedMotif (dna:string) (comp:string) =
    let mutable found = []
    let mutable matched = 0
    let mutable pos = 0
    while matched < comp.Length && pos < dna.Length do
        if dna.[pos] = comp.[matched] then
            found <- (pos + 1) :: found
            matched <- matched + 1
        pos <- pos + 1
    found
splicedMotif "ACGTACGTGACG" "GTA" |> List.rev |> joinstr " " | |
let s = "CCCTCGTGACATGTACGGCACAACCCTATTTGTCTTTGAGGGGAAAGTTACACTTTGTCCAAAACAGGTGCGAAATGATGACACAACGGTGGTGAAATGCGTTAATAGAGAGGGCAAACGGTGTAACAGCAACGTGGCGCTTAGTACTTGCGTGAGCGCACCCCTACCGAACTACGATCTATTGGGGGGCAATGTCCCCTTCTTTGTCTCCACGCAAACTTGGGGCTCGAACGCCTACCCCGCGTGCGCCTGGCTTTGCGGCAGCGACTCCTCACTGGGTTAGGTAACCCATTTAATTGGTTCATATGGTCCTCCGCCAGCCCGCTATTTACGTAAACCCATTCGAGTCAGCATCGTGCGGAGGAATCGATTCTAAAGGCAGGATCCTTACGACGGTGAACTGTCAGGCGAGGGATTTCTGTAGGTCTATCGAGTGAGTACCGTTATCTCTTAAAGCACCTTACGGCCCAGAGGCCCGAACACGGACATCCATGATGGTAGGGCGGTGCCCAGTGACAGACTATGGAAAACATTATAACTGGCACTACTTTCGAAGAGGTCGAGACTGAGCAGTCATTTGTTATAAGATCTGCTCTTAACATGCGGATCTGGGCAGGCCGTACTGAAGACCTTGACTTGCCATAAGCGCGCCACTGAAGTTATGGTACACAAGTTTCAGCCACAATGAAAAAACCGGGGGGGCCACCCCAGAAATTCCCGTCGGGCAAAGTGTGAACATAAGTACTCACATGTAGGTGTGTAACAAGATGGGGTGACTGAATGTTTCCCGTATGTTGCGTAAGATCTAAGCTGGTACTAGCACAACTGTACGAAGGTCAGGTACTAGCACAACATGGTTCTTGCTGGGCTGGAATCCCGAGTCGCTACCAATTGAAACGTACATGTGG" | |
let t = "GATTAAAAGACCCACAGTCCGGTCTGATATCAGAAGGTCACGGCACTTTTTAAGGCGATTTTTGCAATTAAG" | |
splicedMotif s t |> List.rev |> joinstr " " | |
/////////////////////////////////////// | |
splitstr [|"TAG"; "TAA"; "TGA"|] | |
let dna1 = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG" | |
let dna11 = "CGGATTTGTAGAACTCACAGGGCGCTCTTTACAGGATGTGCCGGTATATGCATACCCTAGATGAAGTCATCTCCCACCGATCGCGTAGCAGCTCGGTGGCAAAACATTAAGCCAGTATGAACCAACACGACTGCATCTTACTTCTCTTGCTTGACGTATTTATTGAAAGTGAAATGTCACTAGCAGGGATTCGTCCTTACTCAGCCTGGCTGGCAACGACTCCCTTCCATTAGCTTCTGCAGAGAAGTACTGAGCTGTTCGCTCCAAGCAGCATGGTGGGACAAACCCTTGATTGGGCGTGGATGGCGAGCCCGATTACAAAGGGATGGGTTTTCTGTGTGCTTATCAAATCTCCAATATCCTCAGCGGCGCTGAAGGTCTATACACCCAGAGACTAACGTGTTAACTACAACCGGCTATTCTAATGGGTATACCGGCCCGTTTGCCTTTCAATGAAGGGGCAAATAGTTCACGTGAGTGGCGCATAGCTATGCGCCACTCACGTGAACTATTTGCCCCTTCATACGCCCCTTAGCCTCGCTGATTCACTGACGGCCGTGGTGACTGCAGCGGAAGTTTTAGGGGGACTACTTGCTGGGCCTTTATTAGCGGGTTCGCTGGGATAGCCATCGTCGATTCGACCATCGGCCCTGAGCGGTGTGTCGCCGCGGGATCCAGACTGAGGGTTGCTTTGCGAATTTGTCAACATGCATCTCTTGGAGCTTACGTACACCGAGTGGAGAGGACCTACCGCGATAATGCATGACTTGCACATCTTCTTGCACATGACTAGGTGCCTACGCCTCCCGGATCGTTACTCGGATGGACTCTCTTAGCTCACCCTGTATAGTGGTGTAGCTGAGCACCGAGCTTATGCGGTGAAATCGCGCGCTTTGGTTGATCGAACCTACTGGTTTATTCATTACCGGACTACCTGGTCCTTGCTCCCGAGCACATGTCCCTGGCTAAACCCACC" | |
/// Collects the protein strings of all open reading frames in `rna`.
/// For every occurrence of the start codon "AUG", codons are read from that
/// position until a stop codon is reached; if one is reached, the amino
/// acids before it (starting with "M") form one candidate protein.
/// Candidates are prepended to `flist`, so the most recently found is first.
/// A codon missing from `rnacodonTable` raises the map's lookup error.
let frames flist (rna:string) =
    // Reads codons from `start` until a stop codon; None when the string
    // runs out first. Translating here, instead of translating the whole
    // tail and searching it for "Stop", also recognises a stop codon that is
    // the very last codon of the string.
    let readFrame start =
        let rec walk acc i =
            if i + 3 > rna.Length then None
            else
                match rnacodonTable.[rna.Substring(i, 3)] with
                | "Stop" -> Some (String.Join("", List.rev acc))
                | amino -> walk (amino :: acc) (i + 3)
        walk [] start
    let rec scan found from =
        match rna.IndexOf("AUG", from, StringComparison.Ordinal) with
        | -1 -> found
        | start ->
            // A start codon without a stop in its own reading frame must not
            // end the search: a later AUG may lie in a different frame that
            // does contain a stop codon.
            let found' =
                match readFrame start with
                | Some protein -> protein :: found
                | None -> found
            scan found' (start + 3)
    scan flist 0
let f1 = dna11 |> transcribe |> frames [] |> set | |
let f2 = dna11 |> rev_complement |> transcribe |> frames [] |> set | |
String.Join("\n", Set.union f1 f2) |> Clipboard.SetText | |
//////////// | |
/// Produces all 2^len sign assignments of `permutation`.
/// Result number m (0-based) negates the element at position j exactly when
/// bit (len - 1 - j) of m is set, so the first result is the input itself
/// and the last has every element negated. An empty input yields [[]].
let signPermutation (permutation : int list) =
    let len = permutation.Length
    // Integer shift instead of float exponentiation, and a direct bit test
    // instead of zero-padded binary strings (the padding step threw on an
    // empty permutation because the pad width became negative).
    let calc = 1 <<< len
    [ for mask in 0 .. calc - 1 ->
        permutation
        |> List.mapi (fun j k ->
            if ((mask >>> (len - 1 - j)) &&& 1) = 1 then -k else k) ]
let t = permutations [1..3] |> Seq.collect signPermutation |> Array.ofSeq | |
String.Join("\n", t |> Array.map (fun s -> String.Join(" ",s))) |> Clipboard.SetText | |
///////////////// | |
let dnastr = "TACTGCGCGCGTTTACGATCTTTCTTTCAGCCGCGTGGTGGCAGGTCTCCACTCCTATCACGTTACAAATTTCTATTCGCTCTACTACATCATGTCAGGTTCGTCTTACACGTACTCCGACCTTTGGTACTCCCGTAGCCCCCCACCCGTCTCACACGTCTTTTGAAATTTTACGATCTGGACTGTTTGTTTATTACACATCGCTCGGAGCTAGTGCATACCTGAATCGTACGTAGTGGGGCGACCCTGGCCCTTACCACCTTGCGGGGTGCAAGGCCAACACAACGCCGGGCCGATGGGCCTTCCTGACATGCGCGCAATGTGGAGTTCGTCTAAAAAACCCTGACTTCAACGAAGTCCTTGCATCAGTTAGTTTTCCTGGAGAAGAAACCGTAATAGATGCTGAATTTACTGGAAAGCCCACCTGGCCCTCCATGTCACTGTACGTTATGTAGCTAGCCCCCAGGGGCCCAAGTCGTTGCAACTACCGATAGGACGCGTTATCTTCAGTGAAGACCGCTTCAGTGCGACCTTGATCTCTCAGTGTACCGAGTTGCACCTAACACTGACGAGGCGTTATCGTTCATTGCGGGACTCTATCCTGGGTTATTAATAAGCGCATGGACTGAAAAGTCACGTCACTGATAGTCGTCTGGTCATACACATATATCGATTCGAAACGGCAATTGAATCTCAACGCATTTAACATTAGCCCTCCAGCTGCGCGCAACCGGTACTTGACGTTTATAAGCGTGGTGTAGTGCTGGCTCGGTGACAGCATCGGCCAAGACTAGGCGTATATCATGCAGATCGACACCCCATACCATTGCAGCACCATCAATTATAGCGGGGTTTGAGTTCGCTATGTGTGTCATAGGCATGTTTGTTATCTCTTGTGCCTGGCTACATTGGTCTAGGCTACAAGAGAGCCCCACTGTGGTAACGACTGGTATGCTACAGCG" | |
let dnapal = [for i in 0..dnastr.Length - 1 do | |
for j in 3..7 do | |
if i + j < dnastr.Length then | |
let dstr = dnastr.[i..i+j] | |
//printfn " i %d j %d i + j %d w %d" i j (i + j) dstr.Length | |
if dstr = rev_complement dstr then yield (i + 1,j + 1)] | |
String.Join("\n", dnapal |> List.map (fun (a,b) -> string a + " " + string b)) |> Clipboard.SetText | |
///////// | |
let subsqA = "GCGCAAGAATTGTCAAAATTGCCAGTCACCGTACGCCCGATCGCCAGGCCGTCTAACTCTCACATCACCAGGGGGAGCCAGGTGAAAATTCCACCAGAATTTTCGAGATCCTCTAACAGGTCGTTCTCTTACACAATTCGGTCGATGACCAGTTGCGCTTAGTGCAGTTTTGCGAATAACTCGAGCTAAGAATGGGGACCAGTGGAAAAAAGCGTCTTAATGCGCGTTCTGGCGGGAATGAGAAGTCAGCTGACAGCACACCTCGCGCGTGTCTTGCAATTCAGTCTCATTACCCCTCCGCTTAGTCTCATACTTCAACATGCTGCGATTGATAGCTTTTTGAGCATCACGTATGAGTATGTGGCTGACAAAGCTATATTGTTAGAAAGCACCACCGGTCAAATTAAATAAGTATTCCAGAGTAGACCCAACATTGCGAGCCCGCTATATCATCGTGATCGACTCATACGAGAACCGGGCGAGTTTACTTAACCCATTTGGCCATGTGTTCATGGATTGATACGATCTTGATAACGGGCCGATTCCGTCGGAGTTGTAGGTCCCACACTGGTGCGGACTGCCATCTGTGTGTTCTGATCGTTACAATATACCTGTTCACCACAAGGAACCTTCAATTGGTTTGCTCGATTTATGATAATAACGCTTTTCTGAATACGTTCGTTTCCAGTTGGGGCCTCTGACGGATTGGGCACTGGGGAGCGGCGACCGCGACTTTCAGCGAGAGACCGAAGCTGCCACTTTCGAGAAGAGATAGCATGTGAGCGCGCGGGAGGAGAATGACATCTACGCTCTTCCTCATGAAGTGGTGCCGAGGTCAGCAGCTTACGCGCACCGA" | |
let subsqB = "ATTTGCTCGCAGTACGAGAGATTTTTTTACTAAACCTAAGCAACCCAGAAAAGTAGGCTTACTCATAGAGGGATGAGTAGCATGTCGCAATTGCGGGCCATCTCCGTCAATCGGCGAATCGGCAACGCGTGAAAGGGTGTGTAGGAACCCACTTCTATAAACAAGTAGTGCTTCTGACCATACAATCAACAATGGTAGAGAACGATAAGAGCTCAGGGCGTCCATGCTTAGTTAATGGGGTCTGAGCGCTGAAAGATGCTGGTGCTCTAAACACCTCTGCGCTTTTTAGCTGTAACTTCAGCATACACTTGAGTTGAACACTATGGCGTTAGTGCTAAAACTTAATTCAAGGTATCAGTAACCGAGTGTGCTCGGAAAGGAGGGTCCCGCCTTGTCTCCTAGCACTCAGACCAATTTATTTTTTAGGGACCTGAAGACAGCATTGCCTCGATTGCCACTGGAGAAGCCGGATTGTAGGTCCTTTACCGCTGAAGAAAGTGATAAGTTAAACAGCTCATAAACCGGAGCTCATGTTACGAAGCGGGCAGCGCTATTATTTTAGAGGCGATCCTCCACCCTTTGAATCGCTTTTCCAACCGGGCTCACATATGCTCCCCAGTTTTCTGACTCAGACTCACATGGCCCAGTAGGTAATATGGTGTATCAGTTACGCACTCGCCGACGTGGTTGCTCGCCATTATTTTCAACCCGTATAAAGCGCCGCTGCTCCATATCGACAGTGGTGCGTAGATGTATGCGCAGGCAGTTATTGCCGATTCTGGCCACCAGAAAGTCGCTAGTCTCAACTCGACAGTTTGAGAGGAGAAACACTCGTATAAAGTAATTGACGTGGGCCGCAACTCAAAGG" | |
let ln ,i ,j, d = HelperFunctions.longestCommonSubSeq subsqA subsqB | |
let t = HelperFunctions.backtrackLCS d subsqA subsqB i j | |
t |> Clipboard.SetText | |
/////////// | |
let levStr = "WTDVFDHESYMIWVFCYGSMCLWFMHVRILGDGHWWVEEGRGKCLFCYMRSTIVDFKFRGGFHCGAWQGDERTMVTQQLPGRDAGNWMFLDEQPSYQTGSRGRRQCTMKPQCMGESMVSIINQICGIAWIHIHGRYGQTGRRDVGAGYDCTFHRNAYWLRGGNQHCGFNGINNKLKCVHVQVSVGPKDFPLKESSPVVEAPVMAMLKRYWLLKRSLRDDNETDAHMKKNVQDAAMPITRETKKTWQGVFSQPEMGWWWNMEPKMGECNVKIYIMLIKIVFFESFYHARPVHSSFAMISYPRLENHPRGCYFHHFFEIWWGGGHENEDRSQSRMQGDWTTGLRSDDTGMDDLRQFTGGNPCSTHQMDNCYSGAKMNPHWGCRKMRTIFMFFFEFMYYAHHTCVIVMDNDSTALILSFGWEDLELGFYWRDFSSDRDDCTQCMGENKVEWAWKGRIVKVFSRVKWLYMILCGSNEIVTWAGIPRHMVKYFLMVSCGAVRCADDRPKAWLLWYVDLVTMLPITAKLLDLMHETWFDRINYVESYKCFITRYQILDSINHLTHYVKFHCLHFFEAVDQFLVAVSLQGWKYHVLWKPLIHDYVKDLTKQGIPVKYKKLDNLQIEYNNIEYTMYIWNQLNSDVLNLGNFCGERRRGLWASNFCSCPQRRAKVGRCWTFPSWVCSGNMRLLVKMYMFNHAYINESIYCLETDMPENVRIIHWVMTKFWFYEMCCQPYCTAMPVYTQNYTEKKRCHNIECWDFNPYGQYVIVVWCTVVRNAEVPKMPASESLQEDFYKLVSIVTQNGCKHDYYEKGNNFVRTVEYVKFPQPQMAVSITEVICVWYFQPGQERCTYMVTIECEELYVNCCWFANGLDYGPLHINAHEMCKIAATMIVQRTDPRQCWQAHRNFNMMIVWGHQNWLRGLAPCDGQELVFKAGCTWQAVDQGQTALSWNTQLTELTGTFCDWGHRRTCEDIRNK" | |
let levstr2 = "YVNLRQVRAEVNQFYMVYQEVNISYLTVSTQQTMNHIPYCTHYPIWCGYNTADFYCWGTHHKKEFGMQKARKYGGDKMGLHLVYMGVIDRSASLINIKMINGRLWEEWEEELRDVSQFLWQMQGLVNSVKKYKTRKRPEWCAYTGRKWPNTKANGTFTPSRHQPLIARMDYQMFLNQQTTEYKWRCYPFQCSTKCGYLIWIYKQMEMWFRMNPLQYCEETFEEQQIAGFCLGVRWVNHDDEIYMEGPPWEPTNNIVKMYWETALPLWPTYDKWYHAPNFREYIANAWTSIGWNIFIAAPCVDYQYTVFKWLQELEAYETVSLMKLTYIQETLVHVDEETTFRCAMAKMNAAIFLRLKFCTEALESKEQMKSDCLWPDLVDQKIKADTHNLYDAWVRPAKNKKEKGEDNKSAVHMLTASMRISVNETDHAECKVGFLSMLYMENNTQNAMQFQNWDRAVKPFELWPMCRWKPHADNCMLTIKTIGDDGCNQISANPDRYQCECHELHTPTKRRRQDPSSKEHQFTNVMAYSVLFRMDRCHLEMGMWLFRHDNEPVARSMEVHYRHDVGIQQGNVLPEPMGARCMWPCTDSPYYTVWPTISKMILWMEKICPWGKKFEATHYVRDKNRIEGAMPCMDDYMVVWSKNCITQGIDKLADPTGKQINLMCNWQEGVMKNGCKQEHNWDVRMTSYLVHTCPYCPTLTCCGQHKGRLDCFIMFYRLPKYQINRVSSEIMPQQGMISVCVYCGFYKFLVFGLECACWDFTMAVCQWSPTTAQIHSSCDEMYMQVHRFFIHMEKMGRKENQMDYICQQYDCMNQCMQLQRRKDPVQFNWDLSSTHIIPGRENIEKLCKEQYHPTKVFANFCGMDNCIAGQHDWQLIRFYCSAPYKQMSAFKWMKHMLIKNNVLNYPFKFAPETRITPLMWWENKPCAVQLVSKHDSGNFGLPVYTCRGCVLDRQFMRCQNTIRWCTDSSI" | |
HelperFunctions.LevenshteinDistance levStr levstr2 | |
///////////// | |
let kmers = lexic 4 "A C G T" | |
let dnaF = @">Rosalind_6431 | |
CTTCGAAAGTTTGGGCCGAGTCTTACAGTCGGTCTTGAAGCAAAGTAACGAACTCCACGG | |
CCCTGACTACCGAACCAGTTGTGAGTACTCAACTGGGTGAGAGTGCAGTCCCTATTGAGT | |
TTCCGAGACTCACCGGGATTTTCGATCCAGCCTCAGTCCAGTCTTGTGGCCAACTCACCA | |
AATGACGTTGGAATATCCCTGTCTAGCTCACGCAGTACTTAGTAAGAGGTCGCTGCAGCG | |
GGGCAAGGAGATCGGAAAATGTGCTCTATATGCGACTAAAGCTCCTAACTTACACGTAGA | |
CTTGCCCGTGTTAAAAACTCGGCTCACATGCTGTCTGCGGCTGGCTGTATACAGTATCTA | |
CCTAATACCCTTCAGTTCGCCGCACAAAAGCTGGGAGTTACCGCGGAAATCACAG" |> readFASTA |> Seq.head |> snd | |
/// Raw text of the downloaded dataset, with surrounding whitespace removed.
let dnaF2 =
    let raw = File.ReadAllText(dir + "rosalind_kmer.txt")
    raw.Trim()
/// Second component (presumably the sequence) of the first FASTA record in the dataset.
let dnaStrF2 =
    let records = readFASTA dnaF2
    snd records.[0]
// One count per k-mer, in `kmers` order, space separated, placed on the clipboard.
kmers
|> Array.map (fun kmer -> motifs dnaStrF2 kmer |> List.length)
|> fun counts -> String.Join(" ", counts)
|> Clipboard.SetText
///////////////////////// | |
/// Sample reads for the shortest-superstring exercise, keyed by their
/// zero-based line position rendered as a string ("0", "1", ...).
/// Each read is trimmed so that a source file saved with CRLF line endings
/// does not leave a trailing '\r' on the reads, which would change their
/// length and spoil overlap comparisons.
let sr =
    @"ATTAGACCTG
CCTGCCGGAA
AGACCTGCCG
GCCGGAATAC".Split('\n')
    |> Array.mapi (fun i s -> string i, s.Trim())
    |> Map.ofArray
/// Last component of a 4-tuple.
let fourth (_, _, _, last) = last
/// First component of a 4-tuple.
let fst4 (first, _, _, _) = first
/// Recursive search for a short superstring of the reads in `sSet`.
/// Returns (length, superstring) for the best candidate found.
///
/// For every size from 1 to (length of the first read - 2) the first component
/// of `overlapGraph size reads` is collected; entries are ranked by the score
/// stored in their second component, and every entry tied with the lowest
/// score is turned into a merged string (prefix of one read + the whole other
/// read). A single best entry ends the search; otherwise each candidate is
/// explored recursively and the shortest result wins.
///
/// NOTE(review): `overlapGraph` is defined elsewhere; the meaning of its score
/// is assumed from usage - confirm. Read keys are converted with `int` and used
/// as array positions, which only lines up while keys equal positions, and the
/// recursive step removes both reads without inserting the merged string.
/// Both look unfinished; verify before relying on the result.
let rec seekSuper (sSet:Map<string,string>) =
    let reads = Map.toArray sSet
    let candidates =
        [ for size in 1 .. (snd reads.[0]).Length - 2 -> fst (overlapGraph size reads) ]
        |> List.concat
        |> List.sortBy (fun (_, (_, score)) -> score)
    let best =
        candidates
        |> List.filter (fun (_, (_, score)) -> score <= (candidates.Head |> snd |> snd))
        |> List.map (fun ((left, right), (_, score)) ->
            let merged = (snd reads.[int left]).[0 .. score - 1] + snd reads.[int right]
            printfn "%A" (score, merged)
            score, left, right, merged)
    match best with
    | [ only ] ->
        let text = fourth only
        text.Length, text
    | several ->
        several
        |> List.map (fun (_, left, right, merged) ->
            printfn "%A" merged
            sSet |> Map.remove left |> Map.remove right |> seekSuper)
        |> List.minBy fst
seekSuper sr
//[("3", "GCCGGAATAC"); ("1", "CCTGCCGGAA"); ("2", "AGACCTGCCG"); | |
// ("0", "ATTAGACCTG")] | |
// ATTAGACCTG AGACCTGCCG | |
// ATTAGACCTGCCG CCTGCCGGAA | |
// | |
// ATTAGACCTGCCGGAATAC | |
// ATTAGACCTGCCGGAATAC | |
/////////////////////////// | |
/// A discrete distribution: each outcome maps to (raw count, probability).
type RandomVariable<'a when 'a : comparison> = Map<'a, float * float>

/// Builds a distribution from (outcome, count) pairs.
/// Each probability is the outcome's count divided by the sum of all supplied counts.
let makeDist (items: ('a * float) []) : RandomVariable<'a> =
    let total = Array.sumBy snd items
    items
    |> Map.ofArray
    |> Map.map (fun _ count -> (count, count / total))
/// Recomputes every probability of `dist` from its raw counts:
/// probability = count / total, where the total comes from `sumMapGen` over the counts.
let normalize dist =
    let total = sumMapGen (fun convert (count, _) -> convert count) dist
    Map.map (fun _ (count, _) -> (count, count / total)) dist
/// Which allele a homozygous organism carries two copies of.
type Dominance =
    | Dominant
    | Recessive

/// Genotype of an organism for a single factor.
type Allele =
    | Homozygous of Dominance
    | Heterozygous
/// Population for the mating exercise: (label, genotype) -> head count, as a distribution.
let pops =
    makeDist
        [| ("k", Homozygous Dominant), 21096.
           ("m", Heterozygous), 27320.
           ("l", Homozygous Recessive), 33757. |]
/// Draws one individual of kind `thing` from the distribution `ps`.
/// Returns the (count, probability) entry `thing` had before the draw, together
/// with the renormalized distribution after one individual has been removed.
/// The kind disappears from the result when its count drops to exactly zero.
let takeOne thing ps =
    let item, rest = Map.partition (fun key _ -> thing = key) ps
    let chosen = item.[thing]
    let remaining = fst chosen - 1.
    let recount =
        // The 0. probability is a placeholder; normalize recomputes it from the counts.
        (if remaining = 0. then rest else rest.Add(thing, (remaining, 0.)))
        |> normalize
    chosen, recount
/// Scales `p` by a factor determined by the genotype pair:
/// 1 if either parent is homozygous dominant, 0.75 for two heterozygotes,
/// 0.5 for a heterozygote with a homozygous recessive, 0 for two homozygous recessives.
let mate p pair =
    match pair with
    | Homozygous Dominant, _
    | _, Homozygous Dominant -> p
    | Heterozygous, Heterozygous -> 0.75 * p
    | Heterozygous, _
    | _, Heterozygous -> p * 0.5
    | Homozygous Recessive, Homozygous Recessive -> 0.
/// For every ordered pair (first draw, second draw without replacement):
/// both genotypes and the pair probability scaled by `mate`.
let v =
    pops
    |> Seq.collect (fun x ->
        let (_, p), r = takeOne x.Key pops
        let al = snd x.Key
        r
        |> Seq.map (fun y ->
            let (_, al2), (_, p2) = keyValueToPair y
            al, al2, mate (p * p2) (al, al2)))
    |> Array.ofSeq
// Sum of the third component over all pairs.
v |> Array.sumBy third
////////////////////////////////////// | |
/// Binomial coefficient, taken from Math.NET.
let combins = MathNet.Numerics.Combinatorics.Combinations
/// Binomial probability terms P(X = x) for x = atleast .. total with success
/// probability `p`; summing the list gives P(X >= atleast).
let f total atleast p =
    let q = 1. - p
    [ atleast .. total ]
    |> List.map (fun (successes: float) ->
        p ** successes * q ** (total - successes) * combins (int total, int successes))
/// Sum of the `f` terms from N up to 2^k with p = 0.25, rounded to 3 decimals.
let iall k N =
    let population = 2. ** k
    let tail = List.sum (f population N 0.25)
    Math.Round(tail, 3)
iall 7. 32.
//////////////////////////////// | |
/// N-glycosylation motif N{P}[ST]{P}: N, anything but P, S or T, anything but P.
/// The third position is the class [ST]; inside a character class '|' is a
/// literal, so the previous [S|T] also accepted a '|' character there.
let regex = "N[^P][ST][^P]"
open System.Net | |
/// Shared web client used to download FASTA records.
let wc = new WebClient()
/// Splits a single FASTA record into (header line, sequence).
/// Lines are separated on "\r\n" or "\n" and blank lines are ignored; every
/// line after the first is concatenated to form the sequence.
/// Input with no non-blank line (for example an empty download) yields ("", "")
/// instead of failing with an index error.
let readFASTA2 (s:string) =
    match s.Split([| "\r\n"; "\n" |], System.StringSplitOptions.RemoveEmptyEntries) with
    | [||] -> "", ""
    | lines -> lines.[0], String.Join("", lines.[1..])
(* Sample FASTA record (header line followed by wrapped sequence lines) for
   UniProt entry P10761, ZP3_MOUSE. Not referenced by the other code visible
   in this section. *)
let str = ">sp|P10761|ZP3_MOUSE Zona pellucida sperm-binding protein 3 OS=Mus musculus GN=Zp3 PE=1 SV=4 | |
MASSYFLFLCLLLCGGPELCNSQTLWLLPGGTPTPVGSSSPVKVECLEAELVVTVSRDLFGTGKLV | |
QPGDLTLGSEGCQPRVSVDTDVVRFNAQLHECSSRVQMTKDALVYSTFLLHDPRPVSGLSILRTNR | |
VEVPIECRYPRQGNVSSHPIQPTWVPFRATVSSEEKLAFSLRLMEENWNTEKSAPTFHLGEVAHLQ | |
AEVQTGSHLPLQLFVDHCVATPSPLPDPNSSPYHFIVDFHGCLVDGLSESFSAFQVPRPRPETLQF | |
TVDVFHFANSSRNTLYITCHLKVAPANQIPDKLNKACSFNKTSQSWLPVEGDADICDCCSHGNCSN | |
SSSSQFQIHGPRQWSKLVSRNRRHVTDEADVTVGPLIFLGKANDQTVEGWTASAQTSVALGLGLAT | |
VAFLTLAAIVLAVTRKCHSSSYLVSLPQ" | |
/// Collects `f m` for every match `m` of `rx` in `s`, including overlapping
/// ones: after each hit the search restarts one character past the hit's start.
/// Results are returned most-recent-first (reverse order of position).
///
/// The loop is driven by `Match.Success` and always advances, so a pattern
/// that can match the empty string terminates instead of retrying the same
/// index forever.
let motifs2 f (s:string) (rx:Regex) =
    let rec collect found start =
        let hit = rx.Match(s, start)
        if not hit.Success then
            found
        else
            let found = f hit :: found
            let next = hit.Index + 1
            // Only a zero-length match at the very end can push `next` past the
            // string; Regex.Match would reject that start index, so stop here.
            if next > s.Length then found else collect found next
    collect [] 0
/// Compiled form of the motif pattern.
let rx = Regex(regex)
/// Downloads the FASTA record for id `p` from UniProt and locates the motif.
/// Prints and returns the 1-based start positions in ascending order, paired
/// with the id exactly as it was passed in.
let findMotif (p:string) =
    let url = sprintf "http://www.uniprot.org/uniprot/%s.fasta" (p.Trim())
    let _, dna = wc.DownloadString(url) |> readFASTA2
    let ccv = List.rev (motifs2 (fun hit -> hit.Index + 1) dna rx)
    printfn "%A" ccv
    p, ccv
/// Report lines for the motif exercise: for every protein id that contains the
/// motif at least once, the id followed by its space-separated positions.
/// Ids are split on "\r\n" or "\n" (matching readFASTA2), trimmed, and blank
/// entries are dropped, so CRLF source files neither leak '\r' into the
/// report nor trigger a request for an empty id.
let dats =
    let ids = @"Q0IBF4
Q8LCP6
P12923
Q9D9T0
Q706D1
P05155_IC1_HUMAN
P00743_FA10_BOVIN
P09791_PARB_TRYBB
A1JP66
P06870_KLK1_HUMAN
P07204_TRBM_HUMAN
Q9LHF1
P00304_ARA3_AMBEL
P02974_FMM1_NEIGO
P02186"
    ids.Split([| "\r\n"; "\n" |], StringSplitOptions.RemoveEmptyEntries)
    |> Array.map (fun line -> line.Trim())
    |> Array.filter (fun line -> line <> "")
    |> Array.map findMotif
    |> Array.filter (snd >> List.length >> (<>) 0)
    |> Array.map (fun (id, inds) -> sprintf "%s\n%s" id (String.Join(" ", inds)))
String.Join("\n", dats)
///////////////////////////// | |
/// Expected dominant-phenotype offspring per couple type: each couple has two
/// children, weighted by that couple type's probability of a dominant child.
let a =
    // Weights by position: the first three couple types 1, then 0.75, 0.5 and 0.
    let dominantShare = [ 1.; 1.; 1.; 0.75; 0.5; 0. ]
    [ 19117.; 19625.; 17122.; 17379.; 16109.; 18020. ]
    |> List.map2 (fun p x -> 2. * x * p) dominantShare
a |> List.sum
//////////////////////////// | |
/// Number of partial permutations P(n, k) = n * (n-1) * ... * (n-k+1).
/// `n` and `k` are still given as floats; the factors are the members of
/// {2..n} that are not in {2..n-k}.
/// The product is accumulated as a BigInteger: a float product silently drops
/// low-order digits once it exceeds 2^53 (e.g. n = 100, k = 10), which makes a
/// subsequent modulo meaningless.
let partialp2 n k =
    let num, den = set [ 2.0 .. n ], set [ 2.0 .. (n - k) ]
    Set.difference num den
    |> Set.fold (fun acc (factor: float) -> acc * Numerics.BigInteger(factor)) Numerics.BigInteger.One
partialp2 84. 9. % 1000000I
/// Common logarithm of the probability of the string "ACGATACAA" when G and C
/// each occur with probability 0.129/2 and every other symbol with (1 - 0.129)/2.
let na =
    let logProb symbol =
        match symbol with
        | 'G' | 'C' -> log10 (0.129 / 2.)
        | _ -> log10 ((1. - 0.129) / 2.)
    Seq.fold (fun total symbol -> total + logProb symbol) 0. "ACGATACAA"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment