-
-
Save HerbM/946d25d0c704dcdc304a8747691cb7d2 to your computer and use it in GitHub Desktop.
open System | |
open System.IO | |
open System.Collections.Generic | |
open System.Text | |
open System.Text.RegularExpressions | |
#load "Library1.fs" | |
open Library1 | |
// Define your library scripting code here | |
/// Compiles the pattern text `s` into a `Regex` using the default options.
let regex (s: string) : Regex = Regex(s)
/// Match operator: `s =~ re` is true when `re` finds a match somewhere in `s`.
let (=~) (s: string) (re: Regex) : bool = re.IsMatch(s)
/// Negated match operator: `s <>~ re` is true when `re` finds no match in `s`.
let (<>~) (s: string) (re: Regex) : bool = re.IsMatch(s) |> not
/// Sample text used below to try out the regex match operator.
let samplestring : string = "This is a string"
/// Lazily enumerates the lines of the text file at `filePath`.
/// The previous version ignored its argument and always opened "sample.txt";
/// the path supplied by the caller is now the one that is read.
/// Exceptions from `System.IO.File.ReadLines` (e.g. missing file) propagate.
let readLines (filePath: string) : seq<string> = System.IO.File.ReadLines(filePath)
// Quick check of the match operator: does samplestring contain "This"?
regex "This" |> (=~) samplestring
/// True when the sequence `x` contains an item equal to `e`.
/// Example: elementOf "abc" 'c' evaluates to true.
let elementOf x e = x |> Seq.exists (fun item -> e = item)
/// Keeps, in their original order, the characters of `s` that also occur in
/// `filter`. The result is a `seq<char>` produced by `Seq.filter`.
let filterString (s : string) (filter : string) =
    Seq.filter (elementOf filter) s
/// Contents of the sample data file with all of its lines joined end-to-end
/// (no separator between lines). This is a value, so the file is read once,
/// when the binding is evaluated.
let readDNAdata =
    let lines = System.IO.File.ReadAllLines("\dev\sample.txt")
    String.concat "" lines
/// Splits one record of the form "<id><bases>" into [| id; bases |].
/// The split point is the zero-width position that follows a digit and
/// precedes one of A/C/T/G, so the ID must end in a digit and the sequence
/// must start with a base letter.
///
/// Returns at most two elements. When no split point exists (for example an
/// empty string or a header with no sequence) the result has a single element.
/// `Array.truncate` is used instead of the slice `.[0..1]` because slicing past
/// the end of a one-element array raises on F# versions without tolerant
/// slicing.
let parseDNAdata (line : string) =
    Regex.Split(line, "(?<=\d)(?=[ACTG])")
    |> Array.truncate 2
/// Computes the GC content of one parsed record.
/// `dnaData.[0]` is taken as the record ID and `dnaData.[1]` as the base
/// sequence. Returns (id, percentage of characters that are 'G' or 'C',
/// sequence). Indexing raises if `dnaData` has fewer than two elements, and an
/// empty sequence produces NaN because the percentage is a float 0/0.
let GCpercent (dnaData : string []) =
    let recordId, bases = dnaData.[0], dnaData.[1]
    let total = float bases.Length
    let gc = filterString bases "GC" |> Seq.length |> float
    (recordId, 100.0 * gc / total, bases)
/// GC statistics, as (id, GC percentage, sequence), for every record in
/// `readDNAdata`.
///
/// Records are delimited by '>'. Because the data begins with '>', a plain
/// split yields an empty first chunk, which used to reach `GCpercent` and fail
/// with an index-out-of-range error. Empty chunks are now dropped at the split,
/// and any chunk that did not parse into exactly [| id; sequence |] is skipped.
let getDNAdata =
    readDNAdata.Split([| '>' |], System.StringSplitOptions.RemoveEmptyEntries)
    |> Seq.map parseDNAdata
    |> Seq.filter (fun parts -> parts.Length = 2)
    |> Seq.map GCpercent

// Evaluate the result at top level.
getDNAdata
baronfel asked for the data (ignore everything from the hash line up, inclusive):
The data doesn't come all on one line, so our main delimiter is the ">" that precedes each ID line.
The line breaks WITHIN a single element/string are in the original data.
There was no trash or spurious characters that we found.
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT
ninjarobot,
Thanks. I made some changes to that function to eliminate the out-of-bounds error, and I agree it is (almost certainly) there.
Thanks. This was my first F# program so I wasn't immediately sure how to do the match...with and really appreciate your example and will review/use/adapt it.
I am guessing the out-of-bounds issue is happening on line 35 or 36 of `GCpercent`. You can be a little safer with a pattern match.
If you just want to ignore `dnaData` of the wrong size, then you can filter out the `None`s with `Seq.choose id`.