Created
October 31, 2016 01:20
-
-
Save HerbM/946d25d0c704dcdc304a8747691cb7d2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.IO | |
open System.Collections.Generic | |
open System.Text | |
open System.Text.RegularExpressions | |
#load "Library1.fs" | |
open Library1 | |
// Define your library scripting code here | |
/// Builds a `Regex` from the pattern string `s`, using the default regex options.
let regex (s : string) : Regex =
    Regex(s)
/// Infix regex test: `s =~ re` is true when `re` finds a match anywhere in `s`.
let (=~) (s : string) (re : Regex) : bool =
    re.IsMatch(s)
/// Negated regex test: `s <>~ re` is true when `re` finds no match in `s`.
let (<>~) (s : string) (re : Regex) : bool =
    re.IsMatch(s) |> not
// Sample text used below to try out the regex match operator.
let samplestring : string = "This is a string"
/// Lazily enumerates the lines of the text file at `filePath`.
/// The file is opened when the returned sequence is first enumerated, so a
/// missing or unreadable file surfaces as an exception at that point.
// Fix: the argument used to be ignored and "sample.txt" was always read.
let readLines (filePath : string) =
    System.IO.File.ReadLines(filePath)
// Quick check of the =~ operator: true when the sample text contains "This".
// At top level the result is only displayed when run in F# Interactive.
samplestring =~ (regex "This")
/// True when `e` equals at least one element of the sequence `x`.
/// Example: `elementOf "abc" 'c'` evaluates to true.
let elementOf x e =
    x |> Seq.exists (fun item -> e = item)
/// Lazily yields, in their original order, the characters of `s` that also
/// occur somewhere in `filter`. The result is a character sequence, not a string.
let filterString (s : string) (filter : string) =
    // A character is kept when any character of `filter` equals it.
    let isWanted ch = filter |> Seq.exists (fun allowed -> allowed = ch)
    Seq.filter isWanted s
/// Contents of the sample file with its line breaks removed: every line is
/// read and the lines are joined with no separator between them.
/// This is a plain value rather than a function, so the file is read once,
/// when this binding is evaluated.
let readDNAdata =
    let lines = System.IO.File.ReadAllLines("\dev\sample.txt")
    String.concat "" lines
/// Splits one record into its leading pieces at each position where a digit is
/// immediately followed by one of the bases A, C, T or G, and keeps at most the
/// first two pieces.
/// For a record whose ID ends in a digit and is directly followed by the bases,
/// the result is `[| id; sequence |]`.
/// When the text contains no such boundary (for example an empty string) the
/// result has a single element, so callers that index element 1 must check the
/// length first.
// Fix: the previous `.[0..1]` slice raised on F# versions without tolerant
// slicing when fewer than two pieces were produced; Array.truncate never raises.
let parseDNAdata (line : string) =
    Regex.Split(line, "(?<=\d)(?=[ACTG])")
    |> Array.truncate 2
/// Computes the GC content of one parsed record.
/// `dnaData.[0]` is taken as the record ID and `dnaData.[1]` as the base
/// sequence, so the array must hold at least two elements; indexing raises
/// otherwise.
/// Returns `(id, percentage, sequence)`, where the percentage is
/// 100 * (number of 'G' and 'C' characters) / (sequence length).
/// The division is done in floating point, so an empty sequence yields NaN.
let GCpercent (dnaData : string []) =
    let recordId = dnaData.[0]
    let bases = dnaData.[1]
    let total = bases.Length
    // Count only the characters that are exactly 'G' or 'C'.
    let gcTotal =
        bases
        |> Seq.filter (fun b -> b = 'G' || b = 'C')
        |> Seq.length
    let percent = 100.0 * float gcTotal / float total
    (recordId, percent, bases)
/// Lazy sequence of `(id, GC percentage, sequence)` tuples, one per record in
/// `readDNAdata`. Records are delimited by '>', each record is split into ID and
/// sequence by `parseDNAdata`, and then measured by `GCpercent`.
/// Nothing is parsed until the sequence is enumerated.
// Fix: when the data starts with '>', a plain Split yields an empty first chunk
// that cannot be parsed into an ID and a sequence; empty chunks are now dropped.
let getDNAdata =
    readDNAdata.Split([| '>' |], System.StringSplitOptions.RemoveEmptyEntries)
    |> Seq.map (parseDNAdata >> GCpercent)

// Evaluate the sequence so F# Interactive displays the results.
getDNAdata
ninjarobot,
Thanks. I did some changes to that function to eliminate the out of bounds and agree it is (almost certainly) there.
Thanks. This was my first F# program so I wasn't immediately sure how to do the match...with and really appreciate your example and will review/use/adapt it.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
baronfel asked for the data (ignore everything from the hash line upward, inclusive):
The data doesn't come all on one line, so our main delimiter is the ">" that precedes each ID line.
The line breaks WITHIN a single element/string are in the original data.
There was no trash or spurious characters that we found.