Last active
August 29, 2015 14:11
-
-
Save nfaggian/2d19e6756535c3adb777 to your computer and use it in GitHub Desktop.
Experimentation with "Deedle"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#I "/home/accounts/nfaggian/Desktop/fsharp/analysis/packages/Deedle.1.0.6/" | |
#load "Deedle.fsx" | |
#time;; | |
open System | |
open System.IO | |
open Deedle | |
/// Lazily enumerates every file below `baseFolder`: the files that sit directly
/// in the folder come first, followed by the files of each sub-directory
/// (visited recursively, in the order Directory.GetDirectories reports them).
let rec allFilesUnder baseFolder =
    seq {
        // Files located directly in this folder.
        for file in Directory.GetFiles(baseFolder) do
            yield file
        // Then descend into every sub-directory.
        yield! Directory.GetDirectories(baseFolder) |> Seq.collect allFilesUnder
    }
/// Loads the CSV file `filename`, groups its rows by the integer column
/// "forecast_day" and returns the mean of the "forecast_error" column for
/// each forecast day.
let frame_mean (filename: string) =
    let by_forecast_day =
        Frame.ReadCsv(filename)
        |> Frame.groupRowsByInt "forecast_day"
    // After grouping, row keys are pairs whose first component is the
    // forecast day; that component is used as the level for the mean.
    Stats.levelMean Pair.get1Of2 (by_forecast_day?forecast_error)
/// One row of the verification frame: the error observed for a given
/// forecast day in the file stamped with `date`.
type frame_spec = {date: string; forecast_day: int; forecast_error: float}

/// Builds one `frame_spec` record per element of `errors`.
///
/// errors: the error values, in forecast-day order.
/// date:   the date shared by every record; stored as `date.ToString()`.
///
/// Returns a lazy sequence with exactly one record per error. `forecast_day`
/// is the zero-based position of the error within `errors`.
/// NOTE(review): the day number is the position in `errors`, not a key taken
/// from the source data — confirm that zero-based numbering is intended.
let frame_records (errors: seq<float>, date: System.DateTime) =
    // Format the date once; it is identical for every record.
    let date_string = date.ToString()
    // A single indexed pass replaces the previous helper lists, whose
    // mismatched lengths were only hidden by Seq.zip3 truncating to the
    // shortest input, and which forced `errors` to be enumerated three times.
    errors
    |> Seq.mapi (fun day error ->
        { date = date_string; forecast_day = day; forecast_error = error })
/// Extracts the date encoded in a results file name.
///
/// The date is the text between the last '_' of the file name and the first
/// '.' that follows it, in "yyyyMM" form (e.g. ".../NSW_201508.csv").
///
/// filename: path (or bare name) of the results file.
/// Returns the parsed System.DateTime.
/// Raises System.FormatException when the extracted token is not a valid
/// "yyyyMM" date.
let date_parser (filename: string) =
    // Only the file name carries the date; directory names may themselves
    // contain '_' or '.', which would otherwise corrupt the extracted token.
    let name = System.IO.Path.GetFileName(filename)
    let last_segment : string = name.Split('_') |> Seq.last
    let stamp = last_segment.Split('.').[0]
    // Parse with the invariant culture so the result does not depend on the
    // calendar of the machine's current culture.
    System.DateTime.ParseExact(stamp, "yyyyMM", System.Globalization.CultureInfo.InvariantCulture)
// Lazy sequence of (date, per-forecast-day mean error) pairs: one pair for
// every file under the results folder whose path contains "NSW", visited in
// sorted path order.
let data =
    allFilesUnder "/work/nfaggian/verification_notebooks/results/"
    |> Seq.filter (fun path -> path.Contains("NSW"))
    |> Seq.sort
    |> Seq.map (fun path -> date_parser path, frame_mean path)
// Frame holding one row per (file date, forecast day) error record, built
// with Frame.indexRowsDate on "date" and then Frame.groupRowsByInt on
// "forecast_day".
let verification_frame =
    // Materialise `data` into a list first, then flatten the records
    // produced for each file into a single sequence.
    List.ofSeq data
    |> Seq.collect (fun (forecast_date, day_means) ->
        frame_records (day_means.Values, forecast_date))
    |> Frame.ofRecords
    |> Frame.indexRowsDate "date"
    |> Frame.groupRowsByInt "forecast_day"
/// Summary statistics reported by `describe` for one series.
type describe_spec = {count: int; min: float option; max: float option; median: float}

/// Reports descriptive statistics (count, minimum, maximum and median, as
/// computed by the Deedle `Stats` functions) for the series `x`, which is
/// keyed by (forecast day, date).
let describe (x: Series<(int * DateTime), float>) =
    let observations = Stats.count x
    let smallest = Stats.min x
    let largest = Stats.max x
    let middle = Stats.median x
    { count = observations; min = smallest; max = largest; median = middle }
// Broad-brush descriptive statistics of the forecast error, one row for each
// of the forecast days 1 to 7.
// NOTE(review): frame_records numbers forecast days from 0, so day 0 is not
// covered by this range — confirm that 1..7 is the intended set of keys.
seq { for day in 1 .. 7 -> describe (verification_frame?forecast_error.[day, *]) }
|> Frame.ofRecords
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment