Skip to content

Instantly share code, notes, and snippets.

@swlaschin
Created April 21, 2016 17:00
Show Gist options
  • Save swlaschin/ff7943a028952ab3dde8034f88ee599c to your computer and use it in GitHub Desktop.
Save swlaschin/ff7943a028952ab3dde8034f88ee599c to your computer and use it in GitHub Desktop.
Updated version of R to F# code (from https://gist.github.com/ovatsus/5354187)
#r "packages/FSharp.Data/lib/net40/FSharp.Data.dll"
open FSharp.Data
// Row type for the ozone data set; CsvProvider builds it from the sample CSV at this URL.
type OzoneFile = CsvProvider<"http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv">
// Use the sample document itself as the data to analyse.
let csv = new OzoneFile()
// Q: What are the column names of the dataset?
csv.Headers
// Q: Extract the first 2 rows of the data frame and print them to the console.
Seq.take 2 csv.Rows
// Q: How many observations (i.e. rows) are in this data frame?
Seq.length csv.Rows
// Q: Extract the last 2 rows of the data frame and print them to the console.
// The rows are copied into an array first so they can be sliced and indexed.
let data = Array.ofSeq csv.Rows
data.[(data.Length - 2)..]
// Q: What is the value of Ozone in the 47th row? (index 46, arrays are zero-based)
(Array.get data 46).Ozone
//How many missing values are in the Ozone column of this data frame?
open System
// Tally the rows by whether their Ozone reading is NaN (this script treats NaN as
// "missing"); the count paired with `true` is the number of missing values.
data |> Seq.countBy (fun row -> Double.IsNaN(row.Ozone))
/// Returns the Ozone reading of a single row of the ozone CSV.
let getOzone (row : OzoneFile.Row) =
    row.Ozone
// How many missing values : alternative #1
// Same tally as before, but the projection and the NaN test are composed into one key function.
data |> Seq.countBy (getOzone >> Double.IsNaN)
//What is the mean of the Ozone column in this dataset? Exclude missing values (coded as NA) from this calculation.
// Keep only the readings that are real numbers, then average what is left.
data
|> Seq.choose (fun row -> if Double.IsNaN(row.Ozone) then None else Some(row.Ozone))
|> Seq.average
/// True when `item` is an actual number, i.e. anything other than NaN.
let isNotNaN item = not (Double.IsNaN(item))
//What is the mean of the Ozone column? alternative #1
// Project to the Ozone readings, drop the NaNs, then average.
Seq.average (Seq.filter isNotNaN (Seq.map getOzone data))
// alternative #2
// Drop the rows with a NaN reading first, then average the Ozone projection of the survivors.
Seq.averageBy getOzone (Seq.filter (fun row -> isNotNaN (getOzone row)) data)
//Extract the subset of rows of the data frame where Ozone values are above 31 and Temp values are above 90. What is the mean of Solar.R in this subset?
// The two conditions are applied as successive filters; a NaN Ozone reading fails
// the `>` comparison and is therefore excluded.
data
|> Seq.filter (fun row -> row.Ozone > 31.0)
|> Seq.filter (fun row -> row.Temp > 90)
|> Seq.map (fun row -> row.``Solar.R``)
|> Seq.average
//What is the mean of "Temp" when "Month" is equal to 6?
// Temp is converted to float so that the average is not restricted to whole numbers.
seq { for row in data do
        if row.Month = 6 then
            yield float row.Temp }
|> Seq.average
//Take a look at the 'iris' dataset that comes with R. In this dataset, what is the mean of 'Sepal.Length' for the species virginica?
// The provider type is instantiated inline here rather than through a named type alias.
let iris = new CsvProvider<"https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv">()
iris.Rows
|> Seq.filter (fun flower -> flower.Species = "virginica")
|> Seq.map (fun flower -> flower.``Sepal Length``)
|> Seq.average
//Continuing with the 'iris' dataset from Question 4, what R code returns a vector of the means of the variables 'Sepal.Length', 'Sepal.Width', 'Petal.Length', and 'Petal.Width'?
// One mean per measurement column, in the order listed in the question.
[ iris.Rows |> Seq.map (fun flower -> flower.``Sepal Length``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Sepal Width``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Petal Length``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Petal Width``) |> Seq.average ]
//Load the 'mtcars' dataset in R. How can one calculate the average miles per gallon (mpg) by number of cylinders in the car (cyl)?
let mtcars = new CsvProvider<"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv">()
// Group the cars by cylinder count, reduce each group to its mean mpg,
// and present the (cylinders, mean mpg) pairs in ascending cylinder order.
mtcars.Rows
|> Seq.groupBy (fun car -> car.Cyl)
|> Seq.map (fun (cylinders, cars) ->
    let meanMpg = cars |> Seq.map (fun car -> car.Mpg) |> Seq.average
    cylinders, meanMpg)
|> Seq.sortBy fst
//Continuing with the 'mtcars' dataset from Question 6, what is the absolute difference between the average horsepower of 4-cylinder cars and the average horsepower of 8-cylinder cars?
let avg4cyl, avg8cyl =
    // Mean horsepower over all cars with the given number of cylinders.
    let meanHp cylinders =
        mtcars.Rows
        |> Seq.filter (fun car -> car.Cyl = cylinders)
        |> Seq.averageBy (fun car -> float car.Hp)
    meanHp 4, meanHp 8
avg4cyl - avg8cyl |> abs
#r "packages/Deedle/lib/net40/Deedle.dll"
open System
open System.IO
open Deedle
// Web client used to download the CSV files that Deedle then reads from disk.
let wc = new Net.WebClient()
// Save the ozone data set next to the script as "ozone.csv" ...
wc.DownloadFile("http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv", "ozone.csv")
// ... and load it into a Deedle data frame.
let data = Frame.ReadCsv "ozone.csv"
//What are the column names of the dataset?
List.ofSeq data.Columns.Keys
//Extract the first 2 rows of the data frame and print them to the console
// Two equivalent ways: the Frame module function and a slice of the row series.
Frame.take 2 data
data.Rows.[0..1]
//How many observations (i.e. rows) are in this data frame?
Frame.countRows data
//Extract the last 2 rows of the data frame and print them to the console
Frame.takeLast 2 data
//What is the value of Ozone in the 47th row?
// Row key 46 is the 47th row; `?` reads the named column of that row.
data.Rows.[46]?Ozone
//How many missing values are in the Ozone column of this data frame?
// Number of keys minus number of values that are actually present.
data?Ozone |> fun ozone -> Series.countKeys ozone - Series.countValues ozone
//What is the mean of the Ozone column in this dataset? Exclude missing values (coded as NA) from this calculation.
Stats.mean (data?Ozone)
//Extract the subset of rows of the data frame where Ozone values are above 31 and Temp values are above 90. What is the mean of Solar.R in this subset?
// Missing values must be dealt with before the row filter, because reading a
// missing cell with `?` fails. Substituting 0 for them (Frame.fillMissingWith 0)
// would also turn missing Solar.R readings into zeros that get averaged in, so
// instead only the three relevant columns are kept and rows lacking any of them
// are dropped: a row without Ozone or Temp cannot satisfy the filter anyway, and
// a row without Solar.R has nothing to contribute to the mean.
data
|> Frame.sliceCols ["Ozone"; "Temp"; "Solar.R"]
|> Frame.dropSparseRows
|> Frame.filterRowValues (fun row -> row?Ozone > 31. && row?Temp > 90.)
|> Frame.getCol "Solar.R"
|> Stats.mean
//What is the mean of "Temp" when "Month" is equal to 6?
// Keep the rows for month 6, take their Temp column, and average it.
data
|> Frame.filterRowValues (fun row -> row?Month = 6.)
|> Frame.getCol "Temp"
|> Stats.mean
//Take a look at the 'iris' dataset that comes with R. In this dataset, what is the mean of 'Sepal.Length' for the species virginica?
// Download the data set to "iris.csv" and load it as a frame.
wc.DownloadFile("https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv", "iris.csv")
let iris = Frame.ReadCsv "iris.csv"
// Column names, to check how the measurements are spelled in this file.
List.ofSeq iris.Columns.Keys
// Species is text, so it is read with GetAs<string> rather than the float-valued `?`.
iris
|> Frame.filterRowValues (fun row -> "virginica" = row.GetAs<string>("Species"))
|> Frame.getCol "Sepal Length"
|> Stats.mean
//Continuing with the 'iris' dataset from Question 4, what R code returns a vector of the means of the variables 'Sepal.Length', 'Sepal.Width', 'Petal.Length', and 'Petal.Width'?
// Variant 1: take the column means of the frame as it is.
Series.observations (Stats.mean iris)
// Variant 2: remove the Species column explicitly before taking the column means.
Frame.dropCol "Species" iris
|> Stats.mean
|> Series.observations
//Load the 'mtcars' dataset in R. How can one calculate the average miles per gallon (mpg) by number of cylinders in the car (cyl)?
// Download the data set to "mtcars.csv" and load it as a frame.
wc.DownloadFile("https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv","mtcars.csv")
let mtcars = Deedle.Frame.ReadCsv "mtcars.csv"
// Column names, to check how the variables are spelled in this file.
mtcars.Columns.Keys |> Seq.toList
// Group the rows by the "cyl" column and take the mean *per group*.
// Stats.mean applied directly to the grouped frame would average each column over
// all rows and ignore the grouping, giving a single overall mpg; Frame.applyLevel
// with `fst` aggregates within each first-level key (the cylinder count) instead,
// so the result is a series of mean mpg keyed by cylinder count.
(mtcars |> Frame.groupRowsByString "cyl" |> Frame.applyLevel fst Stats.mean)?mpg
//Continuing with the 'mtcars' dataset from Question 6, what is the absolute difference between the average horsepower of 4-cylinder cars and the average horsepower of 8-cylinder cars?
// Mean horsepower per cylinder count, keyed by the cylinder count as a string.
let hps =
    let meansByCyl =
        mtcars
        |> Frame.groupRowsByString "cyl"
        |> Frame.applyLevel fst Stats.mean
    meansByCyl?hp
hps.["4"] - hps.["8"] |> abs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment