Created
April 21, 2016 17:00
-
-
Save swlaschin/ff7943a028952ab3dde8034f88ee599c to your computer and use it in GitHub Desktop.
Updated version of R to F# code (from https://gist.github.com/ovatsus/5354187)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "packages/FSharp.Data/lib/net40/FSharp.Data.dll" | |
open FSharp.Data | |
/// Typed access to the ozone CSV; the column types come from the sample at this URL.
type OzoneFile = CsvProvider<"http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv">

// Instantiating without arguments loads the same file that was used as the sample.
let csv = new OzoneFile()

// Column names of the dataset.
csv.Headers

// First 2 rows of the data frame.
Seq.take 2 csv.Rows

// Number of observations (rows).
Seq.length csv.Rows

// Materialise the rows into an array so they can be sliced and indexed.
let data = Array.ofSeq csv.Rows

// Last 2 rows of the data frame.
data.[(data.Length - 2)..]

// Ozone value in the 47th row (index 46, the array is zero-based).
(Array.get data 46).Ozone
// How many missing values are in the Ozone column?
// Rows are bucketed by whether their Ozone reading is NaN; the `true` bucket
// holds the count of missing values.
open System
data |> Seq.countBy (fun row -> Double.IsNaN row.Ozone)

/// Projects a row onto its Ozone reading.
let getOzone (row : OzoneFile.Row) = row.Ozone

// Missing-value count, alternative #1: same tally, built from the named projection.
data |> Seq.countBy (getOzone >> Double.IsNaN)
// Mean of the Ozone column, leaving out the NaN (missing) readings.
data
|> Seq.choose (fun row -> if Double.IsNaN row.Ozone then None else Some row.Ozone)
|> Seq.average

/// True when the value is a real number, i.e. not NaN.
let isNotNaN item = not (Double.IsNaN item)

// Mean of Ozone, alternative #1: project first, then keep the non-NaN readings.
Seq.average (Seq.filter isNotNaN (Seq.map getOzone data))

// Alternative #2: keep the rows whose Ozone is present, then average by the projection.
data |> Seq.filter (isNotNaN << getOzone) |> Seq.averageBy getOzone
// Mean of Solar.R over the rows where Ozone is above 31 and Temp is above 90.
data
|> Array.filter (fun row -> row.Ozone > 31.0 && row.Temp > 90)
|> Array.map (fun row -> row.``Solar.R``)
|> Array.average

// Mean of Temp over the rows where Month equals 6.
// Temp is converted to float before averaging.
data
|> Array.filter (fun row -> row.Month = 6)
|> Array.map (fun row -> float row.Temp)
|> Array.average
// The 'iris' dataset: mean of the sepal length for the species virginica.
let iris = new CsvProvider<"https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv">()

iris.Rows
|> Seq.filter (fun flower -> flower.Species = "virginica")
|> Seq.map (fun flower -> flower.``Sepal Length``)
|> Seq.average

// Means of the four measurement columns over all rows, in the order
// sepal length, sepal width, petal length, petal width.
[ iris.Rows |> Seq.map (fun flower -> flower.``Sepal Length``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Sepal Width``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Petal Length``) |> Seq.average
  iris.Rows |> Seq.map (fun flower -> flower.``Petal Width``) |> Seq.average ]
// The 'mtcars' dataset: average miles per gallon (mpg) for each cylinder count (cyl).
let mtcars = new CsvProvider<"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv">()

mtcars.Rows
|> Seq.groupBy (fun car -> car.Cyl)
|> Seq.sortBy (fun (cyl, _) -> cyl)
|> Seq.map (fun (cyl, cars) ->
    // Average mpg within this cylinder group.
    let meanMpg = cars |> Seq.map (fun car -> car.Mpg) |> Seq.average
    cyl, meanMpg)
// Absolute difference between the average horsepower of 4-cylinder cars
// and the average horsepower of 8-cylinder cars.
let avg4cyl =
    mtcars.Rows
    |> Seq.filter (fun car -> car.Cyl = 4)
    |> Seq.map (fun car -> float car.Hp)
    |> Seq.average

let avg8cyl =
    mtcars.Rows
    |> Seq.filter (fun car -> car.Cyl = 8)
    |> Seq.map (fun car -> float car.Hp)
    |> Seq.average

avg4cyl - avg8cyl |> abs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "packages/Deedle/lib/net40/Deedle.dll" | |
open System | |
open System.IO | |
open Deedle | |
// Web client reused for every download in this script.
let wc = new System.Net.WebClient()

// Fetch the ozone CSV into the working directory, then load it as a Deedle frame.
wc.DownloadFile(
    "http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv",
    "ozone.csv")
let data = Frame.ReadCsv "ozone.csv"
// Column names of the dataset.
List.ofSeq data.Columns.Keys

// First 2 rows of the data frame, shown two ways: via Frame.take and via row slicing.
Frame.take 2 data
data.Rows.[0..1]

// Number of observations (rows).
Frame.countRows data

// Last 2 rows of the data frame.
Frame.takeLast 2 data

// Ozone value in the 47th row (row key 46).
data.Rows.[46]?Ozone

// Missing values in the Ozone column: number of keys minus number of values.
(data?Ozone |> Series.countKeys) - (data?Ozone |> Series.countValues)

// Mean of the Ozone column.
// NOTE(review): relies on Stats.mean skipping missing values - confirm against the Deedle docs.
Stats.mean (data?Ozone)
// Mean of Solar.R over the rows where Ozone is above 31 and Temp is above 90.
//
// Incomplete rows are dropped instead of zero-filled: filling the whole frame
// with 0 would also turn any missing Solar.R reading into a 0 that is then
// averaged in, biasing the mean. The frame is first narrowed to the three
// columns involved so that gaps in unrelated columns do not discard rows.
// (Presumably the original zero-fill was there so the `?` lookups in the
// filter never see a missing value; dropping the sparse rows gives the same
// guarantee.)
data
|> Frame.sliceCols ["Ozone"; "Temp"; "Solar.R"]
|> Frame.dropSparseRows
|> Frame.filterRowValues (fun row -> row?Ozone > 31. && row?Temp > 90.)
|> Frame.getCol "Solar.R"
|> Stats.mean
// Mean of "Temp" over the rows where "Month" equals 6.
data
|> Frame.filterRowValues (fun row -> row?Month = 6.)
|> Frame.getCol "Temp"
|> Stats.mean
// The 'iris' dataset: mean of the sepal length for the species virginica.
// Download the CSV next to the script and load it as a frame.
wc.DownloadFile(
    "https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv",
    "iris.csv")
let iris = Frame.ReadCsv "iris.csv"

// Column names, to check how the headers are spelled in this file.
List.ofSeq iris.Columns.Keys

iris
|> Frame.filterRowValues (fun flower -> flower.GetAs<string>("Species") = "virginica")
|> Frame.getCol "Sepal Length"
|> Stats.mean
// Means of the measurement columns of 'iris', as (column, mean) observations.
// Variant 1: take the per-column means of the whole frame.
Series.observations (Stats.mean iris)

// Variant 2: remove the "Species" column explicitly before taking the means.
Frame.dropCol "Species" iris
|> Stats.mean
|> Series.observations
// The 'mtcars' dataset: average miles per gallon (mpg) for each cylinder count (cyl).
wc.DownloadFile("https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv","mtcars.csv")
let mtcars = Deedle.Frame.ReadCsv "mtcars.csv"

// Column names, to check how the headers are spelled in this file.
mtcars.Columns.Keys |> Seq.toList

// Group the rows by "cyl" (as a string key), then average every column
// within each group. Applying Stats.mean directly to the grouped frame would
// produce one overall mean per column, so `?mpg` would be a single number
// rather than one mean per cylinder count; Frame.applyLevel with `fst`
// aggregates per group key instead.
(mtcars |> Frame.groupRowsByString "cyl" |> Frame.applyLevel fst Stats.mean)?mpg
// Absolute difference between the average horsepower of 4-cylinder cars
// and the average horsepower of 8-cylinder cars.
let hps =
    // Per-cylinder-group means of every column; the group key is "cyl" as a string.
    let meansByCyl =
        mtcars
        |> Frame.groupRowsByString "cyl"
        |> Frame.applyLevel fst Stats.mean
    meansByCyl?hp

hps.["4"] - hps.["8"] |> abs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment