Last active
August 19, 2016 22:37
-
-
Save ovatsus/5354187 to your computer and use it in GitHub Desktop.
R <-> FSharp.Data comparison
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "packages/FSharp.Data.1.1.10/lib/net40/FSharp.Data.dll" | |
open FSharp.Data.Csv | |
open FSharp.Data.Csv.Extensions | |
// Ozone data set, accessed untyped: cells are looked up by column name with `?`
// and converted explicitly (AsFloat / AsInteger) or compared as strings.
let csv = CsvFile.Load("http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv")

// Q: What are the column names of the dataset?
csv.Headers

// Q: Extract the first 2 rows of the data frame.
csv.Data |> Seq.take 2

// Q: How many observations (rows) are in this data frame?
csv.Data |> Seq.length

// Q: Extract the last 2 rows of the data frame.
// The rows are copied into an array so they can be sliced and indexed below.
let data = csv.Data |> Seq.toArray
data.[(data.Length - 2)..]

// Q: What is the value of Ozone in the 47th row? (index 46, zero-based)
data.[46]?Ozone

// Q: How many missing values are in the Ozone column?
// Rows are counted by whether the converted Ozone value is NaN.
open System
data
|> Seq.map (fun row -> row?Ozone.AsFloat())
|> Seq.countBy (fun ozone -> Double.IsNaN ozone)

// Q: What is the mean of the Ozone column, excluding missing values?
data
|> Seq.map (fun row -> row?Ozone.AsFloat())
|> Seq.filter (fun ozone -> not (Double.IsNaN ozone))
|> Seq.average

// Q: Mean of Solar.R over the rows where Ozone is above 31 and Temp is above 90?
data
|> Seq.filter (fun row -> row?Ozone.AsFloat() > 31.0 && row?Temp.AsInteger() > 90)
|> Seq.averageBy (fun row -> row?``Solar.R``.AsFloat())

// Q: What is the mean of "Temp" when "Month" is equal to 6?
data
|> Seq.filter (fun row -> row?Month = "6")
|> Seq.averageBy (fun row -> row?Temp.AsFloat())
// Q: In the 'iris' dataset, what is the mean of 'Sepal.Length' for the species virginica?
let iris = CsvFile.Load("https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv")
iris.Data
|> Seq.filter (fun flower -> flower?Species = "virginica")
|> Seq.averageBy (fun flower -> flower?``Sepal Length``.AsFloat())

// Q: Means of 'Sepal.Length', 'Sepal.Width', 'Petal.Length' and 'Petal.Width'
// (the columns in this CSV use a space instead of a dot in their names).
[ iris.Data |> Seq.averageBy (fun flower -> flower?``Sepal Length``.AsFloat())
  iris.Data |> Seq.averageBy (fun flower -> flower?``Sepal Width``.AsFloat())
  iris.Data |> Seq.averageBy (fun flower -> flower?``Petal Length``.AsFloat())
  iris.Data |> Seq.averageBy (fun flower -> flower?``Petal Width``.AsFloat()) ]
// Q: In the 'mtcars' dataset, what is the average miles per gallon (mpg)
// by number of cylinders (cyl)?
let mtcars = CsvFile.Load("https://raw.github.com/ropensci/rfigshare/master/inst/doc/mtcars.csv")
mtcars.Data
|> Seq.groupBy (fun car -> car?cyl)
|> Seq.sortBy (fun (cyl, _) -> cyl)
|> Seq.map (fun (cyl, cars) -> cyl, cars |> Seq.averageBy (fun car -> car?mpg.AsFloat()))

// Q: Absolute difference between the average horsepower of 4-cylinder
// and 8-cylinder cars? (cyl is compared as a string in this untyped version)
let avg4cyl =
    mtcars.Data
    |> Seq.filter (fun car -> car?cyl = "4")
    |> Seq.averageBy (fun car -> car?hp.AsFloat())
let avg8cyl =
    mtcars.Data
    |> Seq.filter (fun car -> car?cyl = "8")
    |> Seq.averageBy (fun car -> car?hp.AsFloat())
avg4cyl - avg8cyl |> abs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "packages/FSharp.Data.1.1.10/lib/net40/FSharp.Data.dll" | |
open FSharp.Data | |
// Ozone data set through the CSV type provider: columns are exposed as
// properties, so no explicit conversions are written below.
let csv = new CsvProvider<"http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv">()

// Q: What are the column names of the dataset?
csv.Headers

// Q: Extract the first 2 rows of the data frame.
csv.Data |> Seq.take 2

// Q: How many observations (rows) are in this data frame?
csv.Data |> Seq.length

// Q: Extract the last 2 rows of the data frame.
// The rows are copied into an array so they can be sliced and indexed below.
let data = csv.Data |> Seq.toArray
data.[(data.Length - 2)..]

// Q: What is the value of Ozone in the 47th row? (index 46, zero-based)
data.[46].Ozone

// Q: How many missing values are in the Ozone column?
// Rows are counted by whether the Ozone value is NaN.
open System
data
|> Seq.map (fun row -> row.Ozone)
|> Seq.countBy (fun ozone -> Double.IsNaN ozone)

// Q: What is the mean of the Ozone column, excluding missing values?
data
|> Seq.map (fun row -> row.Ozone)
|> Seq.filter (fun ozone -> not (Double.IsNaN ozone))
|> Seq.average

// Q: Mean of Solar.R over the rows where Ozone is above 31 and Temp is above 90?
data
|> Seq.filter (fun row -> row.Ozone > 31.0 && row.Temp > 90)
|> Seq.averageBy (fun row -> row.``Solar.R``)

// Q: What is the mean of "Temp" when "Month" is equal to 6?
// Temp is converted with `float` before averaging.
data
|> Seq.filter (fun row -> row.Month = 6)
|> Seq.averageBy (fun row -> float row.Temp)
// Q: In the 'iris' dataset, what is the mean of 'Sepal.Length' for the species virginica?
let iris = new CsvProvider<"https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv">()
iris.Data
|> Seq.filter (fun flower -> flower.Species = "virginica")
|> Seq.averageBy (fun flower -> flower.``Sepal Length``)

// Q: Means of 'Sepal.Length', 'Sepal.Width', 'Petal.Length' and 'Petal.Width'
// (the columns in this CSV use a space instead of a dot in their names).
[ iris.Data |> Seq.averageBy (fun flower -> flower.``Sepal Length``)
  iris.Data |> Seq.averageBy (fun flower -> flower.``Sepal Width``)
  iris.Data |> Seq.averageBy (fun flower -> flower.``Petal Length``)
  iris.Data |> Seq.averageBy (fun flower -> flower.``Petal Width``) ]
// Q: In the 'mtcars' dataset, what is the average miles per gallon (mpg)
// by number of cylinders (cyl)?
let mtcars = new CsvProvider<"https://raw.github.com/ropensci/rfigshare/master/inst/doc/mtcars.csv">()
mtcars.Data
|> Seq.groupBy (fun car -> car.cyl)
|> Seq.sortBy (fun (cyl, _) -> cyl)
|> Seq.map (fun (cyl, cars) -> cyl, cars |> Seq.averageBy (fun car -> car.mpg))

// Q: Absolute difference between the average horsepower of 4-cylinder
// and 8-cylinder cars? hp is converted with `float` before averaging.
let avg4cyl =
    mtcars.Data
    |> Seq.filter (fun car -> car.cyl = 4)
    |> Seq.averageBy (fun car -> float car.hp)
let avg8cyl =
    mtcars.Data
    |> Seq.filter (fun car -> car.cyl = 8)
    |> Seq.averageBy (fun car -> float car.hp)
avg4cyl - avg8cyl |> abs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load "packages/FSharp.DataFrame.0.9.3-beta/FSharp.DataFrame.fsx" | |
#r "packages/FSharp.Data.1.1.10/lib/net40/FSharp.Data.dll" | |
open System | |
open System.IO | |
open FSharp.Net | |
open FSharp.DataFrame | |
/// Downloads the CSV document at `url` and loads it into a data frame.
///
/// The response body is written to a temporary file, which is then handed to
/// `Frame.ReadCsv`. The temporary file is deleted in a `finally` block so it
/// is removed even when the download or the parse raises an exception
/// (previously it was only deleted on the success path and leaked otherwise).
let readCsvWeb url =
    // GetTempFileName creates the file on disk, so cleanup must cover
    // everything that happens after this line.
    let temp = Path.GetTempFileName()
    try
        File.WriteAllText(temp, Http.Request url)
        Frame.ReadCsv temp
    finally
        File.Delete temp
// Ozone data set loaded into a data frame.
let data = readCsvWeb "http://faculty.washington.edu/heagerty/Books/Biostatistics/DATA/ozone.csv"

// Q: What are the column names of the dataset?
data.Columns.Keys |> Seq.toList

// Q: Extract the first 2 rows of the data frame (two alternative spellings).
data |> Frame.getRows [0..1]
data.Rows.[0..1]

// Q: How many observations (rows) are in this data frame?
data |> Frame.countRows

// Q: Extract the last 2 rows of the data frame.
data |> Frame.takeLast 2

// Q: What is the value of Ozone in the 47th row? (key 46; two alternative spellings)
data?Ozone.[46]
data.Rows.[46]?Ozone

// Q: How many missing values are in the Ozone column?
// Either subtract the value count from the key count, or keep only the
// entries whose value is None and count the keys that remain.
Series.countKeys data?Ozone - Series.countValues data?Ozone
data?Ozone
|> Series.filterAll (fun _ cell -> cell.IsNone)
|> Series.countKeys

// Q: What is the mean of the Ozone column, excluding missing values?
data?Ozone |> Series.mean

// Q: Mean of Solar.R over the rows where Ozone is above 31 and Temp is above 90?
// Missing values are replaced with 0 before the row filter is applied.
data
|> Frame.fillMissingWith 0
|> Frame.filterRowValues (fun row -> row?Ozone > 31.0 && row?Temp > 90.0)
|> Frame.getCols ["Solar.R"]
|> Frame.mean
// Note: the following code gives the error: Type constraint mismatch when applying
// the default type 'int' for a type inference variable. The type 'int' does not
// support the operator 'DivideByInt' Consider adding further type constraints
//frame
//|> Frame.fillMissingWith 0
//|> Frame.filterRowValues (fun x -> x?Ozone > 31. && x?Temp > 90.)
//|> Frame.getCol "Solar.R"
//|> Series.mean

// Q: What is the mean of "Temp" when "Month" is equal to 6?
(data |> Frame.filterRowValues (fun row -> row?Month = 6.0))?Temp
|> Series.mean
// Q: In the 'iris' dataset, what is the mean of 'Sepal.Length' for the species virginica?
// Species is read with GetAs<string> for the comparison.
let iris = readCsvWeb "https://dataminingproject.googlecode.com/svn-history/r44/DataMiningApp/datasets/Iris/iris.csv"
iris
|> Frame.filterRowValues (fun row -> row.GetAs<string>("Species") = "virginica")
|> Frame.getCols ["Sepal Length"]
|> Frame.mean

// Q: Means of 'Sepal.Length', 'Sepal.Width', 'Petal.Length' and 'Petal.Width'.
// Two alternatives: select the four columns by name, or drop the Species column.
iris
|> Frame.getCols ["Sepal Length"; "Sepal Width"; "Petal Length"; "Petal Width"]
|> Frame.mean
iris
|> Frame.dropCol "Species"
|> Frame.mean
// Q: In the 'mtcars' dataset, what is the average miles per gallon (mpg)
// by number of cylinders (cyl)?
let mtcars = readCsvWeb "https://raw.github.com/ropensci/rfigshare/master/inst/doc/mtcars.csv"
(mtcars
 |> Frame.groupRowsByString "cyl"
 |> Frame.meanBy fst)?mpg

// Q: Absolute difference between the average horsepower of 4-cylinder
// and 8-cylinder cars? The per-group means are looked up by the string keys "4" and "8".
let hps =
    (mtcars
     |> Frame.groupRowsByString "cyl"
     |> Frame.meanBy fst)?hp
abs (hps.["4"] - hps.["8"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> data <- read.csv("hw1_data.csv") | |
> #What are the column names of the dataset? | |
> colnames(data) | |
[1] "Ozone" "Solar.R" "Wind" "Temp" "Month" "Day" | |
> #Extract the first 2 rows of the data frame and print them to the console | |
> data[1:2,] | |
Ozone Solar.R Wind Temp Month Day | |
1 41 190 7.4 67 5 1 | |
2 36 118 8.0 72 5 2 | |
> #How many observations (i.e. rows) are in this data frame? | |
> dim(data) | |
[1] 153 6 | |
> #Extract the last 2 rows of the data frame and print them to the console | |
> n <- nrow(data) | |
> data[(n-1):n,] | |
Ozone Solar.R Wind Temp Month Day | |
152 18 131 8.0 76 9 29 | |
153 20 223 11.5 68 9 30 | |
> #What is the value of Ozone in the 47th row? | |
> data$Ozone[47] | |
[1] 21 | |
> #How many missing values are in the Ozone column of this data frame? | |
> sum(is.na(data$Ozone)) | |
[1] 37 | |
> #What is the mean of the Ozone column in this dataset? Exclude missing values (coded as NA) from this calculation. | |
> mean(data$Ozone, na.rm=T) | |
[1] 42.12931 | |
> #Extract the subset of rows of the data frame where Ozone values are above 31 and Temp values are above 90. What is the mean of Solar.R in this subset? | |
> mean(data[data$Ozone>31 & data$Temp > 90,]$Solar.R, na.rm=T) | |
[1] 212.8 | |
> #What is the mean of "Temp" when "Month" is equal to 6? | |
> mean(data[data$Month==6,]$Temp, na.rm=T) | |
[1] 79.1 | |
> #Take a look at the 'iris' dataset that comes with R. In this dataset, what is the mean of 'Sepal.Length' for the species virginica? | |
> library(datasets) | |
> data(iris) | |
> mean(iris[iris$Species == "virginica",]$Sepal.Length) | |
[1] 6.588 | |
> #Continuing with the 'iris' dataset from Question 4, what R code returns a vector of the means of the variables 'Sepal.Length', 'Sepal.Width', 'Petal.Length', and 'Petal.Width'? | |
> apply(iris[,1:4], 2, mean) | |
Sepal.Length Sepal.Width Petal.Length Petal.Width | |
5.843333 3.057333 3.758000 1.199333 | |
> #Load the 'mtcars' dataset in R. How can one calculate the average miles per gallon (mpg) by number of cylinders in the car (cyl)? | |
> library(datasets) | |
> data(mtcars) | |
> tapply(mtcars$mpg, mtcars$cyl, mean) | |
4 6 8 | |
26.66364 19.74286 15.10000 | |
> #Continuing with the 'mtcars' dataset from Question 6, what is the absolute difference between the average horsepower of 4-cylinder cars and the average horsepower of 8-cylinder cars? | |
> abs(mean(mtcars[mtcars$cyl==4,]$hp) - mean(mtcars[mtcars$cyl==8,]$hp)) | |
[1] 126.5779 | |
> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment