## Code presented at the R workshop of the CRUNCH 2017 conference: http://crunchconf.com
## intro slides: http://bit.ly/CRUNCH-R-2017

## basic operations
1 + 3
3 * 2
3^2

## constants
pi
"pi"
'pi'
letters
LETTERS
month.name
month.abb

## help
?pi
apropos('month')
apropos('cor')

## variables
x = 4
x <- 4
x
x / 2
x^2
x^0.5

## functions
sqrt(x)
sqrt(x = x)
sin(pi / 2)
round(pi)
ceiling(pi)
floor(pi)

## custom functions
f <- function(x) 2 * x + 1
f(5)
f(pi)
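## extra note (not in the original workshop code): arithmetic is vectorized
## in R, so the same function also works element-wise on a whole vector
f(1:3)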
## vectors
c(1, 2, 3, 4, 5)
1:5
seq(1, 5)
?seq
seq(1, 5, 0.1)
x <- seq(1, 5, 0.1)
plot(x, f(x))
plot(x, f(x), type = 'l')
plot(x, f(x), type = 'l', xlab = '', main = '2x+1')
grid()

## TODO plot 1 period of sine curve
?sin
f <- function(x) sin(x)
x <- seq(0, pi * 2, 0.1)
plot(x, sin(x), type = 'l')

## a simpler way for plotting functions
curve(f)
?curve
curve(2*x + 1, from = 0, to = 50)
curve(x + 1, add = TRUE, col = 'red')

## TODO plot 1 period of sine curve
curve(sin, from = 0, to = 2*pi)

## TODO Brownian motion in 1D
runif(1)
runif(5)

## random numbers from uniform distribution
round(runif(5))
round(runif(5)) * 2 - 1
cumsum(round(runif(25)) * 2 - 1)
plot(cumsum(round(runif(25)) * 2 - 1), type = 's')
set.seed(42)
plot(cumsum(round(runif(25)) * 2 - 1), type = 's')
for (i in 2:6) {
  lines(cumsum(round(runif(25)) * 2 - 1), type = 's', col = i)
}
## pro tip #1
plot(c(1, 25), c(-25, 25), type = 'n')
plot(NULL, NULL, xlim = c(1, 25), ylim = c(-25, 25))

## pro tip #2
sample(c(-1, 1), 25, replace = TRUE)
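## extra sketch (not part of the original demo): putting the two tips together,
## drawing the +/-1 steps directly with sample() and overlaying a few walks
plot(NULL, NULL, xlim = c(1, 25), ylim = c(-25, 25),
     xlab = 'step', ylab = 'position')
for (i in 1:6) {
  lines(cumsum(sample(c(-1, 1), 25, replace = TRUE)), type = 's', col = i)
}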

################################################################################

## custom vectors -> combine values into vector
h <- c(174, 170, 160)
h
(w <- c(90, 80, 70))
plot(h, w)
plot(h, w, main = "Demo", xlab = 'Height', ylab = 'Weight')
cor(w, h)
lm(w ~ h) # formula notation
fit <- lm(w ~ h)
summary(fit)
plot(h, w, main = "Demo", xlab = 'Height', ylab = 'Weight')
abline(fit, col = 'red')

## some basic modeling
predict(fit, list(h = c(56, 104)))
56 * fit$coefficients[2] + fit$coefficients[1]
56 * 1.3462 + -146.1538

## why?
plot(h, w, main = "Demo", xlab = 'Height', ylab = 'Weight',
     xlim = c(0, max(h)),
     ylim = c(0, max(w)))
abline(fit, col = 'red')

## polynomial model
fit <- lm(w ~ poly(h, 2, raw = TRUE))
fit <- lm(w ~ h + I(h^2))
fit
2824 + 174 * -34.3571 + (174^2) * 0.1071
plot(h, w)
lines(h, predict(fit), col = 'red')
predict(fit, data.frame(h = c(56, 104)))
plot(0:200, predict(fit, list(h = 0:200)), type = 'l')
abline(v = c(160, 174), col = 'red')

## data.frame
df <- data.frame(weight = w, height = h)
df
str(df)
df$weight
?'$'
plot(df)
cor(df)
lm(weight ~ height, data = df)
str(df)
## indexing: df[i, j] selects row i and column j
df[1, ]
df[, 1]
df[3, 2]
df$weight
df$weight[2]

## Body Mass Index (BMI) is a person's weight in kilograms divided by the square of their height in meters
df$height <- df$height / 100
df
df$bmi <- df$weight / df$height^2
df
## the same in a single step, had height been left in cm:
df$bmi <- df$weight / (df$height / 100)^2
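## extra sketch (not in the original workshop code): label BMI categories with
## cut() using the standard WHO cutoffs; height is already in meters here,
## and bmi_cat is just an illustrative column name
df$bmi <- df$weight / df$height^2
df$bmi_cat <- cut(df$bmi, breaks = c(0, 18.5, 25, 30, Inf),
                  labels = c('underweight', 'normal', 'overweight', 'obese'))
df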

## import data
df <- read.csv('http://bit.ly/heightweight')
str(df)

## TODO convert the imperial units to metric
df$height <- df$heightIn * 2.54
df$weight <- df$weightLb * 0.45
str(df)
df$heightIn <- df$weightLb <- NULL
str(df)

## descriptive stats
min(df$ageYear)
max(df$ageYear)
range(df$ageYear)
sum(df$weight) # weightLb was dropped above, so sum the converted column
length(df$weight)
nrow(df)
ncol(df)
dim(df)

## TODO plot and analyze association between height and weight
plot(df$height, df$weight)
abline(lm(weight ~ height, data = df), col = 'red')

## TODO compute and plot BMI index
df$bmi <- df$weight / (df$height / 100)^2
hist(df$bmi)

## exploratory data analysis
pairs(df)
library(GGally)
ggpairs(df)
library(pairsD3)
pairsD3(df)

## statistical test: t-test, ANOVA
t.test(height ~ sex, data = df)
aov(height ~ sex, data = df)
summary(aov(height ~ sex, data = df))
summary(aov(weight ~ sex, data = df))

## Post hoc tests => Tukey Honest Significant Differences
TukeyHSD(aov(height ~ sex, data = df))
TukeyHSD(aov(weight ~ sex, data = df))
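## extra sketch (not from the original workshop code): the post hoc
## comparisons also have a built-in plot method
plot(TukeyHSD(aov(height ~ sex, data = df)))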

## #############################################################################
## Wilkinson: The Grammar of Graphics
## Wickham: ggplot2 -> layer by layer:
##   - coordinate system
##   - aesthetics (labels)
##   - scales
##   - geometries
##   - shapes
##   - statistics
##   - grid, color theme etc. + layers
## http://www.cookbook-r.com/Graphs/
## #############################################################################
library(ggplot2)
?diamonds

## barplot
ggplot(diamonds, aes(x = cut)) + geom_bar()
ggplot(diamonds, aes(x = cut)) + geom_bar() + theme_bw()
ggplot(diamonds, aes(x = cut)) +
  geom_bar(colour = "darkgreen", fill = "white") +
  theme_bw()
p <- ggplot(diamonds, aes(x = cut)) +
  geom_bar(colour = "darkgreen", fill = "white") + theme_bw()
p

## coord transformations
library(scales)
p + scale_y_log10()
p + scale_y_sqrt()
p + scale_y_reverse()
p + coord_flip()

## facet
p + facet_wrap(~ clarity)
p + facet_grid(color ~ clarity)

## stacked, clustered bar charts
p <- ggplot(diamonds, aes(x = color, fill = cut))
p + geom_bar()
p + geom_bar(position = "fill")
p + geom_bar(position = "dodge")

## histogram
ggplot(diamonds, aes(x = price)) + geom_histogram(binwidth = 100)
ggplot(diamonds, aes(x = price)) + geom_histogram(binwidth = 1000)
ggplot(diamonds, aes(x = price)) + geom_histogram(binwidth = 10000)
ggplot(diamonds, aes(price, fill = cut)) + geom_density(alpha = 0.2) + theme_bw()

## scatterplot
p <- ggplot(diamonds, aes(x = carat, y = price)) + geom_point()

## adding layers
p
p <- p + geom_smooth()
p
p + geom_smooth(method = "lm", se = FALSE, color = 'red')
ggplot(diamonds, aes(x = carat, y = price, colour = cut)) + geom_point() + geom_smooth()

## themes
library(ggthemes)
p + theme_economist() + scale_colour_economist()
p + theme_stata() + scale_colour_stata()
p + theme_excel() + scale_colour_excel()
p + theme_wsj() + scale_colour_wsj("colors6", "")
p + theme_gdocs() + scale_colour_gdocs()
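## side note (not in the original workshop code): ggsave() writes the most
## recently displayed plot to disk, inferring the format from the extension;
## the file name below is just an example
ggsave('diamonds-themed.png', width = 8, height = 6)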
## Further reading at http://www.cookbook-r.com/Graphs/

## TODO on mtcars
## * number of carburetors
ggplot(mtcars, aes(x = carb)) + geom_bar()
## * hp
ggplot(mtcars, aes(x = hp)) + geom_histogram()
## * barplot of carburetors per am
ggplot(mtcars, aes(x = carb)) + geom_bar() + facet_grid(. ~ am)
## stacked bar chart
ggplot(mtcars, aes(x = carb, fill = factor(am))) + geom_bar()
## grouped/dodged
ggplot(mtcars, aes(x = carb, fill = factor(am))) + geom_bar(position = 'dodge')
## * boxplot of hp by carb
ggplot(mtcars, aes(x = factor(carb), y = hp)) + geom_boxplot()
## * hp and wt by carb
ggplot(mtcars, aes(x = hp, y = wt)) + geom_point() + facet_grid(. ~ carb)
## * hp and wt by carb regression
ggplot(mtcars, aes(x = hp, y = wt)) + geom_point() + facet_grid(. ~ carb) + geom_smooth()

## #############################################################################
## PCA demo on image processing
## #############################################################################
## http://www.nasa.gov/images/content/694811main_pia16225-43_full.jpg
## http://bit.ly/BudapestBI-R-img
download.file('http://bit.ly/BudapestBI-R-img', 'image.jpg')
library(jpeg)
img <- readJPEG('image.jpg')
str(img)
dim(img)
h <- dim(img)[1]
w <- dim(img)[2]
h; w
img1d <- matrix(img, h * w)
str(img1d)
pca <- prcomp(img1d)
pca
summary(pca)
image(matrix(pca$x[, 1], h))
image(matrix(pca$x[, 2], h))
image(matrix(pca$x[, 2], h), col = gray.colors(100))
image(matrix(pca$x[, 3], h))
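## extra sketch (not in the original workshop code): reconstruct the image
## from the first principal component only, then look at the red channel;
## `recon` is just an illustrative name
recon <- pca$x[, 1, drop = FALSE] %*% t(pca$rotation[, 1, drop = FALSE])
recon <- sweep(recon, 2, pca$center, '+') # add back the column means
image(matrix(recon[, 1], h), col = gray.colors(100))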

## #############################################################################
## MDS
## #############################################################################
## distance between 40 Hungarian cities -> 2D scatterplot
download.file('http://bit.ly/CEU-R-distances', 'cities.xls')
library(readxl)
cities <- read_excel('cities.xls')
str(cities)
## get rid of the 1st column and the last row (metadata)
cities <- cities[, -1]
cities <- cities[-nrow(cities), ]
mds <- cmdscale(as.dist(cities))
mds
plot(mds)
text(mds[, 1], mds[, 2], names(cities))
## flip both the x and y axes
mds <- -mds
plot(mds)
text(mds[, 1], mds[, 2], names(cities))

## distance between European cities -> 2D scatterplot with ggplot
mds <- cmdscale(eurodist)
mds
mds <- as.data.frame(mds)
library(ggplot2)
ggplot(mds, aes(V1, -V2, label = rownames(mds))) + geom_text()

## non-geo stuff
str(mtcars)
?mtcars
mds <- cmdscale(dist(mtcars))
plot(mds)
text(mds[, 1], mds[, 2], rownames(mtcars))
mds <- as.data.frame(mds)
library(ggplot2)
ggplot(mds, aes(V1, -V2, label = rownames(mtcars))) +
  geom_text() + theme_bw()
library(ggrepel)
ggplot(mds, aes(V1, -V2, label = rownames(mtcars))) +
  geom_text_repel() + theme_bw()

## #############################################################################
## feature selection - same in 2D
## #############################################################################
str(iris)
plot(iris$Sepal.Length, iris$Sepal.Width)
fit <- lm(Sepal.Width ~ Sepal.Length, data = iris)
fit
summary(fit)
plot(iris$Sepal.Length, iris$Sepal.Width)
abline(fit, col = 'red', lwd = 5)

## TODO improve the model
summary(lm(Sepal.Width ~ Sepal.Length + Species,
           data = iris))
library(ggplot2)
ggplot(iris, aes(Sepal.Length, Sepal.Width,
                 col = Species)) +
  geom_point() + geom_smooth(method = 'lm')
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point(aes(col = Species)) +
  geom_smooth(aes(col = Species), method = 'lm') +
  geom_smooth(method = 'lm', col = 'black') + theme_bw()
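## extra sketch (not from the original workshop code): compare the two models
## formally with an F-test via anova(); fit1/fit2 are illustrative names
fit1 <- lm(Sepal.Width ~ Sepal.Length, data = iris)
fit2 <- lm(Sepal.Width ~ Sepal.Length + Species, data = iris)
anova(fit1, fit2)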

## #############################################################################
## PCA demo to show the steps in hierarchical clustering
## #############################################################################
PC <- prcomp(iris[, 1:4])$x
dm <- dist(iris[, 1:4])
hc <- hclust(dm)
plot(hc)
for (i in 2:8) {
  plot(hc)
  rect.hclust(hc, k = i, border = 'red')
  Sys.sleep(1)
}
library(animation)
ani.options(interval = 1)
saveGIF({
  for (i in 2:8) {
    plot(hc)
    rect.hclust(hc, k = i, border = 'red')
  }
})
library(animation)
ani.options(interval = 1)
saveGIF({
  for (i in 1:8) {
    plot(-PC[, 1:2],
         col = cutree(hc, i),
         pch = as.numeric(factor(iris$Species)) + 5)
  }
})
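## extra sketch (not from the original workshop code): how well do 3 clusters
## line up with the actual species?
table(cluster = cutree(hc, 3), species = iris$Species)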

## #############################################################################
## basic decision trees
## #############################################################################
rm(iris)
set.seed(3)
iris$rnd <- runif(150)
iris <- iris[order(iris$rnd), ]
iris
iris$rnd <- NULL
train <- iris[1:100, ]
test <- iris[101:150, ]
library(rpart)
ct <- rpart(Species ~ ., data = train)
summary(ct)
plot(ct); text(ct)
library(partykit)
plot(as.party(ct))
predict(ct, newdata = test)
predict(ct, newdata = test, type = 'class')

## confusion matrix
table(test$Species,
      predict(ct, newdata = test,
              type = 'class'))
ct <- rpart(Species ~ ., data = train,
            minsplit = 3)
## ?rpart.control
plot(ct); text(ct)
table(test$Species,
      predict(ct, newdata = test,
              type = 'class'))
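## extra sketch (not from the original workshop code): test-set accuracy
## computed from the confusion matrix; `cm` is just an illustrative name
cm <- table(test$Species, predict(ct, newdata = test, type = 'class'))
sum(diag(cm)) / sum(cm)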

## #############################################################################
## K-Nearest Neighbors algorithm
## #############################################################################
library(data.table)
setDT(iris)
iris[, rnd := runif(150)]
setorder(iris, rnd)
iris
iris[, rnd := NULL]
train <- iris[1:100]
test <- iris[101:150]
library(class)
knn(train[, 1:4, with = FALSE], test[, 1:4, with = FALSE], train$Species)
res <- knn(train[, 1:4, with = FALSE], test[, 1:4, with = FALSE], train$Species)
table(test$Species, res)
res <- knn(train[, 1:4, with = FALSE], test[, 1:4, with = FALSE], train$Species, k = 2)
table(test$Species, res)
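## extra sketch (not from the original workshop code): try a few values of k
## and compare the resulting test-set accuracy
for (k in 1:10) {
  res <- knn(train[, 1:4, with = FALSE], test[, 1:4, with = FALSE],
             train$Species, k = k)
  print(c(k = k, accuracy = mean(res == test$Species)))
}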

## #############################################################################
## EXERCISE: basic decision trees
## #############################################################################
## => model gender from the height/weight dataset
df <- read.csv('http://bit.ly/BudapestBI-R-csv')
library(rpart)
ct <- rpart(sex ~ heightIn + weightLb, data = df)
plot(ct); text(ct)
ct <- rpart(sex ~ heightIn + weightLb, data = df, minsplit = 1)
table(df$sex, predict(ct, type = 'class'))

## #############################################################################
## did you use test data? beware of overfitting
## #############################################################################
## random order
set.seed(7)
df <- df[order(runif(nrow(df))), ]
## create training and test datasets
train <- df[1:100, ]
test <- df[101:237, ]
## pretty impressive results!
ct <- rpart(sex ~ heightIn + weightLb, data = train, minsplit = 1)
table(train$sex, predict(ct, type = 'class'))
## but oops
table(test$sex, predict(ct, newdata = test, type = 'class'))
## with a more generalized model
ct <- rpart(sex ~ heightIn + weightLb, data = train)
table(train$sex, predict(ct, type = 'class'))
table(test$sex, predict(ct, newdata = test, type = 'class'))
## back to the slides for a high-level overview of bagging, random forests, GBM etc.

## #############################################################################
## H2O
## #############################################################################
## start and connect to H2O
library(h2o)
h2o.init()
## in a browser, check http://localhost:54321

## demo data
library(hflights)
rm(hflights) # drop any local copy so we use the dataset from the package
str(hflights)

## copy to H2O
hflights.hex <- as.h2o(hflights, 'hflights')
str(hflights.hex)
str(head(hflights.hex))
head(hflights.hex, 3)
summary(hflights.hex)

## check flight number: numeric?
## check on the H2O web interface as well (enum vs factor)
## convert numeric to factor/enum
hflights.hex[, 'FlightNum'] <- as.factor(hflights.hex[, 'FlightNum'])
summary(hflights.hex)

## boring to do one by one, so loop over the columns
for (v in c('Month', 'DayofMonth', 'DayOfWeek', 'DepTime', 'ArrTime', 'Cancelled')) {
  hflights.hex[, v] <- as.factor(hflights.hex[, v])
}
summary(hflights.hex)

## feature engineering: departure time? is it OK? hour of the day?
## redo everything... just use the R script
library(data.table)
hflights <- data.table(hflights)
hflights[, hour := substr(DepTime, 1, 2)]
hflights[, .N, by = hour]
hflights[, hour := substr(DepTime, 1, nchar(DepTime) - 2)]
hflights[, hour := cut(as.numeric(hour), seq(0, 24, 4))]
hflights[, .N, by = hour]
hflights[is.na(hour)]
hflights[, .N, by = .(is.na(hour), Cancelled == 1)]

## drop columns
hflights <- hflights[, .(Month, DayofMonth, DayOfWeek, Dest, Origin,
                         UniqueCarrier, FlightNum, TailNum, Distance,
                         Cancelled = factor(Cancelled))]

## re-upload to H2O
h2o.ls()
hflights.hex <- as.h2o(as.data.frame(hflights), 'hflights')

## split the data
h2o.splitFrame(data = hflights.hex, ratios = 0.75, destination_frames = paste0('hflights', 0:1))
h2o.ls()

## build the first model
hflights.rf <- h2o.randomForest(
  model_id = 'hflights_rf',
  x = setdiff(names(hflights), 'Cancelled'),
  y = 'Cancelled',
  training_frame = 'hflights0',
  validation_frame = 'hflights1')
hflights.rf

## more trees
hflights.rf <- h2o.randomForest(
  model_id = 'hflights_rf500',
  x = setdiff(names(hflights), 'Cancelled'),
  y = 'Cancelled',
  training_frame = 'hflights0',
  validation_frame = 'hflights1', ntrees = 500)

## switch to the UI and check the ROC curve & AUC
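## extra sketch (not from the original workshop code): the same metric is also
## available from R, read off the model's validation metrics
h2o.auc(hflights.rf, valid = TRUE)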

## GBM
hflights.gbm <- h2o.gbm(
  x = setdiff(names(hflights), 'Cancelled'),
  y = 'Cancelled',
  training_frame = 'hflights0',
  validation_frame = 'hflights1',
  model_id = 'hflights_gbm')

## more trees should help again!
hflights.gbm <- h2o.gbm(
  x = setdiff(names(hflights), 'Cancelled'),
  y = 'Cancelled',
  training_frame = 'hflights0',
  validation_frame = 'hflights1',
  model_id = 'hflights_gbm2', ntrees = 550)

## but no: higher training AUC, yet lower validation AUC => overfitting

## now try a lower learning rate instead
hflights.gbm <- h2o.gbm(
  x = setdiff(names(hflights), 'Cancelled'),
  y = 'Cancelled',
  training_frame = 'hflights0',
  validation_frame = 'hflights1',
  model_id = 'hflights_gbm3', ntrees = 250, learn_rate = 0.01)
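## extra sketch (not from the original workshop code): inspect the validation
## performance and the variable importances from R as well
h2o.performance(hflights.gbm, valid = TRUE)
h2o.varimp(hflights.gbm)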

## bye
h2o.shutdown()

## #############################################################################
## intro to data.table
## #############################################################################
install.packages('data.table')
install.packages('hflights')
library(hflights)
?hflights
str(hflights)
library(data.table)
dt <- data.table(hflights)

## dt[i]
dt[DayOfWeek == 5]
dt[DayOfWeek == 5 & Month == 3]
dt[DayOfWeek == 5 & Month == 3, ActualElapsedTime]

## dt[i, j]
dt[DayOfWeek == 5 & Month == 3, mean(ActualElapsedTime)]
dt[DayOfWeek == 5 & Month == 3,
   mean(ActualElapsedTime, na.rm = TRUE)]
dt[is.na(ActualElapsedTime) & Cancelled == 0 & Diverted == 0]

## the avg flight time to LAX
dt[Dest == 'LAX', mean(AirTime, na.rm = TRUE)]

## avg flight time per dest
## dt[i, j, by = ...]
dta <- dt[, mean(AirTime, na.rm = TRUE), by = Dest]
dta
names(dta)[2] <- 'avgAirTime'

## multiple "j" expressions
dt[, .(
  minAirTime = min(AirTime, na.rm = TRUE),
  avgAirTime = mean(AirTime, na.rm = TRUE),
  maxAirTime = max(AirTime, na.rm = TRUE)),
  by = .(Dest, Origin)]
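## extra sketch (not from the original workshop code): the same pattern for
## several columns at once via .SD and .SDcols
dt[, lapply(.SD, mean, na.rm = TRUE),
   by = Dest, .SDcols = c('AirTime', 'ArrDelay', 'DepDelay')]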

## TODO number of cancelled flights to LAX
dt[Dest == 'LAX', sum(Cancelled)]
nrow(dt[Dest == 'LAX' & Cancelled == 1])
dt[Dest == 'LAX' & Cancelled == 1, .N]
dt[Cancelled == 1, .N, by = Dest]

## TODO sum of miles travelled to LAX
dt[Dest == 'LAX', sum(Distance)]
dt$distanceKm <- dt$Distance * 1.6
dt[, distanceKm := Distance * 1.6]
dt[Dest == 'LAX', sum(Distance) * 1.6]
dt[Dest == 'LAX', sum(distanceKm)]

## TODO percentage of flights cancelled per dest
dt[, sum(Cancelled == 1) / .N, by = Dest]
dt[, sum(Cancelled) / .N, by = Dest]
dt[, mean(Cancelled) * 100, by = Dest]

## TODO plot the departure and arrival delays
library(ggplot2)
ggplot(dt, aes(ArrDelay, DepDelay)) + geom_point()
ggplot(dt, aes(ArrDelay, DepDelay)) + geom_hex()

## TODO plot the avg departure and arrival delay by dest:
## aggregate, then visualize
dta <- dt[, list(
  arrival = mean(ArrDelay, na.rm = TRUE),
  departure = mean(DepDelay, na.rm = TRUE)
), by = Dest]
ggplot(dta, aes(x = departure, y = arrival)) +
  geom_text(aes(label = Dest))

## TODO visualize the avg flight time per dest
dta <- dt[, list(airtime = mean(AirTime, na.rm = TRUE)),
          by = Dest]
ggplot(dta, aes(x = Dest, y = airtime)) +
  geom_bar(stat = 'identity')

## heatmap => day of week + hour => N
install.packages("nycflights13")
library(nycflights13)
str(flights)
dt <- data.table(flights)
dt[, dayOfWeek := weekdays(as.Date(
  paste(year, month, day, sep = '-')
))]
dt[, date := paste(year, month, day, sep = '-')]
dt[, date := as.Date(date)]
dt[, dayOfWeek := weekdays(date)]
dta <- dt[, list(count = .N), by = list(dayOfWeek, hour)]
ggplot(dta, aes(x = hour, y = dayOfWeek, fill = count)) +
  geom_tile()

## long and wide tables
library(reshape2)
?dcast
?melt
dcast(dta, dayOfWeek ~ hour) # long -> wide
dta[dayOfWeek == 'Friday' & hour == 5]
dtw <- dcast(dta, hour ~ dayOfWeek)
melt(dtw, id.vars = 'hour') # wide -> long
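## side note (not from the original workshop code): data.table ships its own
## dcast()/melt() methods, so reshape2 is not strictly needed here
data.table::dcast(dta, dayOfWeek ~ hour, value.var = 'count')
data.table::melt(data.table::dcast(dta, hour ~ dayOfWeek, value.var = 'count'),
                 id.vars = 'hour')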