Skip to content

Instantly share code, notes, and snippets.

@eliocamp
Created July 10, 2019 18:56
Show Gist options
  • Save eliocamp/0b07e5935bc5525e2dd73c3b8a1d2ce8 to your computer and use it in GitHub Desktop.
Save eliocamp/0b07e5935bc5525e2dd73c3b8a1d2ce8 to your computer and use it in GitHub Desktop.
pipes vs data.table
library(data.table)
library(magrittr)
library(ggplot2)
# Seed the RNG so the rnorm() draws used to build the benchmark data are
# reproducible across runs.
set.seed(42)
#' Benchmark piped vs. chained data.table calls on a table of `N` rows
#'
#' Builds a data.table with `N` standard-normal values in `x` and a `group`
#' column cycling through "a", "b", "c", then times the same
#' aggregate-then-filter operation written with magrittr pipes and with
#' chained brackets.
#'
#' @param N Number of rows in the benchmark table.
#' @return Numeric vector of timing ratios (bracket time / pipe time), one per
#'   paired benchmark iteration.
do_bench <- function(N) {
  # Recycle the group labels explicitly rather than relying on (and silencing)
  # data.table's recycle-with-remainder warning.
  dt <- data.table(x = rnorm(N), group = rep_len(letters[1:3], N))

  pipe <- function() {
    dt %>%
      .[, mean(x), by = group] %>%
      .[group == "a"]
  }
  bracket <- function() {
    dt[, mean(x), by = group][group == "a"]
  }

  # bench::mark() times the expressions it is given, so these must be calls:
  # a bare `pipe` would only time looking up the function object.
  bm <- bench::mark(pipe(), bracket(), check = FALSE)

  pipe_time <- as.numeric(bm$time[[1]])
  bracket_time <- as.numeric(bm$time[[2]])

  # The two expressions may have run a different number of iterations; pair
  # them on the common prefix so the division never recycles the shorter one.
  n_iter <- min(length(pipe_time), length(bracket_time))
  bracket_time[seq_len(n_iter)] / pipe_time[seq_len(n_iter)]
}
# Run the benchmark once per table size (10^4 ... 10^8 rows); each size
# contributes one row per timing ratio returned by do_bench(), in column V1.
benches <- data.table(N = 10^(4:8))[, do_bench(N), by = N]
# Distribution of bracket/pipe timing ratios per table size, on a log scale.
# The horizontal line at 1 marks equal timing for both styles.
ggplot(benches, aes(factor(N), V1)) +
  geom_boxplot() +
  ggforce::geom_sina(size = 0.1, alpha = 0.1) +
  geom_hline(yintercept = 1) +
  scale_y_log10(
    "Timing [][] / %>%",
    breaks = c(1/3, 1/2, seq(1, 3)),
    limits = c(1/4, 4),
    labels = c("1/3", "1/2", seq(1, 3))
  ) +
  scale_x_discrete("Number of rows") +
  labs(
    title = "Using pipes with data.table operations has negligible impact on timing",
    # The subtitle quotes the benchmarked code, which filters on group == "a".
    subtitle = paste0(
      "doing \ndt %>% .[, mean(x), by = group] %>% .[group == \"a\"] vs. ",
      "\ndt[, mean(x), by = group][group == \"a\"]"
    )
  ) +
  hrbrthemes::theme_ipsum_rc()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment