Created
February 15, 2022 02:18
-
-
Save grantmcdermott/f9af3b7ce3e4aaa6ec6e02443af41a71 to your computer and use it in GitHub Desktop.
Benchmarking collapse_mask
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Context: https://twitter.com/grant_mcdermott/status/1493400952878952448
##
## Benchmarks the same grouped column-sum written three ways: with dplyr, with
## collapse (through its `collapse_mask` option, so the dplyr-style verbs are
## used unqualified), and with data.table.

## The option must be set before collapse is attached.
options(collapse_mask = "all") # NB: see `help('collapse-options')`
library(dplyr)
library(data.table)
library(collapse) # Needs to come after library(dplyr) for collapse_mask to work

## Input data (downloaded on every run) and the columns to be summed.
flights = fread('https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv')
vars = c('dep_delay', 'arr_delay', 'air_time', 'distance', 'hour')

## Note we explicitly call dplyr::<function> for the 1st expression in this
## benchmark, since we've masked the regular dplyr operations with their
## collapse equivalents (i.e. the 2nd expression).
library(microbenchmark)
microbenchmark(
  ## `vars` is an external character vector, so it is wrapped in all_of();
  ## passing it bare to a tidyselect context is deprecated as ambiguous.
  dplyr = flights |>
    dplyr::group_by(month, day, origin, dest) |>
    dplyr::summarise(dplyr::across(dplyr::all_of(vars), sum)),
  ## Unqualified verbs: these are the masked (collapse) versions. The character
  ## vector is passed directly here, exactly as in the recorded run below.
  collapse = flights |>
    group_by(month, day, origin, dest) |>
    summarise(across(vars, sum)),
  data.table = flights[
    , lapply(.SD, sum),
    by = .(month, day, origin, dest),
    .SDcols = vars
  ],
  ## Only two evaluations per expression; the figures below are indicative.
  times = 2
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> dplyr 1041.243193 1041.243193 1061.813553 1061.813553 1082.383912 1082.383912 2 b
#> collapse 10.350356 10.350356 10.428991 10.428991 10.507626 10.507626 2 a
#> data.table 9.615242 9.615242 9.778382 9.778382 9.941521 9.941521 2 a
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
We have @SebKrantz to thank for the package, but stoked to hear it's working on your system now.