Load a few packages. Here farr
provides system_time()
.
library(tidyverse)
library(DBI)
library(farr)
Make a parquet file for crsp_monthly
:
tidy_finance <- dbConnect(
RSQLite::SQLite(),
"data/tidy_finance_r.sqlite",
extended_types = TRUE
)
crsp_monthly <- tbl(tidy_finance, "crsp_monthly")
crsp_monthly |>
collect() |>
arrow::write_parquet("data/crsp_monthly.parquet")
dbDisconnect(tidy_finance)
Now run the benchmark:
db <- dbConnect(duckdb::duckdb())
tbl(db, "'data/crsp_monthly.parquet'") |>
group_by(month) |>
mutate(portfolio = ntile(mktcap_lag, 10)) |>
group_by(portfolio) |>
summarize(mean_ret_excess = mean(ret_excess, na.rm = TRUE)) |>
arrange(portfolio) |>
collect() |>
system_time()
user system elapsed
0.641 0.102 0.178
# A tibble: 10 × 2
portfolio mean_ret_excess
<dbl> <dbl>
1 1 0.0227
2 2 0.00519
3 3 0.00481
4 4 0.00522
5 5 0.00546
6 6 0.00612
7 7 0.00637
8 8 0.00681
9 9 0.00649
10 10 0.00579