library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(DBI)
library(MonetDBLite)
library(feather)
library(fst)
# Benchmark parameters: rows in the synthetic data frame, and how many times
# each benchmarked expression is evaluated.
n <- 5e6
times <- 5

# The files and database below are addressed by bare relative names ("rds",
# "feather", "fst", "monet"), so make the session's temporary directory the
# working directory before anything is written.
td <- tempdir()
setwd(td)
# sample() with `replace = TRUE` pre-filled; remaining arguments (the
# population and the number of draws) are passed through positionally.
resample <- purrr::partial(sample, replace = TRUE)

# Synthetic data set with `n` rows and one column per common column type:
#   time - date-times: now plus a normally distributed offset scaled by 1e7
#   bool - logical values drawn from TRUE/FALSE
#   int  - integers drawn from 1:1000 (sample() expands a single number)
#   dbl  - doubles, normal with sd = 1000
#   chr  - single lower-case letters
# stringsAsFactors = FALSE keeps `chr` a character vector on R < 4.0, where
# data.frame() would otherwise silently convert it to a factor and the
# benchmarks would measure factor rather than string storage. On R >= 4.0
# this is already the default, so the argument is a no-op there.
df <- data.frame(
  time = Sys.time() + rnorm(n) * 1e7,
  bool = resample(c(TRUE, FALSE), n),
  int = resample(1000, n),
  dbl = rnorm(n, sd = 1000),
  chr = resample(letters, n),
  stringsAsFactors = FALSE
)
# Open the DBI connection used by the database rows of every benchmark.
# NOTE(review): "monet" is a relative name, so the database presumably lives
# under the current working directory - confirm against the MonetDBLite docs.
monet <- DBI::dbConnect(
  drv = MonetDBLite::MonetDBLite(),
  dbname = "monet"
)
# Write benchmark: time storing `df` as an RDS file, a feather file, an fst
# file (written with compress = 0) and a database table named "df" that is
# replaced on every run (overwrite = TRUE). saveRDS() is called with its
# default settings. Each expression is evaluated `times` times.
# The expressions are kept verbatim on purpose: their source text is what
# appears in the `expr` column of the printed results.
microbenchmark::microbenchmark(
  saveRDS(df, "rds"),
  write_feather(df, "feather"),
  write_fst(df, "fst", compress = 0),
  dbWriteTable(monet, "df", df, overwrite = TRUE),
  times = times
)
#> Identifier(s) "time", "int" are reserved SQL keywords and need(s) to be quoted in queries.
#> Identifier(s) "time", "int" are reserved SQL keywords and need(s) to be quoted in queries.
#> Identifier(s) "time", "int" are reserved SQL keywords and need(s) to be quoted in queries.
#> Identifier(s) "time", "int" are reserved SQL keywords and need(s) to be quoted in queries.
#> Identifier(s) "time", "int" are reserved SQL keywords and need(s) to be quoted in queries.
#> Unit: milliseconds
#> expr min lq
#> saveRDS(df, "rds") 11544.71686 11777.7724
#> write_feather(df, "feather") 325.20101 325.4623
#> write_fst(df, "fst", compress = 0) 90.45394 108.2210
#> dbWriteTable(monet, "df", df, overwrite = TRUE) 1570.86544 1677.5482
#> mean median uq max neval
#> 12511.1462 12195.1468 13067.0356 13971.0592 5
#> 341.5257 325.4959 349.1955 382.2738 5
#> 127.1958 113.2424 123.6935 200.3680 5
#> 1702.9170 1694.2976 1743.8645 1828.0091 5
# Read benchmark: time loading the complete data set back into memory from
# each of the four stores written by the previous benchmark. The database
# table is referenced with tbl() and materialised with collect().
# The expressions are kept verbatim on purpose: their source text is what
# appears in the `expr` column of the printed results.
microbenchmark::microbenchmark(
  readRDS("rds"),
  read_feather("feather"),
  read_fst("fst"),
  tbl(monet, "df") %>% collect(),
  times = times
)
#> Unit: milliseconds
#> expr min lq mean median
#> readRDS("rds") 829.08102 835.73890 852.4980 835.9805
#> read_feather("feather") 130.47369 160.96860 187.1435 166.2264
#> read_fst("fst") 84.74794 97.05555 131.5531 113.2991
#> tbl(monet, "df") %>% collect() 144.19685 214.83650 253.4896 240.0959
#> uq max neval
#> 844.9840 916.7057 5
#> 238.9650 239.0840 5
#> 137.4563 225.2067 5
#> 329.8362 338.4824 5
# Keep only the rows of `data` whose `chr` column equals "a" or "b".
# `data` is whatever dplyr::filter() accepts; in this script it is applied to
# in-memory data frames and to a tbl() on the database connection.
# filter() is namespace-qualified because attaching dplyr masks
# stats::filter(), as the attach messages at the top of the file show.
f <- function(data) {
  dplyr::filter(data, chr %in% c("a", "b"))
}
# Read-and-filter benchmark: time loading from each store and then applying
# f(). For the database row, f() is applied to the tbl() before collect().
# NOTE(review): the filter is presumably translated to SQL and evaluated in
# the database, so only matching rows are transferred - confirm.
# `df %>% f()` is the baseline: the same filter on the data frame that is
# already in memory, with no read step.
# The expressions are kept verbatim on purpose: their source text is what
# appears in the `expr` column of the printed results.
microbenchmark::microbenchmark(
  readRDS("rds") %>% f(),
  read_feather("feather") %>% f(),
  read_fst("fst") %>% f(),
  tbl(monet, "df") %>% f() %>% collect(),
  df %>% f(),
  times = times
)
#> Unit: milliseconds
#> expr min lq mean
#> readRDS("rds") %>% f() 990.2467 1049.8131 1081.5837
#> read_feather("feather") %>% f() 434.8617 441.3463 456.8683
#> read_fst("fst") %>% f() 325.0571 388.1029 387.8848
#> tbl(monet, "df") %>% f() %>% collect() 186.9416 194.9822 201.6705
#> df %>% f() 193.6270 197.0525 208.6567
#> median uq max neval
#> 1105.8664 1126.1228 1135.8693 5
#> 444.2382 464.8642 499.0311 5
#> 399.1494 412.0324 415.0822 5
#> 198.3381 204.4821 223.6084 5
#> 214.9407 217.1024 220.5610 5
# Close the database connection once all benchmarks have run; the
# `shutdown = TRUE` flag is forwarded to the driver's disconnect method.
# NOTE(review): presumably this also stops the database engine - confirm.
DBI::dbDisconnect(conn = monet, shutdown = TRUE)
# Created on 2019-03-01 by the reprex package (v0.2.1.9000)