Created
March 30, 2016 01:18
-
-
Save rmflight/53a93424f00b83a907d0d79ad5557d38 to your computer and use it in GitHub Desktop.
feather benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used by the feather serialization benchmark.
library(devtools) # session_info() at the end of the script; install_github()
# One-time install of the development version of feather:
# install_github("wesm/feather/R")
library(feather) # write_feather() / read_feather()
library(microbenchmark) # microbenchmark() timing harness
# Build the benchmark data frame `x`: one column per basic column type
# (integer, double, logical, date-time, factor, character).

# The seed used to be written as the arithmetic expression 3-29-16, which
# evaluates to -42. Spelling the value out keeps the random stream unchanged
# while making the actual seed visible.
set.seed(-42L)
rows <- 100000L

x <- data.frame(
  # round() returns doubles; convert explicitly so this really is an integer
  # column and integer serialization is what gets benchmarked.
  ints = as.integer(round(runif(rows, -100, 100))),
  stringsAsFactors = FALSE
)
x$floats <- runif(rows, -100, 100)
x$bools <- sample(c(TRUE, FALSE), rows, replace = TRUE)
# Uniformly random timestamps, given as seconds since the 1970-01-01 origin.
x$dates <- as.POSIXct(runif(rows, 100000000, 1459293171), origin = "1970-01-01")
# Factor drawn from the upper-case letters plus the digits 0-9; c() coerces
# the digits to character, giving at most 36 levels.
x$categories <- as.factor(sample(c(LETTERS, 0:9), rows, replace = TRUE))
# Random lower-case strings of 1 to 10 characters. vapply() guarantees a
# character vector and draws random numbers in the same order as the
# replicate() call it replaces.
x$strings <- vapply(
  seq_len(rows),
  function(i) {
    paste0(sample(letters, sample(1:10, 1), replace = TRUE), collapse = "")
  },
  character(1)
)
# Write benchmarks: time each serialization format writing the same data
# frame `x`, 10 repetitions per format. The output files stay in the working
# directory because the file-size and read benchmarks later in the script
# use them.

# write.csv() keeps its default row.names = TRUE, so the CSV also carries a
# leading column of row names.
microbenchmark(
  write.csv(x, file = "x.csv"),
  times = 10
)

# R's native multi-object format.
microbenchmark(
  save(x, file = "x.rda"),
  times = 10
)

# R's native single-object format.
microbenchmark(
  saveRDS(x, file = "x.rds"),
  times = 10
)

# Feather format.
microbenchmark(
  write_feather(x, "x.feather"),
  times = 10
)
# Drop the in-memory data so the read benchmarks start without it.
rm(x, rows)

# On-disk size in bytes of each benchmark file. file.size() is vectorized,
# so one call covers all four files; the names label each value when printed.
bench_files <- c("x.csv", "x.rda", "x.rds", "x.feather")
setNames(file.size(bench_files), bench_files)
# Read benchmarks: time loading each benchmark file back into memory,
# 10 repetitions per format. The object created by one benchmark is removed
# before the next benchmark runs.
microbenchmark(
  y <- read.csv("x.csv"),
  times = 10
)
rm(y)

# load() restores the object under the name it was saved with (`x`) instead
# of returning it, hence rm(x) rather than rm(y) afterwards.
microbenchmark(
  load("x.rda"),
  times = 10
)
rm(x)

microbenchmark(
  y <- readRDS("x.rds"),
  times = 10
)
rm(y)

microbenchmark(
  y <- read_feather("x.feather"),
  times = 10
)
# Record the R version, platform and package versions the benchmark ran with.
session_info()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> source('~/feather_benchmark.R', echo=TRUE) | |
> library(devtools) | |
> # install_github("wesm/feather/R") | |
> library(feather) | |
> library(microbenchmark) | |
> set.seed(3-29-16) | |
> rows <- 100000 | |
> x <- data.frame(ints = round(runif(rows, -100, 100)), stringsAsFactors = FALSE) | |
> x$floats <- runif(rows, -100, 100) | |
> x$bools <- sample(c(TRUE, FALSE), rows, replace = TRUE) | |
> x$dates <- as.POSIXct(runif(rows, 100000000, 1459293171), origin = "1970-01-01") | |
> x$categories <- as.factor(sample(c(LETTERS, 0:9), rows, replace = TRUE)) | |
> x$strings <- replicate(rows, paste0(sample(letters, sample(1:10, 1), replace = TRUE), collapse = "")) | |
> microbenchmark( | |
+ write.csv(x, file = "x.csv"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
write.csv(x, file = "x.csv") 920.1746 950.7067 1083.285 1052.906 1109.616 1569.202 10 | |
> microbenchmark( | |
+ save(x, file = "x.rda"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
save(x, file = "x.rda") 485.4359 491.7663 493.8786 495.3945 496.6437 500.302 10 | |
> microbenchmark( | |
+ saveRDS(x, file = "x.rds"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
saveRDS(x, file = "x.rds") 485.4695 493.785 509.7228 497.8886 501.1894 591.053 10 | |
> microbenchmark( | |
+ write_feather(x, "x.feather"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
write_feather(x, "x.feather") 12.36809 27.26378 47.24269 59.39752 65.18266 67.80251 10 | |
> rm(x, rows) | |
> file.size("x.csv") | |
[1] 6667705 | |
> file.size("x.rda") | |
[1] 2172036 | |
> file.size("x.rds") | |
[1] 2172037 | |
> file.size("x.feather") | |
[1] 3761724 | |
> microbenchmark( | |
+ y <- read.csv("x.csv"), times = 10 | |
+ ) | |
Unit: seconds | |
expr min lq mean median uq max neval | |
y <- read.csv("x.csv") 2.085135 2.108388 2.182528 2.209813 2.240324 2.287626 10 | |
> rm(y) | |
> microbenchmark( | |
+ load("x.rda"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
load("x.rda") 76.07915 76.30072 78.13084 76.58067 77.5034 89.92839 10 | |
> rm(x) | |
> microbenchmark( | |
+ y <- readRDS("x.rds"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
y <- readRDS("x.rds") 76.8466 76.98257 78.76689 77.24436 78.49101 90.58261 10 | |
> rm(y) | |
> microbenchmark( | |
+ y <- read_feather("x.feather"), times = 10 | |
+ ) | |
Unit: milliseconds | |
expr min lq mean median uq max neval | |
y <- read_feather("x.feather") 11.5892 11.65439 13.26675 11.68718 11.73939 24.76797 10 | |
> session_info() | |
Session info --------------------------------------------------------------------------------------------------------------------- | |
setting value | |
version R version 3.2.2 (2015-08-14) | |
system x86_64, linux-gnu | |
ui RStudio (0.99.875) | |
language (EN) | |
collate en_US.UTF-8 | |
tz America/New_York | |
date 2016-03-29 | |
Packages ------------------------------------------------------------------------------------------------------------------------- | |
package * version date source | |
colorspace 1.2-6 2015-03-11 CRAN (R 3.2.2) | |
devtools * 1.9.1.9000 2016-01-21 local | |
digest 0.6.9 2016-01-08 CRAN (R 3.2.3) | |
feather * 0.0.0.9000 2016-03-30 Github (wesm/feather@a58e3be) | |
ggplot2 2.1.0 2016-03-01 CRAN (R 3.2.2) | |
gtable 0.2.0 2016-02-26 CRAN (R 3.2.2) | |
memoise 1.0.0 2016-01-29 CRAN (R 3.2.2) | |
microbenchmark * 1.4-2.1 2015-11-25 CRAN (R 3.2.2) | |
munsell 0.4.3 2016-02-13 CRAN (R 3.2.2) | |
plyr 1.8.3 2015-06-12 CRAN (R 3.2.2) | |
Rcpp 0.12.4 2016-03-26 CRAN (R 3.2.2) | |
scales 0.4.0 2016-02-26 CRAN (R 3.2.2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment