Last active
November 16, 2023 06:19
-
-
Save nanxstats/cf71094cc730147a0fc230b86f642db3 to your computer and use it in GitHub Desktop.
simtrial backend benchmark sketch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| n | 1 | 2 | 4 | 8 | 16 |
|---|---|---|---|---|---|
| dplyr | 5093.77 | 2671.44 | 1447.21 | 810.42 | 446.06 |
| data.table | 1336.79 | 677.94 | 364.5 | 217.75 | 143.95 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| n | 1 | 2 | 4 | 8 | 16 | 32 |
|---|---|---|---|---|---|---|
| dplyr | 487.89 | 264.16 | 145.39 | 86.51 | 62.51 | 88.83 |
| data.table | 131.3 | 67.36 | 38.89 | 28.19 | 31 | 65.65 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark simtrial::sim_fixed_n() at commit 341f77f under 1, 2, 4, 8, and 16
# workers. Results are stored in dplyr_*, so this is presumably the
# dplyr-backed build -- confirm against the simtrial commit history.

# Install the pinned commit. remove.packages() errors when the package is not
# present in the first library path, so only call it when a copy exists there.
if (nzchar(system.file(package = "simtrial", lib.loc = .libPaths()[1L]))) {
  remove.packages("simtrial")
}
remotes::install_github("Merck/simtrial@341f77f", force = TRUE, update = FALSE)

library("simtrial")
library("future")
library("doFuture")
library("tictoc")

# Number of simulated trials per sim_fixed_n() call.
k <- 10000
enroll_rate <- data.frame(rate = c(5, 20, 10), duration = c(100, 150, 150))

# Run `k` fixed-sample-size simulations with the benchmark's fixed settings.
# Reads `enroll_rate` from the global environment.
sim_fix <- function(k) {
  simtrial::sim_fixed_n(
    n_sim = k,
    sample_size = 3000,
    target_event = 700,
    enroll_rate = enroll_rate,
    timing_type = 2
  )
}

# Time `n_rep` consecutive sim_fix(k) calls under the requested future plan.
#
# workers: number of workers; 1 selects plan(sequential), anything else
#   selects plan(multisession, workers = workers).
# n_rep: how many times sim_fix(k) is run inside the timed region.
#
# The seed and plan are set before tic(), so plan setup is excluded from the
# timing, as in the hand-written stanzas this helper replaces. toc() prints the
# elapsed time labelled with the worker count. Returns the last simulation
# result.
run_benchmark <- function(workers, n_rep = 10L) {
  set.seed(42)
  if (workers == 1L) {
    plan(sequential)
  } else {
    plan(multisession, workers = workers)
  }
  tic(sprintf("workers = %d", workers))
  for (i in seq_len(n_rep)) {
    result <- sim_fix(k)
  }
  toc()
  result
}

dplyr_01 <- run_benchmark(1L)
dplyr_02 <- run_benchmark(2L)
dplyr_04 <- run_benchmark(4L)
dplyr_08 <- run_benchmark(8L)
dplyr_16 <- run_benchmark(16L)

# Switch back to sequential so the multisession workers are released.
plan(sequential)

# Restart R before installing and benchmarking the other build.
# NOTE(review): code after a session restart presumably has to be re-run by
# hand -- confirm how this script is meant to be driven.
rstudioapi::restartSession()
# Benchmark simtrial::sim_fixed_n() at commit 206ca44 under 1, 2, 4, 8, and 16
# workers. Results are stored in dt_*, so this is presumably the
# data.table-backed build -- confirm against the simtrial commit history.

# Install the pinned commit. remove.packages() errors when the package is not
# present in the first library path, so only call it when a copy exists there.
if (nzchar(system.file(package = "simtrial", lib.loc = .libPaths()[1L]))) {
  remove.packages("simtrial")
}
remotes::install_github("Merck/simtrial@206ca44", force = TRUE, update = FALSE)

library("simtrial")
library("future")
library("doFuture")
library("tictoc")

# Number of simulated trials per sim_fixed_n() call.
k <- 10000
enroll_rate <- data.frame(rate = c(5, 20, 10), duration = c(100, 150, 150))

# Run `k` fixed-sample-size simulations with the benchmark's fixed settings.
# Reads `enroll_rate` from the global environment.
sim_fix <- function(k) {
  simtrial::sim_fixed_n(
    n_sim = k,
    sample_size = 3000,
    target_event = 700,
    enroll_rate = enroll_rate,
    timing_type = 2
  )
}

# Time `n_rep` consecutive sim_fix(k) calls under the requested future plan.
#
# workers: number of workers; 1 selects plan(sequential), anything else
#   selects plan(multisession, workers = workers).
# n_rep: how many times sim_fix(k) is run inside the timed region.
#
# The seed and plan are set before tic(), so plan setup is excluded from the
# timing, as in the hand-written stanzas this helper replaces. toc() prints the
# elapsed time labelled with the worker count. Returns the last simulation
# result.
run_benchmark <- function(workers, n_rep = 10L) {
  set.seed(42)
  if (workers == 1L) {
    plan(sequential)
  } else {
    plan(multisession, workers = workers)
  }
  tic(sprintf("workers = %d", workers))
  for (i in seq_len(n_rep)) {
    result <- sim_fix(k)
  }
  toc()
  result
}

dt_01 <- run_benchmark(1L)
dt_02 <- run_benchmark(2L)
dt_04 <- run_benchmark(4L)
dt_08 <- run_benchmark(8L)
dt_16 <- run_benchmark(16L)

# Switch back to sequential so the multisession workers are released.
plan(sequential)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("ggplot2")
library("cowplot")
library("ggsci")

# Load the timing table. The code below drops the first column, transposes the
# rest, and names the resulting columns Cores, dplyr, and data.table, so the
# file is expected to hold three rows (core counts, dplyr timings, data.table
# timings) with a label in the first column.
x <- read.table("simtrial-10k.tsv")
x <- as.data.frame(t(x[, -1, drop = FALSE]))
colnames(x) <- c("Cores", "dplyr", "data.table")
row.names(x) <- NULL

# Time ----

# Stack the two backend columns into long format: one row per
# (Cores, Backend) pair with the timing in `Time`.
df_time <- reshape(
  data = x,
  varying = list(c("dplyr", "data.table")),
  v.names = "Time",
  timevar = "Backend",
  times = c("dplyr", "data.table"),
  direction = "long"
)

ggplot(df_time, aes(x = Cores, y = Time, color = Backend)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Performance comparison",
    x = "Number of cores",
    y = "Time (s)",
    color = "Backend"
  ) +
  # Put a tick at every benchmarked core count instead of a fixed list, so
  # tables with other core counts (e.g. 32) are labelled correctly.
  scale_x_continuous(breaks = unique(x$Cores)) +
  theme_cowplot() +
  background_grid() +
  scale_color_d3()
# Speedup ----

# Speedup of each backend relative to its own single-core timing
# (baseline time / time at n cores). Expects `x` to have the columns
# Cores, dplyr, and data.table.
baseline_dplyr <- x$dplyr[x$Cores == 1]
baseline_data_table <- x$data.table[x$Cores == 1]
x$speedup_dplyr <- baseline_dplyr / x$dplyr
x$speedup_data_table <- baseline_data_table / x$data.table

# Stack the two speedup columns into long format: one row per
# (Cores, Backend) pair with the ratio in `Speedup`.
df_speedup <- reshape(
  data = x,
  varying = list(c("speedup_dplyr", "speedup_data_table")),
  v.names = "Speedup",
  timevar = "Backend",
  times = c("dplyr", "data.table"),
  direction = "long"
)

ggplot(df_speedup, aes(x = Cores, y = Speedup, color = Backend)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Speedup vs. number of cores",
    x = "Number of cores",
    y = "Speedup",
    color = "Backend"
  ) +
  # Put a tick at every benchmarked core count instead of a fixed list, so
  # tables with other core counts (e.g. 32) are labelled correctly.
  scale_x_continuous(breaks = unique(x$Cores)) +
  scale_y_continuous(breaks = c(2, 4, 6, 8, 10)) +
  theme_cowplot() +
  background_grid() +
  scale_color_d3()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment