Skip to content

Instantly share code, notes, and snippets.

@nbenn
Created October 30, 2018 07:43
Show Gist options
  • Save nbenn/cc439d6f11e27e65e92f5e7b18ff8f32 to your computer and use it in GitHub Desktop.
Save nbenn/cc439d6f11e27e65e92f5e7b18ff8f32 to your computer and use it in GitHub Desktop.
Experiments with parallel access to bigmemory objects
library(bigmemory)
add_col_var <- function(x) {
biganalytics::apply(x, 2L, function(y) {
col_var <- 0
y_bar <- 0
for (i in y) y_bar <- y_bar + i
y_bar <- y_bar / length(y)
for (i in y) col_var <- col_var + (i - y_bar) ^ 2
y + (col_var / length(y))
})
}
bm_parallel <- function(mat, fun, n_cores, n_chuncks = n_cores) {
func <- function(i, m) {
m[i, ] <- fun(m[i, ])
}
grp_ind <- parallel::splitIndices(nrow(mat), n_chuncks)
if (n_cores == 1L) {
time_taken <- system.time(
lapply(grp_ind, func, mat)
)
} else {
time_taken <- system.time(
parallel::mclapply(grp_ind, func, mat, mc.cores = n_cores)
)
}
list(result = mat, timings = time_taken)
}
rep_run <- function(n_reps, fun, ...) {
res <- lapply(seq_len(n_reps), function(i, ...) {
time_taken <- system.time({
res <- fun(...)
})
list(result = res, overall = time_taken)
}, ...)
overall <- do.call(rbind, lapply(res, `[[`, "overall"))
res <- lapply(res, `[[`, "result")
timings <- do.call(rbind, lapply(res, `[[`, "timings"))
timings <- cbind(timings[, 1:3], overall[, 1:3])
timings <- cbind(mean = apply(timings, 2L, mean),
var = apply(timings, 2L, var))
rownames(timings) <- paste0(rep(c("user (", "system (", "elapsed ("), 2),
rep(c("focus)", "total)"), each = 3))
print(timings)
res[[1L]][["result"]]
}
new_bm <- function(n = 3e6) {
bigmemory::as.big.matrix(
x = matrix(
c(
runif(length(letters) * n, 0, 1),
runif(length(letters) * n, 1, 2),
runif(length(letters) * n, 1, 3),
rnorm(length(letters) * n, 0, 1),
rnorm(length(letters) * n, 1, 2),
rnorm(length(letters) * n, 2, 1)
),
ncol = 6
),
shared = TRUE
)
}
bm <- new_bm()
n_reps <- 10L
bm <- rep_run(n_reps, bm_parallel, bm, add_col_var, 1L, 2L)
> mean var
> user (focus) 59.7323 0.33399268
> system (focus) 13.2913 0.09428823
> elapsed (focus) 73.0359 0.54904966
> user (total) 64.0044 0.26820693
> system (total) 15.1614 0.09650071
> elapsed (total) 79.1799 0.47342632
bm <- rep_run(n_reps, bm_parallel, bm, add_col_var, 2L, 2L)
> mean var
> user (focus) 0.6499 0.001790322
> system (focus) 4.2755 0.007360722
> elapsed (focus) 46.0400 0.027573111
> user (total) 4.9179 0.008596767
> system (total) 6.2301 0.008852322
> elapsed (total) 52.2634 0.038122933
bm <- rep_run(n_reps, bm_parallel, bm, add_col_var, 1L, 3L)
> mean var
> user (focus) 60.0507 0.01421312
> system (focus) 13.5197 0.01155446
> elapsed (focus) 73.5787 0.02537290
> user (total) 65.3022 0.01397618
> system (total) 15.4178 0.01004596
> elapsed (total) 80.7294 0.02088427
bm <- rep_run(n_reps, bm_parallel, bm, add_col_var, 3L, 3L)
> mean var
> user (focus) 0.6682 0.002946622
> system (focus) 4.2276 0.013424267
> elapsed (focus) 32.5987 0.030501567
> user (total) 5.9507 0.003138900
> system (total) 6.1398 0.010955956
> elapsed (total) 39.7945 0.039957389
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment