Last active
July 8, 2019 06:26
-
-
Save ktmud/e32c2b01b505446966d7eb9083301338 to your computer and use it in GitHub Desktop.
Run Parallel Job in R with Future and Purrr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(future) | |
library(tidyverse) | |
library(datasets) | |
library(randomForest) | |
library(tictoc) | |
# Run futures in background R sessions. Ask for at most 10 workers, but never
# more than the number of cores available to this R process, so that small
# machines are not oversubscribed.
plan(multisession, workers = min(10L, availableCores()))
# Fit a random forest of `mpg` on all other `mtcars` columns and return its
# variable-importance table as a data frame.
#
# idx: identifier of this run; stored in the `run_id` column of the result.
#
# Returns a data frame with one row per predictor: `run_id`, `var` (the
# predictor name) and the importance measures reported by `importance()`.
calc_imp <- function(idx) {
  # Pause for one second before fitting (presumably to make each job slow
  # enough for the timing comparison to be visible).
  Sys.sleep(1)

  forest <- randomForest(
    mpg ~ .,
    data = mtcars,
    ntree = 1000,
    keep.forest = FALSE,
    importance = TRUE
  )
  imp_matrix <- importance(forest)
  predictor_names <- rownames(imp_matrix)

  data.frame(run_id = idx, var = predictor_names, imp_matrix)
}
# Parallel Version
tic("parallel with future")
# Launch all five jobs first so they run concurrently. `seed = TRUE` gives each
# future its own parallel-safe random number stream; randomForest() draws
# random numbers, and without a seed the future package warns that the results
# may be statistically unreliable.
jobs <- map(1:5, function(i) future(calc_imp(i), seed = TRUE))
# Collecting the values blocks until each job has finished, then row-binds the
# per-run data frames.
vals <- map_dfr(jobs, value)
as_tibble(vals) %>% sample_n(5)
toc() # > 1.583 sec elapsed
# Simple Sequential Version
# Runs the same five jobs one after another in the current R session and
# row-binds the results, as a baseline for the timing comparison.
tic("simple sequential")
vals <- map_dfr(1:5, calc_imp)
vals %>%
  as_tibble() %>%
  sample_n(5)
toc() # > 5.174 sec elapsed
# Note: if your job takes less than 1 second to finish, it is often not
# beneficial to use parallel computing, because of the overhead of starting
# new workers and passing data to and from them.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment