Last active
October 9, 2022 06:41
-
-
Save DavisVaughan/865d95cf0101c24df27b37f4047dd2e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates running furrr code distributed across 2 AWS EC2 instances
# ("nodes"). The instances have already been created.
library(future)
library(furrr)

# Two t2.micro AWS instances
# Created from http://www.louisaslett.com/RStudio_AMI/
public_ip <- c("34.205.155.182", "34.201.26.217")

# This is where my pem file lives (password to connect essentially).
ssh_private_key_file <- "~/Desktop/programming/AWS/key-pair/dvaughan.pem"

# Connect!
cl <- makeClusterPSOCK(
  ## Public IP numbers of the EC2 instances
  public_ip,
  ## User name (always 'ubuntu')
  user = "ubuntu",
  ## Use private SSH key registered with AWS.
  ## NOTE(review): StrictHostKeyChecking=no skips SSH host key verification;
  ## convenient for throwaway instances, but confirm this is acceptable
  ## before reusing it elsewhere.
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  ## Set up .libPaths() for the 'ubuntu' user and
  ## install future/purrr/furrr packages on each worker at startup
  rscript_args = c(
    "-e", shQuote("local({p <- Sys.getenv('R_LIBS_USER'); dir.create(p, recursive = TRUE, showWarnings = FALSE); .libPaths(p)})"),
    "-e", shQuote("install.packages(c('future', 'purrr', 'furrr'))")
  ),
  dryrun = FALSE
)

# Set the plan to use the cluster workers!
plan(cluster, workers = cl)

# Run some code distributed evenly on the two workers!
# `x` lives in the local session and is referenced inside the mapped function.
x <- 1
future_map(1:5, ~ .x + x)
#> [[1]]
#> [1] 2
#>
#> [[2]]
#> [1] 3
#>
#> [[3]]
#> [1] 4
#>
#> [[4]]
#> [1] 5
#>
#> [[5]]
#> [1] 6

# Are we reaallllly running in parallel?
# Two 10-second sleeps finishing in ~13 seconds (rather than ~20) says yes.
library(tictoc)
tic()
future_map(1:2, ~ Sys.sleep(10))
#> [[1]]
#> NULL
#>
#> [[2]]
#> NULL
toc()
#> 13.158 sec elapsed

# Shut down. Reset the plan first so no later future is sent to the
# stopped cluster.
plan(sequential)
parallel::stopCluster(cl)
Another version that does multi-level distribution. First, we distribute over 2 t2.xlarge AWS instances (2x speedup). Then we distribute over the 4 vCPUs that each instance has (for a combined speedup of up to ~8x in this super simple example).
Using this, we go from about 80 seconds to about 13 seconds.
# Multi-level distribution with furrr: the outer level spreads work across
# 2 AWS EC2 instances, the inner level across the 4 vCPUs of each instance.
library(furrr)
#> Loading required package: future
library(tictoc)

instance_ips <- c("54.164.159.51", "52.23.226.59")
public_ip <- instance_ips

# This is where my pem file lives (password to connect essentially).
ssh_private_key_file <- "~/Desktop/programming/AWS/key-pair/dvaughan.pem"

# Connect!
cl <- makeClusterPSOCK(
  ## Public IP numbers of the EC2 instances
  public_ip,
  ## User name (always 'ubuntu')
  user = "ubuntu",
  ## Use private SSH key registered with AWS.
  ## NOTE(review): StrictHostKeyChecking=no skips SSH host key verification;
  ## convenient for throwaway instances, but confirm this is acceptable
  ## before reusing it elsewhere.
  rshopts = c(
    "-o", "StrictHostKeyChecking=no",
    "-o", "IdentitiesOnly=yes",
    "-i", ssh_private_key_file
  ),
  ## Set up .libPaths() for the 'ubuntu' user and
  ## install future/purrr/furrr packages on each worker at startup
  rscript_args = c(
    "-e", shQuote("local({p <- Sys.getenv('R_LIBS_USER'); dir.create(p, recursive = TRUE, showWarnings = FALSE); .libPaths(p)})"),
    "-e", shQuote("install.packages(c('future', 'purrr', 'furrr'))")
  ),
  dryrun = FALSE
)

# First let's try it sequentially: 2 x 4 sleeps of 10 seconds each.
plan(sequential)
tic()
future_map(1:2, ~ {
  future_map(1:4, ~ Sys.sleep(10))
})
toc()
#> 80.851 sec elapsed

# Multi-distributed plan!
# The first list element governs the outer future_map() (the EC2 cluster),
# the second governs the nested future_map() running on each instance.
# `multisession` replaces `multiprocess`, which the future package has
# deprecated; the timing below was recorded by the original run.
plan(list(tweak(cluster, workers = cl), tweak(multisession, workers = 4)))
tic()
# First we distribute over our 2 EC2 instances
future_map(1:2, ~ {
  # Then we distribute over the 4 vCPUs that each instance has
  future_map(1:4, ~ Sys.sleep(10))
})
toc()
#> 13.001 sec elapsed

# Shut down. Reset the plan first so no later future is sent to the
# stopped cluster.
plan(sequential)
parallel::stopCluster(cl)
Created on 2018-05-31 by the reprex package (v0.2.0).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
An updated version that also uses reticulate to let you start and stop the instances.