Last active
April 22, 2018 23:46
-
-
Save MarkEdmondson1234/5d7d894bf725079538a5ad8db8459696 to your computer and use it in GitHub Desktop.
Run massive parallel R jobs cheaply on Google Compute Engine with googleComputeEngineR and future
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## see also http://blog.revolutionanalytics.com/2017/06/doazureparallel-updated.html on how to run on Azure | |
## and cloudyr project for AWS https://github.com/cloudyr/aws.ec2 | |
# now also in docs: https://cloudyr.github.io/googleComputeEngineR/articles/massive-parallel.html | |
## See also http://blog.revolutionanalytics.com/2017/06/doazureparallel-updated.html
## for how to run on Azure, and the cloudyr project for AWS:
## https://github.com/cloudyr/aws.ec2
## Now also in docs:
## https://cloudyr.github.io/googleComputeEngineR/articles/massive-parallel.html
library(googleComputeEngineR)
library(future)

## Auth to GCE happens automatically via environment file arguments
## (GCE_AUTH_FILE etc.) when googleComputeEngineR is loaded.

## Names for the 50 worker VMs: "cpu1" .. "cpu50".
vm_names <- paste0("cpu", seq_len(50))

## Request the cheapest (preemptible) VMs -- GCE may stop them at any
## time, so the workload must tolerate losing a worker.
preemptible <- list(preemptible = TRUE)

## Start up 50 VMs with r-base on them (can also customise via
## Dockerfiles using gce_vm_template() instead).
fiftyvms <- lapply(
  vm_names, gce_vm,
  predefined_type = "n1-standard-1",
  template = "r-base",
  scheduling = preemptible
)

## Add SSH details (username, keys, etc.) to each VM handle so future
## can connect to the workers.
fiftyvms <- lapply(fiftyvms, gce_ssh_setup)

## Once all are launched, register the VMs as a future cluster backend.
plan(cluster, workers = as.cluster(fiftyvms))
## The action to perform on each input via the cluster.
## NOTE(review): an_expensive_function() is a placeholder defined
## elsewhere -- replace with the real workload, and make sure it is
## available on the workers (e.g. baked into the Docker template).
my_single_function <- function(x) {
  an_expensive_function(x)
}
## Use future_lapply() to farm each call out to the cluster workers.
## NOTE(review): in current releases future_lapply() lives in the
## future.apply package rather than future itself -- add
## library(future.apply) if this errors on a modern setup.
all_results <- future_lapply(seq_len(50), my_single_function)

## Tidy up: stop all the VMs so they stop incurring compute charges.
lapply(fiftyvms, FUN = gce_vm_stop)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.