Last active
August 3, 2018 17:04
-
-
Save SteveViss/804f5d1263451290cd3a to your computer and use it in GitHub Desktop.
Fitting lm on fake data using 2 nodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parallelize linear regressions using 4 nodes
# Open an interactive job
# with the following command line
# salloc --time=1:0:0 --ntasks=100
#0. Packages loading
library(doParallel)

#1. Init the cluster of SOCKETS
# Definition: A network socket is an endpoint of an inter-process
# communication across a computer network.

# Get the number of cores per node from the SLURM environment.
nCores <- as.numeric(Sys.getenv("SLURM_CPUS_ON_NODE"))
if (is.na(nCores) || nCores < 1) {
  # Fail early with a clear message instead of an obscure error from rep().
  stop(
    "SLURM_CPUS_ON_NODE is not set; run this script inside a SLURM allocation.",
    call. = FALSE
  )
}

# Get node addresses.
# SLURM_NODELIST is a compressed expression (e.g. "cdr[100-103,110]").
# Extracting the digits with a regex would silently drop every host inside a
# range, so let SLURM expand the expression into one hostname per line.
nodelist <- Sys.getenv("SLURM_NODELIST")
nodelist <- system2(
  "scontrol",
  c("show", "hostnames", shQuote(nodelist)),
  stdout = TRUE
)
if (length(nodelist) == 0L) {
  stop("Could not resolve any hostname from SLURM_NODELIST.", call. = FALSE)
}

# Build the cluster: one worker per core on every node.
# "PSOCK" is the socket cluster type built into the parallel package
# ("SOCK" is forwarded to the separate snow package).
cl <- makeCluster(rep(nodelist, times = nCores), type = "PSOCK")
registerDoParallel(cl)
#2. Generate a fake tree dataset with 100 observations (volume) for each of
#   1,000 species; a linear regression is then fitted for each species.
# Both sizes are named so that every vector below is derived from them and
# the dataset can be scaled up by editing a single place.
n_species <- 1000L
n_obs <- 100L

tree.df <- data.frame(
  species = rep(seq_len(n_species), each = n_obs),
  girth = runif(n_species * n_obs, 7, 40)
)

# Draw exactly one noise value per row. A shorter noise vector would be
# recycled silently, repeating the same residuals across species.
tree.df$volume <- tree.df$species / 10 + 5 * tree.df$girth +
  rnorm(nrow(tree.df), 0, 3)

# Extract species IDs to iterate over
species <- unique(tree.df$species)
#3. Run foreach loop and store results in fits object
# The data are split by species once, so each task receives only the rows of
# its own species instead of rescanning the full data frame in every
# iteration. Each task returns the species ID followed by the intercept and
# girth coefficients; rbind stacks them into one row per species.
#4. Stopping the cluster
# stopCluster() sits in `finally` so the workers are released even when a
# fit fails.
fits <- tryCatch(
  foreach(sp = split(tree.df, tree.df$species), .combine = rbind) %dopar% {
    fit <- lm(volume ~ girth, data = sp)
    c(sp$species[1], fit$coefficients)
  },
  finally = stopCluster(cl)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice job !
It will doubtlessly be helpful for many R users !