Last active
May 31, 2020 15:14
-
-
Save lcolladotor/7462753 to your computer and use it in GitHub Desktop.
Testing differing approaches for reducing the memory load when using mclapply in R.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#$ -cwd | |
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G | |
#$ -N approach1b | |
#$ -pe local 20 | |
#$ -m e | |
echo "**** Job starts ****" | |
date | |
## For organization | |
mkdir -p logs1b | |
mkdir -p trash | |
## Run approach test | |
Rscript testApproach.R -a "approach1b" -m 20 -d FALSE | |
## Organize log files | |
mv *.p* trash/ | |
mv approach1b.* logs1b/ | |
### Done | |
echo "**** Job ends ****" | |
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#$ -cwd | |
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G | |
#$ -N approach2 | |
#$ -pe local 20 | |
#$ -m e | |
echo "**** Job starts ****" | |
date | |
## For organization | |
mkdir -p logs2 | |
mkdir -p trash | |
## Run approach test | |
Rscript testApproach.R -a "approach2" -m 20 -d FALSE | |
## Organize log files | |
mv *.p* trash/ | |
mv approach2.* logs2/ | |
### Done | |
echo "**** Job ends ****" | |
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#$ -cwd | |
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G | |
#$ -N approach3 | |
#$ -pe local 20 | |
#$ -m e | |
echo "**** Job starts ****" | |
date | |
## For organization | |
mkdir -p logs3 | |
mkdir -p trash | |
## Run approach test | |
Rscript testApproach.R -a "approach3" -m 20 -d FALSE | |
## Organize log files | |
mv *.p* trash/ | |
mv approach3.* logs3/ | |
### Done | |
echo "**** Job ends ****" | |
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#$ -cwd | |
#$ -l jabba,mem_free=10G,h_vmem=2G,h_fsize=1G | |
#$ -N approach4 | |
#$ -pe local 20 | |
#$ -m e | |
echo "**** Job starts ****" | |
date | |
## For organization | |
mkdir -p logs4 | |
mkdir -p trash | |
## Run approach test | |
Rscript testApproach.R -a "approach4" -m 20 -d TRUE | |
## Organize log files | |
mv *.p* trash/ | |
mv approach4.* logs4/ | |
### Done | |
echo "**** Job ends ****" | |
date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load libraries | |
library("getopt") | |
library("parallel") | |
## Specify parameters | |
spec <- matrix(c( | |
'approach', 'a', 1, "character", "Either 'approach1b', 'approach2', 'approach3' or 'approach4'", | |
'mcores', 'm', 1, "integer", "Number of cores", | |
'dataAbsent', 'd', 2, "logical", "Is the data absent?", | |
'help' , 'h', 0, "logical", "Display help" | |
), byrow=TRUE, ncol=5) | |
opt <- getopt(spec) | |
## if help was asked for print a friendly message | |
## and exit with a non-zero error code | |
if (!is.null(opt$help)) { | |
cat(getopt(spec, usage=TRUE)) | |
q(status=1) | |
} | |
if(opt$dataAbsent) { | |
set.seed(20131113) | |
data <- data.frame(matrix(rnorm(1e7), ncol=10)) | |
if(opt$approach != "approach4") { | |
dataSplit1b <- split(data, rep(seq_len(opt$mcores), each= nrow(data) / opt$mcores)) | |
rm(data) | |
save(dataSplit1b, file="dataSplit1b.Rdata") | |
} | |
} | |
if(opt$approach == "approach1b") { | |
if(!opt$dataAbsent) load("dataSplit1b.Rdata") | |
#tracemem(dataSplit1b) | |
## Approach 1b | |
res1b <- mclapply(dataSplit1b, rowMeans, mc.cores=opt$mcores) | |
} else if (opt$approach == "approach2") { | |
if(!opt$dataAbsent) load("dataSplit1b.Rdata") | |
## Save the split data in an environment | |
my.env <- new.env() | |
for(i in seq_len(opt$mcores)) { | |
eval(parse(text=paste0("my.env$data", i, " <- dataSplit1b[[", i, "]]"))) | |
} | |
rm(dataSplit1b) | |
## Function that takes indexes, then extracts the data from the environment | |
applyMyFun <- function(idx, env) { | |
eval(parse(text=paste0("result <- env$", ls(env)[idx]))) | |
rowMeans(result) | |
} | |
## Approach 2 | |
index <- seq_len(opt$mcores) | |
names(index) <- index | |
res2 <- mclapply(index, applyMyFun, env=my.env, mc.cores=opt$mcores) | |
} else if(opt$approach == "approach3") { | |
if(opt$dataAbsent) { | |
## Save the chunks | |
for(i in names(dataSplit1b)) { | |
chunk <- dataSplit1b[[i]] | |
output <- paste0("chunk", i, ".Rdata") | |
save(chunk, file=output) | |
} | |
rm(dataSplit1b) | |
} | |
## Function that loads the chunk | |
applyMyFun2 <- function(idx) { | |
load(paste0("chunk", idx, ".Rdata")) | |
rowMeans(chunk) | |
} | |
## Approach 3 | |
index <- seq_len(opt$mcores) | |
names(index) <- index | |
res3 <- mclapply(index, applyMyFun2, mc.cores=opt$mcores) | |
} else if(opt$approach == "approach4") { | |
#tracemem(data) | |
## Approach 4: credit to Ryan from https://stat.ethz.ch/pipermail/bioc-devel/2013-November/004930.html | |
res4 <- mclapply(splitIndices(nrow(data), opt$mcores), function(i) rowMeans(data[i,]), mc.cores=opt$mcores) | |
} else { | |
stop("Invalid 'approach' option") | |
} | |
## Reproducibility | |
proc.time() | |
sessionInfo() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment