hannesdatta · January 11, 2022 11:11
diff --git a/models.R b/models.R
 # HOW TO MANAGE MEMORY ISSUES IN R?

 # A common problem of data- and computation-intensive projects
 # in R is memory management.

 # Suppose you would like to estimate a series of models,
 # but estimating all of them would exceed your available
 # memory.
 #
 # One solution could be to have individual R scripts
 # for each model, and run them separately from the 
 # terminal (e.g., using Rscript script1.R).
 #
 # A disadvantage of this approach is that you'll
 # quickly end up with a zillion scripts (script1.R, ..., script1zillion.R),
 # and you lose track of which model is the
 # "right model".
 #
 # So, how can we attain the benefits in memory handling
 # from running "only 1 model", AND the benefits from
 # having all of the models in one script?
 #
 # The solution is quite simple. We're gonna tell 
 # our script WHICH models to run, directly from the
 # terminal.
 #
 # For example, `Rscript models.R m1` will estimate
 # model 1, and `Rscript models.R m1 m2` will estimate models
 # 1-2.
 #
 # Here's the script!

 # Read arguments from the command line (e.g., `m1`, `m2`, `m3`)
 # Collect the model names given after the script name on the command line.
 args <- commandArgs(trailingOnly = TRUE)

 # Echo them, so a typo on the command line is easy to spot.
 print(args)

 # MODEL 1 -- estimated only when `m1` was requested.
 if (is.element("m1", args)) {
   print("running m1")

   # Placeholder: swap the random draw for the actual model code.
   m1 <- rnorm(100)
 }

 # MODEL 2 -- estimated only when `m2` was requested.
 if (is.element("m2", args)) {
   print("running m2")
   m2 <- rnorm(100)
 }

 # MODEL 3 -- estimated only when `m3` was requested.
 if (is.element("m3", args)) {
   print("running m3")
   m3 <- rnorm(100)
 }

 # Well, how do we save the objects that we've created, without
 # knowing their names a priori?
 # We can use the following function to scan the global environment for objects whose names match a regular expression (`regex`),
 # and save all of them to `filename`.
 # Save every object in `envir` whose name matches `regex` to `filename`.
 #
 # Arguments:
 #   regex    - regular expression matched against the object names in `envir`.
 #   filename - path of the file written by `save()`.
 #   envir    - environment that is searched and saved from; defaults to the
 #              global environment.
 #
 # Returns (invisibly) the names of the objects that were saved. When nothing
 # matches, a hint is printed, no file is written, and the result is empty.
 save_by_regex <- function(regex, filename, envir = .GlobalEnv) {
   object_names <- grep(regex, ls(envir = envir), value = TRUE)

   if (length(object_names) == 0) {
     cat('No objects to save. Verify regular expression.\n')
     return(invisible(object_names))
   }

   cat('saving...\n')
   cat(paste0('(', paste0(object_names, collapse = ', '), ')\n'))
   # Pass `envir` explicitly: by default `save()` starts its lookup in the
   # calling frame, so this function's own arguments and locals would shadow
   # objects of the same name (e.g. a global called `filename`).
   save(list = object_names, file = filename, envir = envir)
   cat('...done.\n')

   invisible(object_names)
 }

 # So... this one is going to save all objects whose names start with `m` (note that the model names given on the command line are appended to the name of the results file)
 # The output file is named after the requested models, e.g. `results_m1_m3.RData`.
 save_by_regex(
   "^m",
   paste0("results_", paste0(args, collapse = "_"), ".RData")
 )

 # And... how to run it from the command line?
 # Just go to the command line/terminal, and type

 # `Rscript models.R m1`
 # `Rscript models.R m1 m3`
 # `Rscript models.R m2`
 # etc.
	# HOW TO MANAGE MEMORY ISSUES IN R?

	# A common problem of data- and computation-intensive projects
	# in R is memory management.

	# Suppose you would like to estimate a series of models,
	# but estimating all of them would exceed your available
	# memory.
	#
	# One solution could be to have individual R scripts
	# for each model, and run them separately from the
	# terminal (e.g., using Rscript script1.R).
	#
	# A disadvantage of this approach is that you'll
	# quickly end up with a zillion scripts (script1.R, ..., script1zillion.R),
	# and you lose track of which model is the
	# "right model".
	#
	# So, how can we attain the benefits in memory handling
	# from running "only 1 model", AND the benefits from
	# having all of the models in one script?
	#
	# The solution is quite simple. We're gonna tell
	# our script WHICH models to run, directly from the
	# terminal.
	#
	# For example, `Rscript models.R m1` will estimate
	# model 1, and `Rscript models.R m1 m2` will estimate models
	# 1-2.
	#
	# Here's the script!

	# Read arguments from the command line (e.g., `m1`, `m2`, `m3`)
	# Collect the model names given after the script name on the command line.
	args <- commandArgs(trailingOnly = TRUE)

	# Echo them, so a typo on the command line is easy to spot.
	print(args)

	# MODEL 1 -- estimated only when `m1` was requested.
	if (is.element("m1", args)) {
	  print("running m1")

	  # Placeholder: swap the random draw for the actual model code.
	  m1 <- rnorm(100)
	}

	# MODEL 2 -- estimated only when `m2` was requested.
	if (is.element("m2", args)) {
	  print("running m2")
	  m2 <- rnorm(100)
	}

	# MODEL 3 -- estimated only when `m3` was requested.
	if (is.element("m3", args)) {
	  print("running m3")
	  m3 <- rnorm(100)
	}

	# Well, how do we save the objects that we've created, without
	# knowing their names a priori?
	# We can use the following function to scan the global environment for objects whose names match a regular expression (`regex`),
	# and save all of them to `filename`.
	# Save every object in `envir` whose name matches `regex` to `filename`.
	#
	# Arguments:
	#   regex    - regular expression matched against the object names in `envir`.
	#   filename - path of the file written by `save()`.
	#   envir    - environment that is searched and saved from; defaults to the
	#              global environment.
	#
	# Returns (invisibly) the names of the objects that were saved. When nothing
	# matches, a hint is printed, no file is written, and the result is empty.
	save_by_regex <- function(regex, filename, envir = .GlobalEnv) {
	  object_names <- grep(regex, ls(envir = envir), value = TRUE)

	  if (length(object_names) == 0) {
	    cat('No objects to save. Verify regular expression.\n')
	    return(invisible(object_names))
	  }

	  cat('saving...\n')
	  cat(paste0('(', paste0(object_names, collapse = ', '), ')\n'))
	  # Pass `envir` explicitly: by default `save()` starts its lookup in the
	  # calling frame, so this function's own arguments and locals would shadow
	  # objects of the same name (e.g. a global called `filename`).
	  save(list = object_names, file = filename, envir = envir)
	  cat('...done.\n')

	  invisible(object_names)
	}

	# So... this one is going to save all objects whose names start with `m` (note that the model names given on the command line are appended to the name of the results file)
	# The output file is named after the requested models, e.g. `results_m1_m3.RData`.
	save_by_regex(
	  "^m",
	  paste0("results_", paste0(args, collapse = "_"), ".RData")
	)

	# And... how to run it from the command line?
	# Just go to the command line/terminal, and type

	# `Rscript models.R m1`
	# `Rscript models.R m1 m3`
	# `Rscript models.R m2`
	# etc.
No results found