DavisVaughan · May 31, 2019 20:31
diff --git a/call-hell.r b/call-hell.r
 # often package developers store the call in their model object
 # and then print it out in their summary / print methods  
 lm_model <- lm(mpg ~ cyl, mtcars)

 lm_model
 #> 
 #> Call:
 #> lm(formula = mpg ~ cyl, data = mtcars)
 #> 
 #> Coefficients:
 #> (Intercept)          cyl  
 #>      37.885       -2.876

 lm_model$call
 #> lm(formula = mpg ~ cyl, data = mtcars)

 # normally this is not an issue, but sometimes package _users_
 # want to call that modeling function programatically. This often
 # means that they use `do.call()`
 .f <- mpg ~ cyl
 .data <- mtcars
 lm_model_do_call <- do.call(lm, list(formula = .f, data = .data))

 # essentially, this inlines the `data` in a textual representation in the call
 # OH THE HORROR
 lm_model_do_call
 #> 
 #> Call:
 #> (function (formula, data, subset, weights, na.action, method = "qr", 
 #>     model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 
 #>     contrasts = NULL, offset, ...) 
 #> {
 #>     ret.x <- x
 #>     ret.y <- y
 #>     cl <- match.call()
 #>     mf <- match.call(expand.dots = FALSE)
 #>     m <- match(c("formula", "data", "subset", "weights", "na.action", 
 #>         "offset"), names(mf), 0L)
 #>     mf <- mf[c(1L, m)]
 #>     mf$drop.unused.levels <- TRUE
 #>     mf[[1L]] <- quote(stats::model.frame)
 #>     mf <- eval(mf, parent.frame())
 #>     if (method == "model.frame") 
 #>         return(mf)
 #>     else if (method != "qr") 
 #>         warning(gettextf("method = '%s' is not supported. Using 'qr'", 
 #>             method), domain = NA)
 #>     mt <- attr(mf, "terms")
 #>     y <- model.response(mf, "numeric")
 #>     w <- as.vector(model.weights(mf))
 #>     if (!is.null(w) && !is.numeric(w)) 
 #>         stop("'weights' must be a numeric vector")
 #>     offset <- as.vector(model.offset(mf))
 #>     if (!is.null(offset)) {
 #>         if (length(offset) != NROW(y)) 
 #>             stop(gettextf("number of offsets is %d, should equal %d (number of observations)", 
 #>                 length(offset), NROW(y)), domain = NA)
 #>     }
 #>     if (is.empty.model(mt)) {
 #>         x <- NULL
 #>         z <- list(coefficients = if (is.matrix(y)) matrix(NA_real_, 
 #>             0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 * 
 #>             y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w != 
 #>             0) else if (is.matrix(y)) nrow(y) else length(y))
 #>         if (!is.null(offset)) {
 #>             z$fitted.values <- offset
 #>             z$residuals <- y - offset
 #>         }
 #>     }
 #>     else {
 #>         x <- model.matrix(mt, mf, contrasts)
 #>         z <- if (is.null(w)) 
 #>             lm.fit(x, y, offset = offset, singular.ok = singular.ok, 
 #>                 ...)
 #>         else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok, 
 #>             ...)
 #>     }
 #>     class(z) <- c(if (is.matrix(y)) "mlm", "lm")
 #>     z$na.action <- attr(mf, "na.action")
 #>     z$offset <- offset
 #>     z$contrasts <- attr(x, "contrasts")
 #>     z$xlevels <- .getXlevels(mt, mf)
 #>     z$call <- cl
 #>     z$terms <- mt
 #>     if (model) 
 #>         z$model <- mf
 #>     if (ret.x) 
 #>         z$x <- x
 #>     if (ret.y) 
 #>         z$y <- y
 #>     if (!qr) 
 #>         z$qr <- NULL
 #>     z
 #> })(formula = mpg ~ cyl, data = structure(list(mpg = c(21, 21, 
 #> 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 
 #> 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 
 #> 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4, 
 #> 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 
 #> 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225, 
 #> 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 
 #> 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 
 #> 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105, 
 #> 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 
 #> 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), 
 #>     drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 
 #>     3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 
 #>     4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 
 #>     3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 
 #>     3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 
 #>     5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 
 #>     3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 
 #>     17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 
 #>     18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52, 
 #>     19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 
 #>     14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 
 #>     1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 
 #>     0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 #>     0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), 
 #>     gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 
 #>     3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 
 #>     4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 
 #>     1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4", 
 #> "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", 
 #> "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", 
 #> "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", 
 #> "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", 
 #> "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", 
 #> "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", 
 #> "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", 
 #> "Volvo 142E"), class = "data.frame"))
 #> 
 #> Coefficients:
 #> (Intercept)          cyl  
 #>      37.885       -2.876

 # note the difference in size!
 object.size(lm_model)
 #> 25528 bytes
 object.size(lm_model_do_call)
 #> 237824 bytes

 # here it is
 object.size(lm_model_do_call$call)
 #> 213024 bytes

 # if you are a package developer, please don't save the call

 # if you are a package user, you can avoid this issue with rlang
 library(rlang)
 #> 
 #> Attaching package: 'rlang'
 #> The following object is masked _by_ '.GlobalEnv':
 #> 
 #>     .data

 clean_lm_call <- call2("lm", mpg ~ cyl, sym("mtcars"))
 clean_lm_call
 #> lm(mpg ~ cyl, mtcars)

 eval_bare(clean_lm_call)
 #> 
 #> Call:
 #> lm(formula = mpg ~ cyl, data = mtcars)
 #> 
 #> Coefficients:
 #> (Intercept)          cyl  
 #>      37.885       -2.876

 # or with base R
 eval(call("lm", mpg ~ cyl, substitute(mtcars)))
 #> 
 #> Call:
 #> lm(formula = mpg ~ cyl, data = mtcars)
 #> 
 #> Coefficients:
 #> (Intercept)          cyl  
 #>      37.885       -2.876

 # if you need to control the environment that `mtcars` is evaluated (i.e. looked up)
 # in, you can either quo() it to tie the environment to the symbol and use eval_tidy()
 # or you can also specify an environment to the `eval_bare()` call for terms to be
 # looked up in

 # lets use a different data set where we have to control the environment

 generate_call_bad <- function() {
  my_data <- mtcars
  call2("lm", mpg ~ cyl, sym("my_data"))
 }

 generate_call_bad()
 #> lm(mpg ~ cyl, my_data)

 # oh no :/
 eval_bare(generate_call_bad())
 #> Error in is.data.frame(data): object 'my_data' not found

 generate_call_good <- function() {
  my_data <- mtcars
  call2("lm", mpg ~ cyl, quo(my_data)) # <- the symbol `my_data` + environment are kept
 }

 call_good <- generate_call_good()
 call_good
 #> lm(mpg ~ cyl, ~my_data)

 # ah there it is, see the `env`ironment?
 # that tells us where to look up `my_data`
 call_good[[3]]
 #> <quosure>
 #> expr: ^my_data
 #> env:  0x7f800e48fa60

 eval_tidy(generate_call_good())
 #> 
 #> Call:
 #> lm(formula = mpg ~ cyl, data = ~my_data)
 #> 
 #> Coefficients:
 #> (Intercept)          cyl  
 #>      37.885       -2.876
	# often package developers store the call in their model object
	# and then print it out in their summary / print methods
	lm_model <- lm(mpg ~ cyl, mtcars)

	lm_model
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	lm_model$call
	#> lm(formula = mpg ~ cyl, data = mtcars)

	# normally this is not an issue, but sometimes package _users_
	# want to call that modeling function programatically. This often
	# means that they use `do.call()`
	.f <- mpg ~ cyl
	.data <- mtcars
	lm_model_do_call <- do.call(lm, list(formula = .f, data = .data))

	# essentially, this inlines the `data` in a textual representation in the call
	# OH THE HORROR
	lm_model_do_call
	#>
	#> Call:
	#> (function (formula, data, subset, weights, na.action, method = "qr",
	#> model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
	#> contrasts = NULL, offset, ...)
	#> {
	#> ret.x <- x
	#> ret.y <- y
	#> cl <- match.call()
	#> mf <- match.call(expand.dots = FALSE)
	#> m <- match(c("formula", "data", "subset", "weights", "na.action",
	#> "offset"), names(mf), 0L)
	#> mf <- mf[c(1L, m)]
	#> mf$drop.unused.levels <- TRUE
	#> mf[[1L]] <- quote(stats::model.frame)
	#> mf <- eval(mf, parent.frame())
	#> if (method == "model.frame")
	#> return(mf)
	#> else if (method != "qr")
	#> warning(gettextf("method = '%s' is not supported. Using 'qr'",
	#> method), domain = NA)
	#> mt <- attr(mf, "terms")
	#> y <- model.response(mf, "numeric")
	#> w <- as.vector(model.weights(mf))
	#> if (!is.null(w) && !is.numeric(w))
	#> stop("'weights' must be a numeric vector")
	#> offset <- as.vector(model.offset(mf))
	#> if (!is.null(offset)) {
	#> if (length(offset) != NROW(y))
	#> stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
	#> length(offset), NROW(y)), domain = NA)
	#> }
	#> if (is.empty.model(mt)) {
	#> x <- NULL
	#> z <- list(coefficients = if (is.matrix(y)) matrix(NA_real_,
	#> 0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
	#> y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
	#> 0) else if (is.matrix(y)) nrow(y) else length(y))
	#> if (!is.null(offset)) {
	#> z$fitted.values <- offset
	#> z$residuals <- y - offset
	#> }
	#> }
	#> else {
	#> x <- model.matrix(mt, mf, contrasts)
	#> z <- if (is.null(w))
	#> lm.fit(x, y, offset = offset, singular.ok = singular.ok,
	#> ...)
	#> else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
	#> ...)
	#> }
	#> class(z) <- c(if (is.matrix(y)) "mlm", "lm")
	#> z$na.action <- attr(mf, "na.action")
	#> z$offset <- offset
	#> z$contrasts <- attr(x, "contrasts")
	#> z$xlevels <- .getXlevels(mt, mf)
	#> z$call <- cl
	#> z$terms <- mt
	#> if (model)
	#> z$model <- mf
	#> if (ret.x)
	#> z$x <- x
	#> if (ret.y)
	#> z$y <- y
	#> if (!qr)
	#> z$qr <- NULL
	#> z
	#> })(formula = mpg ~ cyl, data = structure(list(mpg = c(21, 21,
	#> 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3,
	#> 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
	#> 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4,
	#> 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8,
	#> 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225,
	#> 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
	#> 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3,
	#> 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105,
	#> 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52,
	#> 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109),
	#> drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92,
	#> 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93,
	#> 4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22,
	#> 3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
	#> 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25,
	#> 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
	#> 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46,
	#> 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3,
	#> 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52,
	#> 19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
	#> 14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1,
	#> 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
	#> 0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	#> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
	#> gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3,
	#> 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4,
	#> 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,
	#> 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4",
	#> "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
	#> "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
	#> "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
	#> "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
	#> "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
	#> "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
	#> "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
	#> "Volvo 142E"), class = "data.frame"))
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# note the difference in size!
	object.size(lm_model)
	#> 25528 bytes
	object.size(lm_model_do_call)
	#> 237824 bytes

	# here it is
	object.size(lm_model_do_call$call)
	#> 213024 bytes

	# if you are a package developer, please don't save the call

	# if you are a package user, you can avoid this issue with rlang
	library(rlang)
	#>
	#> Attaching package: 'rlang'
	#> The following object is masked _by_ '.GlobalEnv':
	#>
	#> .data

	clean_lm_call <- call2("lm", mpg ~ cyl, sym("mtcars"))
	clean_lm_call
	#> lm(mpg ~ cyl, mtcars)

	eval_bare(clean_lm_call)
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# or with base R
	eval(call("lm", mpg ~ cyl, substitute(mtcars)))
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# if you need to control the environment that `mtcars` is evaluated (i.e. looked up)
	# in, you can either quo() it to tie the environment to the symbol and use eval_tidy()
	# or you can also specify an environment to the `eval_bare()` call for terms to be
	# looked up in

	# lets use a different data set where we have to control the environment

	generate_call_bad <- function() {
	my_data <- mtcars
	call2("lm", mpg ~ cyl, sym("my_data"))
	}

	generate_call_bad()
	#> lm(mpg ~ cyl, my_data)

	# oh no :/
	eval_bare(generate_call_bad())
	#> Error in is.data.frame(data): object 'my_data' not found

	generate_call_good <- function() {
	my_data <- mtcars
	call2("lm", mpg ~ cyl, quo(my_data)) # <- the symbol `my_data` + environment are kept
	}

	call_good <- generate_call_good()
	call_good
	#> lm(mpg ~ cyl, ~my_data)

	# ah there it is, see the `env`ironment?
	# that tells us where to look up `my_data`
	call_good[[3]]
	#> <quosure>
	#> expr: ^my_data
	#> env: 0x7f800e48fa60

	eval_tidy(generate_call_good())
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = ~my_data)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876
No results found