topepo · July 19, 2019 17:27
diff --git a/min_grid.R b/min_grid.R
 # _Prototype_ code to find the minimum grid that should be fit for models. This 
 # exploits the fact that some models can evaluate extra sub-models from the same 
 # object. 

 # devtools::install_github("tidymodels/parsnip")
 # devtools::install_github("tidymodels/dials")

 library(tidymodels)
 #> ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────── tidymodels 0.0.2 ──
 #> ✔ broom     0.5.1          ✔ purrr     0.3.2     
 #> ✔ dials     0.0.2.9000     ✔ recipes   0.1.6     
 #> ✔ dplyr     0.8.3          ✔ rsample   0.0.5     
 #> ✔ ggplot2   3.2.0          ✔ tibble    2.1.3     
 #> ✔ infer     0.4.0          ✔ yardstick 0.0.3.9000
 #> ✔ parsnip   0.0.2.9000
 #> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
 #> ✖ purrr::discard()  masks scales::discard()
 #> ✖ dplyr::filter()   masks stats::filter()
 #> ✖ dplyr::lag()      masks stats::lag()
 #> ✖ ggplot2::margin() masks dials::margin()
 #> ✖ recipes::step()   masks stats::step()

 min_grid <- function(x, grid, ...) {
  # x is a `model_spec` object from parsnip
  # grid is a tibble of tuning parameer values with names
  #  matching the parameter names. 
  UseMethod("min_grid")
 }

 # As an example, if we fit a boosted tree  model and tune over 
 # trees = 1:20 and min_n = c(20, 30)
 # we should only have to fit two models:
 #
 #   trees = 20 & min_n = 20
 #   trees = 20 & min_n = 30
 # 
 # The logic related to how this "mini grid" gets made is model-specific. 
 # 
 # To get the full set of predictions, we need to know, for each of these two 
 # models, what values of num_terms to give to the multi_predict() function. 
 # 
 # The current idea is to have a list column of the extra models for prediction. 
 # For the example above:
 #
 #   # A tibble: 2 x 3
 #     trees min_n .submodels      
 #     <dbl> <dbl> <list>          
 #   1    20    20 <named list [1]>
 #   2    20    30 <named list [1]>
 #
 # and the .submodels would both be
 #
 #  list(trees = 1:19)
 #
 # There are a lot of other things to consider in future versions like grids
 # where there are multiple columns with the same name (maybe the results of 
 # a recipe) and so on. 

 # helper functions

 # Template for model results that do no have the sub-model feature
 blank_submodels <- function(grid) {
  grid %>%
    dplyr::mutate(.submodels = map(1:nrow(grid), ~ list()))
 }

 get_fixed_args <- function(info) {
  # Get non-sub-model columns to iterate over
  fixed_args <- info$name[!info$has_submodel]
 }

 get_submodel_info <- function(spec, grid) {
  param_info <- 
    get_from_env(paste0(class(spec)[1], "_args")) %>%
    dplyr::filter(engine == spec$engine) %>% 
    dplyr::select(name = parsnip, has_submodel)
  
  # In case a recipe or other activity has grid parameter columns, 
  # add those to the results
  grid_names <- names(grid)
  is_mod_param <- grid_names %in% param_info$name
  if (any(!is_mod_param)) {
    param_info <- 
      param_info %>% 
      bind_rows(
        tibble(name = grid_names[!is_mod_param],
               has_submodel = FALSE)
      )
  }
  param_info %>% dplyr::filter(name %in% grid_names)
 }


 min_grid.boost_tree <- function(x, grid, ...) {
  grid_names <- names(grid)
  param_info <- get_submodel_info(x, grid)
  
  # No ability to do submodels? Finish here:
  if (!any(param_info$has_submodel)) {
    return(blank_submodels(grid))
  }
  
  fixed_args <- get_fixed_args(param_info)
  
  # For boosted trees, fit the model with the most trees (conditional on the 
  # other parameters) so that you can do predictions on the smaller models. 
  fit_only <-
    grid %>%
    dplyr::group_by(!!!syms(fixed_args)) %>%
    dplyr::summarize(trees = max(trees, na.rm = TRUE)) %>%
    dplyr::ungroup()
  
  # Add a column .submodels that is a list with what should be predicted
  # by `multi_predict()` (assuming `predict()` has already been executed
  # on the original value of 'trees')
  min_grid_df <-
    dplyr::full_join(fit_only %>% rename(max_tree = trees), grid, by = fixed_args) %>%
    dplyr::filter(trees != max_tree) %>%
    dplyr::group_by(!!!syms(fixed_args)) %>%
    dplyr::summarize(.submodels = list(list(trees = trees))) %>%
    dplyr::ungroup() %>%
    dplyr::full_join(fit_only, grid, by = fixed_args)
  
  min_grid_df  %>% dplyr::select(one_of(grid_names), .submodels)
 }

 # Examples

 # simple grids
 boosting_spec <- boost_tree() %>% set_engine("xgboost")
 basic_grid <- grid_regular(trees(), min_n(c(10, 20)), tree_depth(), levels = 3)
 smaller_grid <- min_grid(boosting_spec, basic_grid)
 smaller_grid
 #> # A tibble: 9 x 4
 #>   trees min_n tree_depth .submodels      
 #>   <int> <int>      <int> <list>          
 #> 1  2000    10          2 <named list [1]>
 #> 2  2000    15          2 <named list [1]>
 #> 3  2000    20          2 <named list [1]>
 #> 4  2000    10          8 <named list [1]>
 #> 5  2000    15          8 <named list [1]>
 #> 6  2000    20          8 <named list [1]>
 #> 7  2000    10         15 <named list [1]>
 #> 8  2000    15         15 <named list [1]>
 #> 9  2000    20         15 <named list [1]>
 smaller_grid$.submodels[[1]]
 #> $trees
 #> [1]    1 1000

 # non-regular grids
 set.seed(35)
 filler_grid <- grid_max_entropy(trees(), min_n(c(10, 20)), tree_depth(), size = 5)
 min_grid(boosting_spec, filler_grid)
 #> # A tibble: 4 x 4
 #>   trees min_n tree_depth .submodels      
 #>   <int> <int>      <int> <list>          
 #> 1  1259    12          3 <named list [1]>
 #> 2   330    11         11 <NULL>          
 #> 3  1831    19         12 <NULL>          
 #> 4  1342    11         14 <NULL>

 # an engine that does not support submodels:
 spark_spec <- boost_tree() %>% set_engine("spark")

 min_grid(spark_spec, basic_grid)
 #> # A tibble: 27 x 4
 #>    trees min_n tree_depth .submodels
 #>  * <int> <int>      <int> <list>    
 #>  1     1    10          2 <list [0]>
 #>  2  1000    10          2 <list [0]>
 #>  3  2000    10          2 <list [0]>
 #>  4     1    15          2 <list [0]>
 #>  5  1000    15          2 <list [0]>
 #>  6  2000    15          2 <list [0]>
 #>  7     1    20          2 <list [0]>
 #>  8  1000    20          2 <list [0]>
 #>  9  2000    20          2 <list [0]>
 #> 10     1    10          8 <list [0]>
 #> # … with 17 more rows


 min_grid.nearest_neighbor_kknn <- function(x, grid, ...) {
  
  arg_info <-
    get_from_env(paste0(class(x)[1], "_args")) %>%
    dplyr::filter(engine == x$engine & parsnip %in% names(grid))
  
  all_param <- arg_info$parsnip
  
  # If no submodels, just give the grid back with an extra column
  if (!any(arg_info$has_submodel)) {
    return(blank_submodels(grid))
  }
  
  # Get non-sub-model columns to iterate over
  fixed_args <- arg_info$parsnip[!arg_info$has_submodel]
  fixed_syms <- rlang::syms(fixed_args)
  
  fit_only <-
    grid %>%
    dplyr::group_by(!!!fixed_syms) %>%
    # could do any value of k but we'll go with the max
    dplyr::summarize(neighbors = max(neighbors, na.rm = TRUE)) %>%
    dplyr::ungroup()
  
  min_grid_df <-
    dplyr::full_join(fit_only %>% rename(max_neighbors = neighbors), grid, by = fixed_args) %>%
    dplyr::filter(neighbors != max_neighbors) %>%
    dplyr::group_by(!!!fixed_syms) %>%
    dplyr::summarize(.submodels = list(list(neighbors = neighbors))) %>%
    dplyr::ungroup() %>%
    dplyr::full_join(fit_only, grid, by = fixed_args)
  
  min_grid_df  %>% dplyr::select(!!!all_param, .submodels)
 }

 knn_grid <- 
  grid_regular(neighbors(c(1, 10)), dist_power(), weight_func(), levels = 3) %>% 
  # just for kicks:
  slice(-4)

 min_grid.nearest_neighbor_kknn(nearest_neighbor() %>% set_engine("kknn"), knn_grid)
 #> # A tibble: 9 x 4
 #>   neighbors weight_func  dist_power .submodels      
 #>       <int> <chr>             <dbl> <list>          
 #> 1        10 epanechnikov        1   <named list [1]>
 #> 2        10 epanechnikov        1.5 <named list [1]>
 #> 3        10 epanechnikov        2   <named list [1]>
 #> 4        10 rectangular         1   <named list [1]>
 #> 5        10 rectangular         1.5 <named list [1]>
 #> 6        10 rectangular         2   <named list [1]>
 #> 7        10 triangular          1   <named list [1]>
 #> 8        10 triangular          1.5 <named list [1]>
 #> 9        10 triangular          2   <named list [1]>
 knn_grid2 <- grid_max_entropy(neighbors(c(1, 10)), dist_power(), weight_func(), size = 10)
 min_grid.nearest_neighbor_kknn(nearest_neighbor() %>% set_engine("kknn"), knn_grid2)
 #> # A tibble: 10 x 4
 #>    neighbors weight_func  dist_power .submodels
 #>        <int> <chr>             <dbl> <list>    
 #>  1         2 biweight           1.04 <NULL>    
 #>  2         9 epanechnikov       1.10 <NULL>    
 #>  3         9 gaussian           1.06 <NULL>    
 #>  4         9 gaussian           1.78 <NULL>    
 #>  5         3 rank               1.12 <NULL>    
 #>  6         4 rank               1.81 <NULL>    
 #>  7         9 rectangular        1.51 <NULL>    
 #>  8         1 rectangular        1.89 <NULL>    
 #>  9         9 rectangular        1.95 <NULL>    
 #> 10         6 triweight          1.44 <NULL>


 min_grid.mars_earth <- function(x, grid, ...) {

  arg_info <-
    get_from_env(paste0(class(x)[1], "_args")) %>%
    dplyr::filter(engine == x$engine & parsnip %in% names(grid))

  all_param <- arg_info$parsnip

  # If no submodels, just give the grid back with an extra column
  if (!any(arg_info$has_submodel)) {
    return(blank_submodels(grid))
  }

  # Get non-sub-model columns to iterate over
  fixed_args <- arg_info$parsnip[!arg_info$has_submodel]
  fixed_syms <- rlang::syms(fixed_args)

  fit_only <-
    grid %>%
    dplyr::group_by(!!!fixed_syms) %>%
    dplyr::summarize(num_terms = max(num_terms, na.rm = TRUE)) %>%
    dplyr::ungroup()

  min_grid_df <-
    dplyr::full_join(fit_only %>% rename(max_terms = num_terms), grid, by = fixed_args) %>%
    dplyr::filter(num_terms != max_terms) %>%
    dplyr::group_by(!!!fixed_syms) %>%
    dplyr::summarize(.submodels = list(list(num_terms = num_terms))) %>%
    dplyr::ungroup() %>%
    dplyr::full_join(fit_only, grid, by = fixed_args)

  min_grid_df  %>% dplyr::select(!!!all_param, .submodels)
 }


 min_grid.linear_reg_glmnet <- function(x, grid, ...) {

  arg_info <-
    get_from_env(paste0(class(x)[1], "_args")) %>%
    dplyr::filter(engine == x$engine & parsnip %in% names(grid))

  all_param <- arg_info$parsnip

  # If no submodels, just give the grid back with an extra column
  if (!any(arg_info$has_submodel)) {
    return(blank_submodels(grid))
  }

   fit_only <-
    grid %>%
    distinct(mixture)

  # To get entire regularization path, we will set lambda = NULL. Since you
  # can't have a column of NULLS in a data frame, we set to NA.
  min_grid_df <-
    grid %>%
    group_by(mixture) %>%
    nest(-mixture) %>%
    ungroup() %>%
    mutate(
      penalty = NA_real_,  # should really be set to NULL; fix in translate function
      .submodels = map(data, pull, 1)
    ) %>%
    dplyr::select(-data)

  min_grid_df  %>% dplyr::select(!!!all_param, .submodels)
 }

 min_grid.logistic_reg_glmnet <- min_grid.linear_reg_glmnet


 xgb_full_grid <-
  crossing(tree_depth = 10:11, min_n = 5:7, trees = 1:3)

 xgb_full_exp <-
  crossing(tree_depth = 10:11, min_n = 5:7, trees = 3) %>%
  mutate(.submodels = map(trees, ~ 1:2)) %>%
  dplyr::select(tree_depth, trees, min_n, .submodels)

 xgb_full_res <-
  min_grid.boost_tree_xgboost(
    boost_tree() %>% set_engine("xgboost"),
    xgb_full_grid
  )
 #> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("xgboost"), : could not find function "min_grid.boost_tree_xgboost"

 # Need to use vctrs:
 # Error: Can't join on '.submodels' x '.submodels' because of incompatible types (list / list)
 # all_equal(xgb_full_res, xgb_full_exp)
 # re-use test_by_col() here

 xgb_smol_grid <-
  crossing(tree_depth = 10:11, min_n = 5:7)

 xgb_full_exp <-
  crossing(tree_depth = 10:11, min_n = 5:7) %>%
  mutate(.submodels = map(tree_depth, ~ list()))

 min_grid.boost_tree_xgboost(
  boost_tree() %>% set_engine("xgboost"),
  xgb_smol_grid
 )
 #> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("xgboost"), : could not find function "min_grid.boost_tree_xgboost"


 c5_full_grid <-
  crossing(min_n = 5:7, trees = 1:3)

 c5_full_exp <-
  crossing(min_n = 5:7, trees = 3) %>%
  mutate(.submodels = map(trees, ~ 1:2)) %>%
  dplyr::select(trees, min_n, .submodels)

 c5_full_res <-
  min_grid.boost_tree_xgboost(
    boost_tree() %>% set_engine("C5.0"),
    c5_full_grid
  )
 #> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("C5.0"), : could not find function "min_grid.boost_tree_xgboost"

 c5_smol_grid <-
  crossing(min_n = 5:7)

 c5_full_exp <-
  crossing(min_n = 5:7) %>%
  mutate(.submodels = map(min_n, ~ list()))

 min_grid.boost_tree_xgboost(
  boost_tree() %>% set_engine("C5.0"),
  c5_smol_grid
 )
 #> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("C5.0"), : could not find function "min_grid.boost_tree_xgboost"


 earth_full_grid <-
  crossing(num_terms = 10:11, prod_degree = 1:2, prune_method = c("none", "backward"))

 earth_full_exp <-
  crossing(num_terms = 11, prod_degree = 1:2, prune_method = c("none", "backward")) %>%
  mutate(.submodels = map(prod_degree, ~ 10)) %>%
  dplyr::select(num_terms, prod_degree, prune_method, .submodels)

 earth_full_res <-
  min_grid.mars_earth(
    mars() %>% set_engine("earth"),
    earth_full_grid
  )


 glmn_full_grid <-
  crossing(mixture = (0:3)/3, penalty = c(0, .1, 1))

 glmn_full_exp <-
  crossing(penalty = NA_real_, mixture = (0:3)/3) %>%
  mutate(.submodels = map(mixture, ~ c(0, .1, 1)))

 glmn_full_res <-
  min_grid.linear_reg_glmnet(
    linear_reg() %>% set_engine("glmnet"),
    glmn_full_grid
  )
	# _Prototype_ code to find the minimum grid that should be fit for models. This
	# exploits the fact that some models can evaluate extra sub-models from the same
	# object.

	# devtools::install_github("tidymodels/parsnip")
	# devtools::install_github("tidymodels/dials")

	library(tidymodels)
	#> ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────── tidymodels 0.0.2 ──
	#> ✔ broom 0.5.1 ✔ purrr 0.3.2
	#> ✔ dials 0.0.2.9000 ✔ recipes 0.1.6
	#> ✔ dplyr 0.8.3 ✔ rsample 0.0.5
	#> ✔ ggplot2 3.2.0 ✔ tibble 2.1.3
	#> ✔ infer 0.4.0 ✔ yardstick 0.0.3.9000
	#> ✔ parsnip 0.0.2.9000
	#> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
	#> ✖ purrr::discard() masks scales::discard()
	#> ✖ dplyr::filter() masks stats::filter()
	#> ✖ dplyr::lag() masks stats::lag()
	#> ✖ ggplot2::margin() masks dials::margin()
	#> ✖ recipes::step() masks stats::step()

	min_grid <- function(x, grid, ...) {
	# x is a `model_spec` object from parsnip
	# grid is a tibble of tuning parameer values with names
	# matching the parameter names.
	UseMethod("min_grid")
	}

	# As an example, if we fit a boosted tree model and tune over
	# trees = 1:20 and min_n = c(20, 30)
	# we should only have to fit two models:
	#
	# trees = 20 & min_n = 20
	# trees = 20 & min_n = 30
	#
	# The logic related to how this "mini grid" gets made is model-specific.
	#
	# To get the full set of predictions, we need to know, for each of these two
	# models, what values of num_terms to give to the multi_predict() function.
	#
	# The current idea is to have a list column of the extra models for prediction.
	# For the example above:
	#
	# # A tibble: 2 x 3
	# trees min_n .submodels
	# <dbl> <dbl> <list>
	# 1 20 20 <named list [1]>
	# 2 20 30 <named list [1]>
	#
	# and the .submodels would both be
	#
	# list(trees = 1:19)
	#
	# There are a lot of other things to consider in future versions like grids
	# where there are multiple columns with the same name (maybe the results of
	# a recipe) and so on.

	# helper functions

	# Template for model results that do no have the sub-model feature
	blank_submodels <- function(grid) {
	grid %>%
	dplyr::mutate(.submodels = map(1:nrow(grid), ~ list()))
	}

	get_fixed_args <- function(info) {
	# Get non-sub-model columns to iterate over
	fixed_args <- info$name[!info$has_submodel]
	}

	get_submodel_info <- function(spec, grid) {
	param_info <-
	get_from_env(paste0(class(spec)[1], "_args")) %>%
	dplyr::filter(engine == spec$engine) %>%
	dplyr::select(name = parsnip, has_submodel)

	# In case a recipe or other activity has grid parameter columns,
	# add those to the results
	grid_names <- names(grid)
	is_mod_param <- grid_names %in% param_info$name
	if (any(!is_mod_param)) {
	param_info <-
	param_info %>%
	bind_rows(
	tibble(name = grid_names[!is_mod_param],
	has_submodel = FALSE)
	)
	}
	param_info %>% dplyr::filter(name %in% grid_names)
	}


	min_grid.boost_tree <- function(x, grid, ...) {
	grid_names <- names(grid)
	param_info <- get_submodel_info(x, grid)

	# No ability to do submodels? Finish here:
	if (!any(param_info$has_submodel)) {
	return(blank_submodels(grid))
	}

	fixed_args <- get_fixed_args(param_info)

	# For boosted trees, fit the model with the most trees (conditional on the
	# other parameters) so that you can do predictions on the smaller models.
	fit_only <-
	grid %>%
	dplyr::group_by(!!!syms(fixed_args)) %>%
	dplyr::summarize(trees = max(trees, na.rm = TRUE)) %>%
	dplyr::ungroup()

	# Add a column .submodels that is a list with what should be predicted
	# by `multi_predict()` (assuming `predict()` has already been executed
	# on the original value of 'trees')
	min_grid_df <-
	dplyr::full_join(fit_only %>% rename(max_tree = trees), grid, by = fixed_args) %>%
	dplyr::filter(trees != max_tree) %>%
	dplyr::group_by(!!!syms(fixed_args)) %>%
	dplyr::summarize(.submodels = list(list(trees = trees))) %>%
	dplyr::ungroup() %>%
	dplyr::full_join(fit_only, grid, by = fixed_args)

	min_grid_df %>% dplyr::select(one_of(grid_names), .submodels)
	}

	# Examples

	# simple grids
	boosting_spec <- boost_tree() %>% set_engine("xgboost")
	basic_grid <- grid_regular(trees(), min_n(c(10, 20)), tree_depth(), levels = 3)
	smaller_grid <- min_grid(boosting_spec, basic_grid)
	smaller_grid
	#> # A tibble: 9 x 4
	#> trees min_n tree_depth .submodels
	#> <int> <int> <int> <list>
	#> 1 2000 10 2 <named list [1]>
	#> 2 2000 15 2 <named list [1]>
	#> 3 2000 20 2 <named list [1]>
	#> 4 2000 10 8 <named list [1]>
	#> 5 2000 15 8 <named list [1]>
	#> 6 2000 20 8 <named list [1]>
	#> 7 2000 10 15 <named list [1]>
	#> 8 2000 15 15 <named list [1]>
	#> 9 2000 20 15 <named list [1]>
	smaller_grid$.submodels[[1]]
	#> $trees
	#> [1] 1 1000

	# non-regular grids
	set.seed(35)
	filler_grid <- grid_max_entropy(trees(), min_n(c(10, 20)), tree_depth(), size = 5)
	min_grid(boosting_spec, filler_grid)
	#> # A tibble: 4 x 4
	#> trees min_n tree_depth .submodels
	#> <int> <int> <int> <list>
	#> 1 1259 12 3 <named list [1]>
	#> 2 330 11 11 <NULL>
	#> 3 1831 19 12 <NULL>
	#> 4 1342 11 14 <NULL>

	# an engine that does not support submodels:
	spark_spec <- boost_tree() %>% set_engine("spark")

	min_grid(spark_spec, basic_grid)
	#> # A tibble: 27 x 4
	#> trees min_n tree_depth .submodels
	#> * <int> <int> <int> <list>
	#> 1 1 10 2 <list [0]>
	#> 2 1000 10 2 <list [0]>
	#> 3 2000 10 2 <list [0]>
	#> 4 1 15 2 <list [0]>
	#> 5 1000 15 2 <list [0]>
	#> 6 2000 15 2 <list [0]>
	#> 7 1 20 2 <list [0]>
	#> 8 1000 20 2 <list [0]>
	#> 9 2000 20 2 <list [0]>
	#> 10 1 10 8 <list [0]>
	#> # … with 17 more rows


	min_grid.nearest_neighbor_kknn <- function(x, grid, ...) {

	arg_info <-
	get_from_env(paste0(class(x)[1], "_args")) %>%
	dplyr::filter(engine == x$engine & parsnip %in% names(grid))

	all_param <- arg_info$parsnip

	# If no submodels, just give the grid back with an extra column
	if (!any(arg_info$has_submodel)) {
	return(blank_submodels(grid))
	}

	# Get non-sub-model columns to iterate over
	fixed_args <- arg_info$parsnip[!arg_info$has_submodel]
	fixed_syms <- rlang::syms(fixed_args)

	fit_only <-
	grid %>%
	dplyr::group_by(!!!fixed_syms) %>%
	# could do any value of k but we'll go with the max
	dplyr::summarize(neighbors = max(neighbors, na.rm = TRUE)) %>%
	dplyr::ungroup()

	min_grid_df <-
	dplyr::full_join(fit_only %>% rename(max_neighbors = neighbors), grid, by = fixed_args) %>%
	dplyr::filter(neighbors != max_neighbors) %>%
	dplyr::group_by(!!!fixed_syms) %>%
	dplyr::summarize(.submodels = list(list(neighbors = neighbors))) %>%
	dplyr::ungroup() %>%
	dplyr::full_join(fit_only, grid, by = fixed_args)

	min_grid_df %>% dplyr::select(!!!all_param, .submodels)
	}

	knn_grid <-
	grid_regular(neighbors(c(1, 10)), dist_power(), weight_func(), levels = 3) %>%
	# just for kicks:
	slice(-4)

	min_grid.nearest_neighbor_kknn(nearest_neighbor() %>% set_engine("kknn"), knn_grid)
	#> # A tibble: 9 x 4
	#> neighbors weight_func dist_power .submodels
	#> <int> <chr> <dbl> <list>
	#> 1 10 epanechnikov 1 <named list [1]>
	#> 2 10 epanechnikov 1.5 <named list [1]>
	#> 3 10 epanechnikov 2 <named list [1]>
	#> 4 10 rectangular 1 <named list [1]>
	#> 5 10 rectangular 1.5 <named list [1]>
	#> 6 10 rectangular 2 <named list [1]>
	#> 7 10 triangular 1 <named list [1]>
	#> 8 10 triangular 1.5 <named list [1]>
	#> 9 10 triangular 2 <named list [1]>
	knn_grid2 <- grid_max_entropy(neighbors(c(1, 10)), dist_power(), weight_func(), size = 10)
	min_grid.nearest_neighbor_kknn(nearest_neighbor() %>% set_engine("kknn"), knn_grid2)
	#> # A tibble: 10 x 4
	#> neighbors weight_func dist_power .submodels
	#> <int> <chr> <dbl> <list>
	#> 1 2 biweight 1.04 <NULL>
	#> 2 9 epanechnikov 1.10 <NULL>
	#> 3 9 gaussian 1.06 <NULL>
	#> 4 9 gaussian 1.78 <NULL>
	#> 5 3 rank 1.12 <NULL>
	#> 6 4 rank 1.81 <NULL>
	#> 7 9 rectangular 1.51 <NULL>
	#> 8 1 rectangular 1.89 <NULL>
	#> 9 9 rectangular 1.95 <NULL>
	#> 10 6 triweight 1.44 <NULL>


	min_grid.mars_earth <- function(x, grid, ...) {

	arg_info <-
	get_from_env(paste0(class(x)[1], "_args")) %>%
	dplyr::filter(engine == x$engine & parsnip %in% names(grid))

	all_param <- arg_info$parsnip

	# If no submodels, just give the grid back with an extra column
	if (!any(arg_info$has_submodel)) {
	return(blank_submodels(grid))
	}

	# Get non-sub-model columns to iterate over
	fixed_args <- arg_info$parsnip[!arg_info$has_submodel]
	fixed_syms <- rlang::syms(fixed_args)

	fit_only <-
	grid %>%
	dplyr::group_by(!!!fixed_syms) %>%
	dplyr::summarize(num_terms = max(num_terms, na.rm = TRUE)) %>%
	dplyr::ungroup()

	min_grid_df <-
	dplyr::full_join(fit_only %>% rename(max_terms = num_terms), grid, by = fixed_args) %>%
	dplyr::filter(num_terms != max_terms) %>%
	dplyr::group_by(!!!fixed_syms) %>%
	dplyr::summarize(.submodels = list(list(num_terms = num_terms))) %>%
	dplyr::ungroup() %>%
	dplyr::full_join(fit_only, grid, by = fixed_args)

	min_grid_df %>% dplyr::select(!!!all_param, .submodels)
	}


	min_grid.linear_reg_glmnet <- function(x, grid, ...) {

	arg_info <-
	get_from_env(paste0(class(x)[1], "_args")) %>%
	dplyr::filter(engine == x$engine & parsnip %in% names(grid))

	all_param <- arg_info$parsnip

	# If no submodels, just give the grid back with an extra column
	if (!any(arg_info$has_submodel)) {
	return(blank_submodels(grid))
	}

	fit_only <-
	grid %>%
	distinct(mixture)

	# To get entire regularization path, we will set lambda = NULL. Since you
	# can't have a column of NULLS in a data frame, we set to NA.
	min_grid_df <-
	grid %>%
	group_by(mixture) %>%
	nest(-mixture) %>%
	ungroup() %>%
	mutate(
	penalty = NA_real_, # should really be set to NULL; fix in translate function
	.submodels = map(data, pull, 1)
	) %>%
	dplyr::select(-data)

	min_grid_df %>% dplyr::select(!!!all_param, .submodels)
	}

	min_grid.logistic_reg_glmnet <- min_grid.linear_reg_glmnet


	xgb_full_grid <-
	crossing(tree_depth = 10:11, min_n = 5:7, trees = 1:3)

	xgb_full_exp <-
	crossing(tree_depth = 10:11, min_n = 5:7, trees = 3) %>%
	mutate(.submodels = map(trees, ~ 1:2)) %>%
	dplyr::select(tree_depth, trees, min_n, .submodels)

	xgb_full_res <-
	min_grid.boost_tree_xgboost(
	boost_tree() %>% set_engine("xgboost"),
	xgb_full_grid
	)
	#> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("xgboost"), : could not find function "min_grid.boost_tree_xgboost"

	# Need to use vctrs:
	# Error: Can't join on '.submodels' x '.submodels' because of incompatible types (list / list)
	# all_equal(xgb_full_res, xgb_full_exp)
	# re-use test_by_col() here

	xgb_smol_grid <-
	crossing(tree_depth = 10:11, min_n = 5:7)

	xgb_full_exp <-
	crossing(tree_depth = 10:11, min_n = 5:7) %>%
	mutate(.submodels = map(tree_depth, ~ list()))

	min_grid.boost_tree_xgboost(
	boost_tree() %>% set_engine("xgboost"),
	xgb_smol_grid
	)
	#> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("xgboost"), : could not find function "min_grid.boost_tree_xgboost"


	c5_full_grid <-
	crossing(min_n = 5:7, trees = 1:3)

	c5_full_exp <-
	crossing(min_n = 5:7, trees = 3) %>%
	mutate(.submodels = map(trees, ~ 1:2)) %>%
	dplyr::select(trees, min_n, .submodels)

	c5_full_res <-
	min_grid.boost_tree_xgboost(
	boost_tree() %>% set_engine("C5.0"),
	c5_full_grid
	)
	#> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("C5.0"), : could not find function "min_grid.boost_tree_xgboost"

	c5_smol_grid <-
	crossing(min_n = 5:7)

	c5_full_exp <-
	crossing(min_n = 5:7) %>%
	mutate(.submodels = map(min_n, ~ list()))

	min_grid.boost_tree_xgboost(
	boost_tree() %>% set_engine("C5.0"),
	c5_smol_grid
	)
	#> Error in min_grid.boost_tree_xgboost(boost_tree() %>% set_engine("C5.0"), : could not find function "min_grid.boost_tree_xgboost"


	earth_full_grid <-
	crossing(num_terms = 10:11, prod_degree = 1:2, prune_method = c("none", "backward"))

	earth_full_exp <-
	crossing(num_terms = 11, prod_degree = 1:2, prune_method = c("none", "backward")) %>%
	mutate(.submodels = map(prod_degree, ~ 10)) %>%
	dplyr::select(num_terms, prod_degree, prune_method, .submodels)

	earth_full_res <-
	min_grid.mars_earth(
	mars() %>% set_engine("earth"),
	earth_full_grid
	)


	glmn_full_grid <-
	crossing(mixture = (0:3)/3, penalty = c(0, .1, 1))

	glmn_full_exp <-
	crossing(penalty = NA_real_, mixture = (0:3)/3) %>%
	mutate(.submodels = map(mixture, ~ c(0, .1, 1)))

	glmn_full_res <-
	min_grid.linear_reg_glmnet(
	linear_reg() %>% set_engine("glmnet"),
	glmn_full_grid
	)