topepo · February 19, 2021 16:51
diff --git a/fixing_ridge_regression_in_glmnet.R b/fixing_ridge_regression_in_glmnet.R
 library(tidymodels)
 library(patchwork)
 library(doMC)
 # Register the doMC parallel backend. The worker count is capped at the number
 # of cores detected on this machine so that hosts with fewer than 20 cores are
 # not oversubscribed; when the count cannot be determined (NA), 20 is kept.
 registerDoMC(cores = min(20L, parallel::detectCores(), na.rm = TRUE))

 # Black-and-white ggplot2 theme with the legend placed above the panel.
 theme_set(theme_bw() + theme(legend.position = "top"))

 # ------------------------------------------------------------------------------

 # Fetch the car data (an .RData file) into a temporary file and load it into
 # the workspace. The rest of the script expects this to define `car_train`.
 url <- "https://github.com/topepo/cars/raw/master/2019_07_03_city/car_data_splits.RData"
 temp_save <- tempfile()
 # .RData is a binary format, so request a binary transfer explicitly, and stop
 # if the download reports a non-zero (failure) status instead of ignoring it.
 download_status <- download.file(url, destfile = temp_save, mode = "wb")
 if (download_status != 0) {
   stop("Downloading the car data failed (status ", download_status, ")",
        call. = FALSE)
 }
 load(temp_save)
 # The objects are in memory now; remove the temporary file.
 unlink(temp_save)
 str(car_train)

 # ------------------------------------------------------------------------------

 # Build the cross-validation folds from the training set using vfold_cv()'s
 # default settings; seeding first makes the fold assignment reproducible.
 set.seed(1)
 car_folds <- vfold_cv(data = car_train)

 # ------------------------------------------------------------------------------

 # Preprocessing recipe shared by all three glmnet specifications.
 glmn_rec <- recipe(mpg ~ ., data = car_train) %>%
   # `model` (the car name) stays in the data but gets a non-predictor role
   update_role(model, new_role = "model") %>%
   # pool infrequent levels of make and car_class into "other"
   step_other(make, car_class, threshold = 0.005) %>%
   # same for fuel_type, with a larger threshold
   step_other(fuel_type, threshold = 0.01) %>%
   # indicator columns for every nominal column except `model`
   step_dummy(all_nominal(), -model) %>%
   # drop zero-variance predictors, then center and scale what is left
   step_zv(all_predictors()) %>%
   step_normalize(all_predictors()) %>%
   # natural-spline expansions of these two columns, added after normalization
   step_ns(eng_displ, cylinders, deg_free = 4)

 # Candidate penalty values: 10 points evenly spaced on the log10 scale between
 # 1e-3 and 1e-1.
 penalties <- 10^seq(-3, -1, length.out = 10)

 # Full factorial tuning grid: six mixture values (0, 0.2, ..., 1) crossed with
 # every candidate penalty.
 glmn_grid <- crossing(
   mixture = (0:5) / 5,
   penalty = penalties
 )

 # All three specifications tune both the penalty and the mixture of a linear
 # regression fit with the glmnet engine; they differ only in the extra engine
 # arguments.
 base_glmn_spec <- linear_reg(penalty = tune(), mixture = tune())

 # 1) no extra engine arguments
 bland_glmn_spec <- set_engine(base_glmn_spec, "glmnet")

 # 2) the candidate penalties are also handed to the engine as `lambda`
 added_glmn_spec <- set_engine(base_glmn_spec, "glmnet", lambda = penalties)

 # 3) `lambda.min.ratio` is set to zero
 ratio_glmn_spec <- set_engine(base_glmn_spec, "glmnet", lambda.min.ratio = 0)

 # ------------------------------------------------------------------------------

 # Tune each specification over the same recipe, folds, and grid. The seed is
 # reset to the same value before every run so that all three start from an
 # identical random-number state.
 set.seed(2)
 bland_res <- tune_grid(
   bland_glmn_spec,
   glmn_rec,
   resamples = car_folds,
   grid = glmn_grid
 )

 set.seed(2)
 added_res <- tune_grid(
   added_glmn_spec,
   glmn_rec,
   resamples = car_folds,
   grid = glmn_grid
 )

 set.seed(2)
 ratio_res <- tune_grid(
   ratio_glmn_spec,
   glmn_rec,
   resamples = car_folds,
   grid = glmn_grid
 )

 # ------------------------------------------------------------------------------

 # Pull the resampled performance metrics out of each tuning result and tag the
 # rows with a label naming the engine configuration that produced them.
 bland_data <-
   bland_res %>%
   collect_metrics() %>%
   mutate(config = "No options")

 added_data <-
   added_res %>%
   collect_metrics() %>%
   mutate(config = "specified lambda")

 ratio_data <-
   ratio_res %>%
   collect_metrics() %>%
   mutate(config = "lambda.min.ratio = 0")

 # ------------------------------------------------------------------------------

 # Stack the three sets of metrics, keep the RMSE rows, and plot the `mean`
 # column against the penalty (log10 axis): one line per mixture value, one
 # panel per engine configuration.
 bind_rows(bland_data, added_data, ratio_data) %>%
   dplyr::filter(.metric == "rmse") %>%
   mutate(
     # formatted as text so that colour is mapped to discrete values
     mixture = format(mixture),
     # explicit levels fix the order of the panels
     config = factor(
       config,
       levels = c("No options", "specified lambda", "lambda.min.ratio = 0")
     )
   ) %>%
   ggplot(aes(x = penalty, y = mean, colour = mixture)) +
   facet_wrap(~config) +
   geom_point() +
   geom_line() +
   scale_x_log10()
	library(tidymodels)
	library(patchwork)
	library(doMC)
	# Register the doMC parallel backend. The worker count is capped at the number
	# of cores detected on this machine so that hosts with fewer than 20 cores are
	# not oversubscribed; when the count cannot be determined (NA), 20 is kept.
	registerDoMC(cores = min(20L, parallel::detectCores(), na.rm = TRUE))

	# Black-and-white ggplot2 theme with the legend placed above the panel.
	theme_set(theme_bw() + theme(legend.position = "top"))

	# ------------------------------------------------------------------------------

	# Fetch the car data (an .RData file) into a temporary file and load it into
	# the workspace. The rest of the script expects this to define `car_train`.
	url <- "https://github.com/topepo/cars/raw/master/2019_07_03_city/car_data_splits.RData"
	temp_save <- tempfile()
	# .RData is a binary format, so request a binary transfer explicitly, and stop
	# if the download reports a non-zero (failure) status instead of ignoring it.
	download_status <- download.file(url, destfile = temp_save, mode = "wb")
	if (download_status != 0) {
	  stop("Downloading the car data failed (status ", download_status, ")",
	       call. = FALSE)
	}
	load(temp_save)
	# The objects are in memory now; remove the temporary file.
	unlink(temp_save)
	str(car_train)

	# ------------------------------------------------------------------------------

	# Build the cross-validation folds from the training set using vfold_cv()'s
	# default settings; seeding first makes the fold assignment reproducible.
	set.seed(1)
	car_folds <- vfold_cv(data = car_train)

	# ------------------------------------------------------------------------------

	# Preprocessing recipe shared by all three glmnet specifications.
	glmn_rec <- recipe(mpg ~ ., data = car_train) %>%
	  # `model` (the car name) stays in the data but gets a non-predictor role
	  update_role(model, new_role = "model") %>%
	  # pool infrequent levels of make and car_class into "other"
	  step_other(make, car_class, threshold = 0.005) %>%
	  # same for fuel_type, with a larger threshold
	  step_other(fuel_type, threshold = 0.01) %>%
	  # indicator columns for every nominal column except `model`
	  step_dummy(all_nominal(), -model) %>%
	  # drop zero-variance predictors, then center and scale what is left
	  step_zv(all_predictors()) %>%
	  step_normalize(all_predictors()) %>%
	  # natural-spline expansions of these two columns, added after normalization
	  step_ns(eng_displ, cylinders, deg_free = 4)

	# Candidate penalty values: 10 points evenly spaced on the log10 scale between
	# 1e-3 and 1e-1.
	penalties <- 10^seq(-3, -1, length.out = 10)

	# Full factorial tuning grid: six mixture values (0, 0.2, ..., 1) crossed with
	# every candidate penalty.
	glmn_grid <- crossing(
	  mixture = (0:5) / 5,
	  penalty = penalties
	)

	# All three specifications tune both the penalty and the mixture of a linear
	# regression fit with the glmnet engine; they differ only in the extra engine
	# arguments.
	base_glmn_spec <- linear_reg(penalty = tune(), mixture = tune())

	# 1) no extra engine arguments
	bland_glmn_spec <- set_engine(base_glmn_spec, "glmnet")

	# 2) the candidate penalties are also handed to the engine as `lambda`
	added_glmn_spec <- set_engine(base_glmn_spec, "glmnet", lambda = penalties)

	# 3) `lambda.min.ratio` is set to zero
	ratio_glmn_spec <- set_engine(base_glmn_spec, "glmnet", lambda.min.ratio = 0)

	# ------------------------------------------------------------------------------

	# Tune each specification over the same recipe, folds, and grid. The seed is
	# reset to the same value before every run so that all three start from an
	# identical random-number state.
	set.seed(2)
	bland_res <- tune_grid(
	  bland_glmn_spec,
	  glmn_rec,
	  resamples = car_folds,
	  grid = glmn_grid
	)

	set.seed(2)
	added_res <- tune_grid(
	  added_glmn_spec,
	  glmn_rec,
	  resamples = car_folds,
	  grid = glmn_grid
	)

	set.seed(2)
	ratio_res <- tune_grid(
	  ratio_glmn_spec,
	  glmn_rec,
	  resamples = car_folds,
	  grid = glmn_grid
	)

	# ------------------------------------------------------------------------------

	# Pull the resampled performance metrics out of each tuning result and tag the
	# rows with a label naming the engine configuration that produced them.
	bland_data <-
	  bland_res %>%
	  collect_metrics() %>%
	  mutate(config = "No options")

	added_data <-
	  added_res %>%
	  collect_metrics() %>%
	  mutate(config = "specified lambda")

	ratio_data <-
	  ratio_res %>%
	  collect_metrics() %>%
	  mutate(config = "lambda.min.ratio = 0")

	# ------------------------------------------------------------------------------

	# Stack the three sets of metrics, keep the RMSE rows, and plot the `mean`
	# column against the penalty (log10 axis): one line per mixture value, one
	# panel per engine configuration.
	bind_rows(bland_data, added_data, ratio_data) %>%
	  dplyr::filter(.metric == "rmse") %>%
	  mutate(
	    # formatted as text so that colour is mapped to discrete values
	    mixture = format(mixture),
	    # explicit levels fix the order of the panels
	    config = factor(
	      config,
	      levels = c("No options", "specified lambda", "lambda.min.ratio = 0")
	    )
	  ) %>%
	  ggplot(aes(x = penalty, y = mean, colour = mixture)) +
	  facet_wrap(~config) +
	  geom_point() +
	  geom_line() +
	  scale_x_log10()