Skip to content

Instantly share code, notes, and snippets.

@ericpgreen
Created March 16, 2020 17:04
Show Gist options
  • Select an option

  • Save ericpgreen/89dbfc00cf519d0a7af0a93cdb79fc92 to your computer and use it in GitHub Desktop.

Select an option

Save ericpgreen/89dbfc00cf519d0a7af0a93cdb79fc92 to your computer and use it in GitHub Desktop.
# Packages ----
library(modeldata)
library(tidyverse)
library(tidymodels)

data("stackoverflow")

# Reproducibility: the split, the folds, and the tuning grid all draw
# random numbers, so seed before any of them.
set.seed(100) # Important!

# Work on a 300-row subsample so the example runs quickly.
# slice_sample() supersedes sample_n() in modern dplyr (>= 1.0).
so_split <- initial_split(
  slice_sample(stackoverflow, n = 300),
  strata = Remote # keep the Remote class balance in both partitions
)
so_train <- training(so_split)
so_test <- testing(so_split)

# Only 2 folds — again, simpler so it runs faster.
so_folds <- vfold_cv(so_train, v = 2, strata = Remote)
# Preprocessing recipe: model Remote as a function of everything else ----
tune_rec <-
  recipe(Remote ~ ., data = so_train) %>%
  # Dummy-encode every nominal predictor (but never the outcome).
  step_dummy(all_nominal(), -all_outcomes()) %>%
  # Drop predictors that are exact linear combinations of others.
  step_lincomb(all_predictors()) %>%
  # Downsample the majority class; the under_ratio itself is tuned.
  # NOTE(review): in recent tidymodels releases this step lives in the
  # themis package (themis::step_downsample) — confirm the version in use.
  step_downsample(Remote, under_ratio = tune())
# Model specification: random forest with two tunable hyperparameters ----
tune_spec <-
  rand_forest(
    mtry = tune(),  # predictors sampled at each split
    min_n = tune()  # minimum node size
  ) %>%
  set_engine("ranger") %>%
  set_mode("classification")
# Workflow: bundle the preprocessing recipe with the model spec ----
tuneboth_wf <-
  workflow() %>%
  add_recipe(tune_rec) %>%
  add_model(tune_spec)
# Tuning parameter set ----
# mtry has no default upper bound (it depends on how many predictors
# survive the recipe), so give it an explicit range.
tuneboth_param <-
  parameters(tuneboth_wf) %>%
  update(mtry = mtry(c(1, 20)))
# Grid search over the cross-validation folds ----
results <-
  tuneboth_wf %>%
  tune_grid(
    resamples = so_folds,
    param_info = tuneboth_param
  )
# Pick the best parameter combination by ROC AUC ----
best <-
  results %>%
  select_best(metric = "roc_auc")

# Final workflow with the winning parameters substituted in ----
wf_final <-
  tuneboth_wf %>%
  finalize_workflow(best)
# re-run with best
#' Fit on the training half of a split and evaluate on the testing half.
#'
#' Builds a formula-based workflow whose blueprint keeps factor predictors
#' intact (indicators = FALSE) and tolerates factor levels at predict time
#' that were unseen during training (allow_novel_levels = TRUE), then runs
#' tune::last_fit() on the supplied split.
#'
#' @param formula A model formula (e.g. Remote ~ .), or a complete
#'   workflow object — a workflow is used as-is, so the function also
#'   works when a finalized workflow is piped in as the first argument.
#' @param model A parsnip model specification (ignored when `formula`
#'   is already a workflow).
#' @param split An rsample initial split object.
#' @param ... Passed on to tune::last_fit() (e.g. metrics = ...).
#' @return The tune::last_fit() result.
fit_split <- function(formula, model, split, ...) {
  if (inherits(formula, "workflow")) {
    # Already a complete workflow (e.g. piped in): nothing to assemble.
    wf <- formula
  } else {
    # Blueprint: keep factors as factors and allow novel levels.
    bp <- hardhat::default_formula_blueprint(
      indicators = FALSE,
      allow_novel_levels = TRUE
    )
    wf <- workflows::workflow()
    wf <- workflows::add_formula(wf, formula, blueprint = bp)
    wf <- workflows::add_model(wf, model)
  }
  tune::last_fit(wf, split, ...)
}
# Final evaluation: fit wf_final on the training set, assess on the test set.
# wf_final is already a complete workflow, so hand it straight to
# last_fit() — there is no need to rebuild it from a formula and a model.
results_best_test <-
  wf_final %>%
  last_fit(
    split = so_split,
    metrics = metric_set(roc_auc, sens, spec)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment