Skip to content

Instantly share code, notes, and snippets.

@ericpgreen
Created March 16, 2020 17:04
Show Gist options
  • Select an option

  • Save ericpgreen/89dbfc00cf519d0a7af0a93cdb79fc92 to your computer and use it in GitHub Desktop.

Select an option

Save ericpgreen/89dbfc00cf519d0a7af0a93cdb79fc92 to your computer and use it in GitHub Desktop.
# Packages ----
library(modeldata)
library(tidyverse)
library(tidymodels)

data("stackoverflow")

# Reproducibility: the split, the folds, and the tuning grid all draw
# random numbers, so seed before any of them.
set.seed(100) # Important!

# Work on a 300-row subsample so the example runs quickly.
# slice_sample() supersedes sample_n() in modern dplyr (>= 1.0).
so_split <- initial_split(
  slice_sample(stackoverflow, n = 300),
  strata = Remote # keep the Remote class balance in both partitions
)
so_train <- training(so_split)
so_test <- testing(so_split)

# Only 2 folds — again, simpler so it runs faster.
so_folds <- vfold_cv(so_train, v = 2, strata = Remote)
# Preprocessing recipe: model Remote as a function of everything else ----
tune_rec <-
  recipe(Remote ~ ., data = so_train) %>%
  # Dummy-encode every nominal predictor (but never the outcome).
  step_dummy(all_nominal(), -all_outcomes()) %>%
  # Drop predictors that are exact linear combinations of others.
  step_lincomb(all_predictors()) %>%
  # Downsample the majority class; the under_ratio itself is tuned.
  # NOTE(review): in recent tidymodels releases this step lives in the
  # themis package (themis::step_downsample) — confirm the version in use.
  step_downsample(Remote, under_ratio = tune())
# Model specification: random forest with two tunable hyperparameters ----
tune_spec <-
  rand_forest(
    mtry = tune(),  # predictors sampled at each split
    min_n = tune()  # minimum node size
  ) %>%
  set_engine("ranger") %>%
  set_mode("classification")
# Workflow: bundle the preprocessing recipe with the model spec ----
tuneboth_wf <-
  workflow() %>%
  add_recipe(tune_rec) %>%
  add_model(tune_spec)
# Tuning parameter set ----
# mtry has no default upper bound (it depends on how many predictors
# survive the recipe), so give it an explicit range.
tuneboth_param <-
  parameters(tuneboth_wf) %>%
  update(mtry = mtry(c(1, 20)))
# Grid search over the cross-validation folds ----
results <-
  tuneboth_wf %>%
  tune_grid(
    resamples = so_folds,
    param_info = tuneboth_param
  )
# Pick the best parameter combination by ROC AUC ----
best <-
  results %>%
  select_best(metric = "roc_auc")

# Final workflow with the winning parameters substituted in ----
wf_final <-
  tuneboth_wf %>%
  finalize_workflow(best)
# re-run with best
#' Fit on the training half of a split and evaluate on the testing half.
#'
#' Builds a formula-based workflow whose blueprint keeps factor predictors
#' intact (indicators = FALSE) and tolerates factor levels at predict time
#' that were unseen during training (allow_novel_levels = TRUE), then runs
#' tune::last_fit() on the supplied split.
#'
#' @param formula A model formula (e.g. Remote ~ .), or a complete
#'   workflow object — a workflow is used as-is, so the function also
#'   works when a finalized workflow is piped in as the first argument.
#' @param model A parsnip model specification (ignored when `formula`
#'   is already a workflow).
#' @param split An rsample initial split object.
#' @param ... Passed on to tune::last_fit() (e.g. metrics = ...).
#' @return The tune::last_fit() result.
fit_split <- function(formula, model, split, ...) {
  if (inherits(formula, "workflow")) {
    # Already a complete workflow (e.g. piped in): nothing to assemble.
    wf <- formula
  } else {
    # Blueprint: keep factors as factors and allow novel levels.
    bp <- hardhat::default_formula_blueprint(
      indicators = FALSE,
      allow_novel_levels = TRUE
    )
    wf <- workflows::workflow()
    wf <- workflows::add_formula(wf, formula, blueprint = bp)
    wf <- workflows::add_model(wf, model)
  }
  tune::last_fit(wf, split, ...)
}
# Final evaluation: fit wf_final on the training set, assess on the test set.
# wf_final is already a complete workflow, so hand it straight to
# last_fit() — there is no need to rebuild it from a formula and a model.
results_best_test <-
  wf_final %>%
  last_fit(
    split = so_split,
    metrics = metric_set(roc_auc, sens, spec)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment