Skip to content

Instantly share code, notes, and snippets.

@cimentadaj
Created February 15, 2020 07:25
Show Gist options
  • Save cimentadaj/b17c777e38fa3752f6887eb6c868d6c3 to your computer and use it in GitHub Desktop.
Save cimentadaj/b17c777e38fa3752f6887eb6c868d6c3 to your computer and use it in GitHub Desktop.
library(AmesHousing)
library(tidymodels)
# Build the `ames` data set with make_ames().
ames <- make_ames()

# Base workflow specification: the data is piped through workflow(),
# a 75% initial split, a recipe on Sale_Price, and the preprocessing steps.
# Later sections of this script extend this object instead of rebuilding it.
ml_wflow <- ames %>%
  workflow() %>%
  initial_split(prop = 0.75) %>%
  recipe(
    Sale_Price ~ Longitude + Latitude + Neighborhood,
    data = ames
  ) %>%
  # Log-transform the outcome (base 10).
  step_log(Sale_Price, base = 10) %>%
  # Apply step_other() to Neighborhood with a 0.05 threshold.
  step_other(Neighborhood, threshold = 0.05) %>%
  # Dummy-encode all nominal variables.
  step_dummy(recipes::all_nominal()) %>%
  step_scale(Sale_Price) %>%
  step_scale(Latitude)
# Candidate model 1: linear regression on the glmnet engine, with `penalty`
# and `mixture` flagged for tuning and piped into a regular grid (10 levels).
mod1 <- linear_reg(penalty = tune(), mixture = tune()) %>%
  set_engine("glmnet") %>%
  grid_regular(levels = 10)
# Candidate model 2: random forest with `mtry` and `trees` flagged for tuning,
# piped into a regular grid (10 levels).
# "glmnet" is not an engine for rand_forest(), so a random-forest engine
# ("ranger") is used instead.
mod2 <-
  rand_forest(mtry = tune(), trees = tune()) %>%
  set_engine("ranger") %>%
  grid_regular(levels = 10)
## Combining an empty parameter specification with any grid_* function
## should throw an error.
# Attach the first model to the base workflow, add v-fold cross-validation
# and fit the whole specification.
ml_wflow %>%
  add_model(mod1) %>%
  vfold_cv() %>%
  fit()
# Add new recipe steps
# The current recipe is referred to through the `.rcp` placeholder, the same
# name used by every other update_rcp() call in this script.
# NOTE(review): update_rcp() is not defined in this file; presumably part of
# the workflow API being sketched here -- confirm.
final_res <-
  ml_wflow %>%
  update_rcp(
    .rcp %>%
      step_scale(Sale_Price) %>%
      step_center(Sale_Price)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()
## Add a new variable
## The formula gains the term `x2`, and the recipe gets a dummy step for it,
## before the second model is attached and fitted with cross-validation.
## NOTE(review): update_frm() and update_rcp() are not defined in this file.
ml_wflow %>%
  update_frm(~ . + x2) %>%
  update_rcp(
    .rcp %>%
      step_dummy(x2)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()
## Replace the outcome
## Latitude becomes the left-hand side of the formula and receives a log step
## in the recipe; the model, resampling and fit are then chained as before.
ml_wflow %>%
  update_frm(Latitude ~ .) %>%
  update_rcp(
    .rcp %>%
      step_log(Latitude)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()
## Update whole formula
## Both sides of the formula are replaced and a fresh set of recipe steps is
## supplied. R names are case-sensitive, so the Box-Cox step must use the same
## spelling (`whatever`) as the formula term it transforms.
ml_wflow %>%
  update_frm(
    Latitude ~ Neighborhood + whatever
  ) %>%
  update_rcp(
    .rcp %>%
      step_log(Latitude) %>%
      step_dummy(Neighborhood) %>%
      step_BoxCox(whatever),
    # Remove recipe and accept new one
    new = TRUE
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()
# Update the recipe of the already-fitted `final_res` object.
# NOTE(review): `-step_scale` presumably asks update_rcp() to drop the
# scaling step(s) -- confirm against the update_rcp() design.
# The trailing comma was removed so no empty extra argument is passed.
final_res %>%
  update_rcp(-step_scale)
# Design intent: `ml_wflow` is only a specification of the steps and should
# not execute anything by itself; calling `fit` is what runs everything.
ml_wflow <- ml_wflow %>%
  fit()
# Visualise the tuning-parameter results of the fitted workflow.
autoplot(ml_wflow)
# Pick the best parameters automatically (judged by RMSE) and fit them to the
# training data.
# NOTE(review): fit_best_model() is not defined in this file.
final_model <- fit_best_model(
  ml_wflow,
  metrics = metric_set(rmse)
)
# Predict on the test data with the last model.
# Everything is bundled into one workflow object, and each piece can be
# extracted with separate functions that share the same verb.
# NOTE(review): holdout_error() is not defined in this file.
holdout_error(final_model)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment