cimentadaj · February 15, 2020 07:25
diff --git a/ml_wflowideas.R b/ml_wflowideas.R
 library(AmesHousing)
 library(tidymodels)

 # Build the Ames housing data set with `make_ames()`.
 ames <- make_ames()

 # Idea sketch: declare the whole modelling workflow in one pipe -- the
 # data, the train/test split, the recipe formula and its preprocessing
 # steps. The object is intended to be a lazy specification: nothing
 # should be computed until `fit()` is called on it.
 # NOTE(review): piping data through `workflow()` and `initial_split()`
 # into `recipe()` is the proposed interface; confirm it against what the
 # installed tidymodels functions actually accept.
 ml_wflow <-
  ames %>%
  workflow() %>%
  initial_split(prop = .75) %>%
  recipe(Sale_Price ~ Longitude + Latitude + Neighborhood, data = ames) %>%
  step_log(Sale_Price, base = 10) %>%
  step_other(Neighborhood, threshold = 0.05) %>%
  step_dummy(recipes::all_nominal()) %>%
  step_scale(Sale_Price) %>%
  step_scale(Latitude)

 # Candidate model 1: linear regression on the "glmnet" engine, with
 # `penalty` and `mixture` both marked for tuning via `tune()`.
 # Idea: pipe the spec straight into `grid_regular()` so the tuning grid
 # travels together with the model.
 # NOTE(review): presumably `levels = 10` means 10 values per tuned
 # parameter -- confirm.
 mod1 <-
  linear_reg(penalty = tune(), mixture = tune()) %>%
  set_engine("glmnet") %>%
  grid_regular(levels = 10)

 # Candidate model 2: random forest with `mtry` and `trees` marked for
 # tuning via `tune()`, plus a regular tuning grid attached to the spec.
 # The engine is "ranger": "glmnet" fits penalised linear models and is
 # not a random-forest engine. The mode is set explicitly because the
 # outcomes used in this script (`Sale_Price`, `Latitude`) are numeric.
 mod2 <-
  rand_forest(mtry = tune(), trees = tune()) %>%
  set_engine("ranger") %>%
  set_mode("regression") %>%
  grid_regular(levels = 10)

 ## Design note: combining a model specification that has no tuning
 ## parameters (presumably: nothing marked with `tune()`) with any
 ## `grid_*()` call should throw an error.

 # Attach the first candidate model to the workflow, add resampling with
 # `vfold_cv()` and run everything with `fit()`. The result is not
 # assigned to anything.
 ml_wflow %>%
  add_model(mod1) %>%
  vfold_cv() %>%
  fit()

 # Add new recipe steps: extend the existing recipe with scaling and
 # centering of `Sale_Price`, then attach `mod2`, resample and fit.
 # Inside `update_rcp()` the current recipe is referred to as `.rcp`,
 # the same placeholder every other `update_rcp()` call in this script
 # uses.
 final_res <-
  ml_wflow %>%
  update_rcp(
    .rcp %>%
    step_scale(Sale_Price) %>%
    step_center(Sale_Price)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()

 ## Add new variable
 # `update_frm()` receives the one-sided formula `~ . + x2` (presumably
 # "keep the current terms and add `x2`"); `update_rcp()` then appends a
 # dummy-encoding step for the new variable before `mod2` is attached,
 # resampled and fitted.
 # NOTE(review): `x2` looks like a placeholder name -- confirm that such
 # a column exists in the data.
 ml_wflow %>%
  update_frm(
    ~ . + x2
  ) %>% 
  update_rcp(
    .rcp %>% step_dummy(x2)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>% 
  fit()

 ## Replace y
 # Swap the outcome for `Latitude` while keeping the right-hand side
 # (`.`), and add a log step for the new outcome.
 # NOTE(review): the base recipe lists `Latitude` as a predictor and
 # scales it; confirm how `.` should treat a variable that moves to the
 # left-hand side.
 ml_wflow %>%
  update_frm(
    Latitude ~ .
  ) %>% 
  update_rcp(
    .rcp %>% step_log(Latitude)
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>% 
  fit()

 ## Update whole formula
 # Replace both sides of the formula and supply a matching recipe.
 # R names are case-sensitive, so the Box-Cox step refers to `whatever`
 # with exactly the spelling used in the formula.
 # NOTE(review): `whatever` looks like a placeholder column name.
 ml_wflow %>%
  update_frm(
    Latitude ~ Neighborhood + whatever
  ) %>%
  update_rcp(
    .rcp %>%
    step_log(Latitude) %>%
    step_dummy(Neighborhood) %>%
    step_BoxCox(whatever),
    # Remove the existing recipe and accept the new one
    new = TRUE
  ) %>%
  add_model(mod2) %>%
  vfold_cv() %>%
  fit()



 # Remove a step from the fitted result `final_res`: `-step_scale`
 # presumably asks `update_rcp()` to drop the scaling step(s).
 # The call passes only that one argument; a dangling trailing comma
 # would hand the function an empty second argument.
 final_res %>%
  update_rcp(
    -step_scale
  )





 # Design intent: `ml_wflow` is only a description of the steps and must
 # not compute anything on its own; calling `fit` is what runs it all.
 # NOTE(review): the earlier `add_model()` pipelines were not assigned
 # back to `ml_wflow`, so confirm which model this fit is meant to use.
 ml_wflow <-
  ml_wflow %>%
  fit()

 # Visualise how the tuning parameters performed.
 autoplot(ml_wflow)

 # Pick the best parameter combination (judged by RMSE) automatically
 # and refit that model on the training data.
 final_model <- fit_best_model(ml_wflow, metrics = metric_set(rmse))

 # Evaluate the final model on the held-out test data.
 # Since everything is bundled into the workflow object, each piece can
 # be extracted with its own function, all following the same verb
 # convention.
 holdout_error(final_model)
	library(AmesHousing)
	library(tidymodels)

	# Build the Ames housing data set with `make_ames()`.
	ames <- make_ames()

	# Idea sketch: declare the whole modelling workflow in one pipe -- the
	# data, the train/test split, the recipe formula and its preprocessing
	# steps. The object is intended to be a lazy specification: nothing
	# should be computed until `fit()` is called on it.
	# NOTE(review): piping data through `workflow()` and `initial_split()`
	# into `recipe()` is the proposed interface; confirm it against what the
	# installed tidymodels functions actually accept.
	ml_wflow <-
	ames %>%
	workflow() %>%
	initial_split(prop = .75) %>%
	recipe(Sale_Price ~ Longitude + Latitude + Neighborhood, data = ames) %>%
	step_log(Sale_Price, base = 10) %>%
	step_other(Neighborhood, threshold = 0.05) %>%
	step_dummy(recipes::all_nominal()) %>%
	step_scale(Sale_Price) %>%
	step_scale(Latitude)

	# Candidate model 1: linear regression on the "glmnet" engine, with
	# `penalty` and `mixture` both marked for tuning via `tune()`.
	# Idea: pipe the spec straight into `grid_regular()` so the tuning grid
	# travels together with the model.
	# NOTE(review): presumably `levels = 10` means 10 values per tuned
	# parameter -- confirm.
	mod1 <-
	linear_reg(penalty = tune(), mixture = tune()) %>%
	set_engine("glmnet") %>%
	grid_regular(levels = 10)

	# Candidate model 2: random forest with `mtry` and `trees` marked for
	# tuning via `tune()`, plus a regular tuning grid attached to the spec.
	# The engine is "ranger": "glmnet" fits penalised linear models and is
	# not a random-forest engine. The mode is set explicitly because the
	# outcomes used in this script (`Sale_Price`, `Latitude`) are numeric.
	mod2 <-
	rand_forest(mtry = tune(), trees = tune()) %>%
	set_engine("ranger") %>%
	set_mode("regression") %>%
	grid_regular(levels = 10)

	## Design note: combining a model specification that has no tuning
	## parameters (presumably: nothing marked with `tune()`) with any
	## `grid_*()` call should throw an error.

	# Attach the first candidate model to the workflow, add resampling with
	# `vfold_cv()` and run everything with `fit()`. The result is not
	# assigned to anything.
	ml_wflow %>%
	add_model(mod1) %>%
	vfold_cv() %>%
	fit()

	# Add new recipe steps: extend the existing recipe with scaling and
	# centering of `Sale_Price`, then attach `mod2`, resample and fit.
	# Inside `update_rcp()` the current recipe is referred to as `.rcp`,
	# the same placeholder every other `update_rcp()` call in this script
	# uses.
	final_res <-
	ml_wflow %>%
	update_rcp(
	.rcp %>%
	step_scale(Sale_Price) %>%
	step_center(Sale_Price)
	) %>%
	add_model(mod2) %>%
	vfold_cv() %>%
	fit()

	## Add new variable
	# `update_frm()` receives the one-sided formula `~ . + x2` (presumably
	# "keep the current terms and add `x2`"); `update_rcp()` then appends a
	# dummy-encoding step for the new variable before `mod2` is attached,
	# resampled and fitted.
	# NOTE(review): `x2` looks like a placeholder name -- confirm that such
	# a column exists in the data.
	ml_wflow %>%
	update_frm(
	~ . + x2
	) %>%
	update_rcp(
	.rcp %>% step_dummy(x2)
	) %>%
	add_model(mod2) %>%
	vfold_cv() %>%
	fit()

	## Replace y
	# Swap the outcome for `Latitude` while keeping the right-hand side
	# (`.`), and add a log step for the new outcome.
	# NOTE(review): the base recipe lists `Latitude` as a predictor and
	# scales it; confirm how `.` should treat a variable that moves to the
	# left-hand side.
	ml_wflow %>%
	update_frm(
	Latitude ~ .
	) %>%
	update_rcp(
	.rcp %>% step_log(Latitude)
	) %>%
	add_model(mod2) %>%
	vfold_cv() %>%
	fit()

	## Update whole formula
	# Replace both sides of the formula and supply a matching recipe.
	# R names are case-sensitive, so the Box-Cox step refers to `whatever`
	# with exactly the spelling used in the formula.
	# NOTE(review): `whatever` looks like a placeholder column name.
	ml_wflow %>%
	update_frm(
	Latitude ~ Neighborhood + whatever
	) %>%
	update_rcp(
	.rcp %>%
	step_log(Latitude) %>%
	step_dummy(Neighborhood) %>%
	step_BoxCox(whatever),
	# Remove the existing recipe and accept the new one
	new = TRUE
	) %>%
	add_model(mod2) %>%
	vfold_cv() %>%
	fit()



	# Remove a step from the fitted result `final_res`: `-step_scale`
	# presumably asks `update_rcp()` to drop the scaling step(s).
	# The call passes only that one argument; a dangling trailing comma
	# would hand the function an empty second argument.
	final_res %>%
	update_rcp(
	-step_scale
	)





	# Design intent: `ml_wflow` is only a description of the steps and must
	# not compute anything on its own; calling `fit` is what runs it all.
	# NOTE(review): the earlier `add_model()` pipelines were not assigned
	# back to `ml_wflow`, so confirm which model this fit is meant to use.
	ml_wflow <-
	ml_wflow %>%
	fit()

	# Visualise how the tuning parameters performed.
	autoplot(ml_wflow)

	# Pick the best parameter combination (judged by RMSE) automatically
	# and refit that model on the training data.
	final_model <- fit_best_model(ml_wflow, metrics = metric_set(rmse))

	# Evaluate the final model on the held-out test data.
	# Since everything is bundled into the workflow object, each piece can
	# be extracted with its own function, all following the same verb
	# convention.
	holdout_error(final_model)