topepo · May 6, 2021 22:00 · topepo · May 27, 2021
diff --git a/two_class_diag_plots.R b/two_class_diag_plots.R
 # Attach tidymodels and call tidymodels_prefer() right after loading
 # (NOTE(review): presumably this settles function-name conflicts in favor of
 # tidymodels packages -- confirm against the tidymodels documentation).
 library(tidymodels)
 tidymodels_prefer()
 # Make theme_bw() the default theme for the plots created below.
 theme_set(theme_bw())

 # Register a doMC parallel backend.
 # NOTE(review): the worker count (20) is hard-coded -- confirm it suits the
 # machine this script runs on.
 library(doMC)
 registerDoMC(cores = 20)

 # ------------------------------------------------------------------------------

 # Load the `ad_data` data set into the workspace.
 data("ad_data")

 # Initial split, then pull out the training and testing portions. The seed is
 # fixed immediately before the split is drawn.
 set.seed(1)
 ad_split <- ad_data %>% initial_split()
 ad_train <- ad_split %>% training()
 ad_test <- ad_split %>% testing()

 # Cross-validation folds (repeats = 5) built from the training portion, with
 # their own seed.
 set.seed(2)
 ad_folds <- ad_train %>% vfold_cv(repeats = 5)

 # ------------------------------------------------------------------------------

 # Boosted-tree specification: C5.0 engine, classification mode.
 boost_spec <- set_mode(set_engine(boost_tree(), "C5.0"), "classification")

 # Keep the predictions produced during resampling (save_pred = TRUE).
 ctrl_rs <- control_resamples(save_pred = TRUE)

 # Fit the specification over the folds, modelling Class on all other columns.
 boost_res <- fit_resamples(
   boost_spec,
   Class ~ .,
   resamples = ad_folds,
   control = ctrl_rs
 )

 # Predictions from the resampling results; used by the plots further down.
 boost_in_sample_predictions <- boost_res %>% augment()

 # Same specification and formula, run through last_fit() on the initial split.
 boost_test_res <- last_fit(boost_spec, Class ~ ., split = ad_split)

 boost_test_predictions <- boost_test_res %>% augment()

 # ------------------------------------------------------------------------------

 # Shared plotting constants:
 #   prob_bins   - bin width of the probability histogram
 #   prob_eps    - margin used to clamp probabilities away from 0 and 1
 #   prob_breaks - axis breaks (0.2, ..., 0.9) for the logit-scaled axes
 prob_bins <- 0.025
 prob_eps <- 0.001
 prob_breaks <- seq(2, 9) / 10

 # ------------------------------------------------------------------------------

 # Histogram of the predicted "Impaired" probability, one panel per true class.
 #
 # Bins are anchored at 0 (boundary = 0) and the visible range is set with
 # coord_cartesian() instead of xlim(): xlim() sets scale limits, and bars whose
 # edges fall outside those limits are dropped. With the default bin alignment
 # the first and last bins straddle 0 and 1, so the bars holding predictions at
 # or near 0 and 1 were being removed from the plot.
 boost_in_sample_predictions %>%
   ggplot(aes(x = .pred_Impaired)) +
   geom_histogram(binwidth = prob_bins, boundary = 0, col = "white") +
   facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
   ggtitle("Predicted probabilities versus true class") +
   coord_cartesian(xlim = c(0, 1))

 # Confusion matrix of the hard class predictions (.pred_class) against the
 # true Class, drawn with autoplot().
 autoplot(
   conf_mat(boost_in_sample_predictions, truth = Class, estimate = .pred_class)
 )

 # ------------------------------------------------------------------------------

 # Predicted "Impaired" probability (logit-scaled y axis) against the p_tau
 # column, one panel per true class.
 boost_in_sample_predictions %>%
   mutate(
     # Clamp into [prob_eps, 1 - prob_eps] before the logit transformation.
     .pred_Impaired = pmin(pmax(.pred_Impaired, prob_eps), 1 - prob_eps)
   ) %>%
   ggplot(aes(x = p_tau, y = .pred_Impaired)) +
   geom_point() +
   facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
   ggtitle("Predicted probabilities versus numeric variable") +
   # We should make a custom transformation that handles probs at 0 and 1
   scale_y_continuous(trans = scales::logit_trans(), breaks = prob_breaks)

 # Predicted "Impaired" probability (logit-scaled x axis) against the Genotype
 # column, one panel per true class.
 boost_in_sample_predictions %>%
   mutate(
     # Clamp into [prob_eps, 1 - prob_eps] before the logit transformation.
     .pred_Impaired = pmin(pmax(.pred_Impaired, prob_eps), 1 - prob_eps)
   ) %>%
   ggplot(aes(y = Genotype, x = .pred_Impaired)) +
   geom_point() +
   facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
   ggtitle("Predicted probabilities versus factor variable") +
   scale_x_continuous(trans = scales::logit_trans(), breaks = prob_breaks)

 # ------------------------------------------------------------------------------

 # ROC curve computed from the .pred_Impaired probability column.
 autoplot(
   roc_curve(boost_in_sample_predictions, truth = Class, .pred_Impaired)
 )

 # Precision-recall curve computed from the same column.
 autoplot(
   pr_curve(boost_in_sample_predictions, truth = Class, .pred_Impaired)
 )
	# Attach tidymodels and call tidymodels_prefer() right after loading
	# (NOTE(review): presumably this settles function-name conflicts in favor of
	# tidymodels packages -- confirm against the tidymodels documentation).
	library(tidymodels)
	tidymodels_prefer()
	# Make theme_bw() the default theme for the plots created below.
	theme_set(theme_bw())

	# Register a doMC parallel backend.
	# NOTE(review): the worker count (20) is hard-coded -- confirm it suits the
	# machine this script runs on.
	library(doMC)
	registerDoMC(cores = 20)

	# ------------------------------------------------------------------------------

	# Load the `ad_data` data set into the workspace.
	data("ad_data")

	# Initial split, then pull out the training and testing portions. The seed is
	# fixed immediately before the split is drawn.
	set.seed(1)
	ad_split <- ad_data %>% initial_split()
	ad_train <- ad_split %>% training()
	ad_test <- ad_split %>% testing()

	# Cross-validation folds (repeats = 5) built from the training portion, with
	# their own seed.
	set.seed(2)
	ad_folds <- ad_train %>% vfold_cv(repeats = 5)

	# ------------------------------------------------------------------------------

	# Boosted-tree specification: C5.0 engine, classification mode.
	boost_spec <- set_mode(set_engine(boost_tree(), "C5.0"), "classification")

	# Keep the predictions produced during resampling (save_pred = TRUE).
	ctrl_rs <- control_resamples(save_pred = TRUE)

	# Fit the specification over the folds, modelling Class on all other columns.
	boost_res <- fit_resamples(
	  boost_spec,
	  Class ~ .,
	  resamples = ad_folds,
	  control = ctrl_rs
	)

	# Predictions from the resampling results; used by the plots further down.
	boost_in_sample_predictions <- boost_res %>% augment()

	# Same specification and formula, run through last_fit() on the initial split.
	boost_test_res <- last_fit(boost_spec, Class ~ ., split = ad_split)

	boost_test_predictions <- boost_test_res %>% augment()

	# ------------------------------------------------------------------------------

	# Shared plotting constants:
	#   prob_bins   - bin width of the probability histogram
	#   prob_eps    - margin used to clamp probabilities away from 0 and 1
	#   prob_breaks - axis breaks (0.2, ..., 0.9) for the logit-scaled axes
	prob_bins <- 0.025
	prob_eps <- 0.001
	prob_breaks <- seq(2, 9) / 10

	# ------------------------------------------------------------------------------

	# Histogram of the predicted "Impaired" probability, one panel per true class.
	#
	# Bins are anchored at 0 (boundary = 0) and the visible range is set with
	# coord_cartesian() instead of xlim(): xlim() sets scale limits, and bars whose
	# edges fall outside those limits are dropped. With the default bin alignment
	# the first and last bins straddle 0 and 1, so the bars holding predictions at
	# or near 0 and 1 were being removed from the plot.
	boost_in_sample_predictions %>%
	  ggplot(aes(x = .pred_Impaired)) +
	  geom_histogram(binwidth = prob_bins, boundary = 0, col = "white") +
	  facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
	  ggtitle("Predicted probabilities versus true class") +
	  coord_cartesian(xlim = c(0, 1))

	# Confusion matrix of the hard class predictions (.pred_class) against the
	# true Class, drawn with autoplot().
	autoplot(
	  conf_mat(boost_in_sample_predictions, truth = Class, estimate = .pred_class)
	)

	# ------------------------------------------------------------------------------

	# Predicted "Impaired" probability (logit-scaled y axis) against the p_tau
	# column, one panel per true class.
	boost_in_sample_predictions %>%
	  mutate(
	    # Clamp into [prob_eps, 1 - prob_eps] before the logit transformation.
	    .pred_Impaired = pmin(pmax(.pred_Impaired, prob_eps), 1 - prob_eps)
	  ) %>%
	  ggplot(aes(x = p_tau, y = .pred_Impaired)) +
	  geom_point() +
	  facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
	  ggtitle("Predicted probabilities versus numeric variable") +
	  # We should make a custom transformation that handles probs at 0 and 1
	  scale_y_continuous(trans = scales::logit_trans(), breaks = prob_breaks)

	# Predicted "Impaired" probability (logit-scaled x axis) against the Genotype
	# column, one panel per true class.
	boost_in_sample_predictions %>%
	  mutate(
	    # Clamp into [prob_eps, 1 - prob_eps] before the logit transformation.
	    .pred_Impaired = pmin(pmax(.pred_Impaired, prob_eps), 1 - prob_eps)
	  ) %>%
	  ggplot(aes(y = Genotype, x = .pred_Impaired)) +
	  geom_point() +
	  facet_wrap(~ Class, labeller = labeller(Class = label_both), ncol = 1) +
	  ggtitle("Predicted probabilities versus factor variable") +
	  scale_x_continuous(trans = scales::logit_trans(), breaks = prob_breaks)

	# ------------------------------------------------------------------------------

	# ROC curve computed from the .pred_Impaired probability column.
	autoplot(
	  roc_curve(boost_in_sample_predictions, truth = Class, .pred_Impaired)
	)

	# Precision-recall curve computed from the same column.
	autoplot(
	  pr_curve(boost_in_sample_predictions, truth = Class, .pred_Impaired)
	)
No results found