Last active
February 7, 2024 16:11
-
-
Save ivopbernardo/0fb8a90b892892c488e86df07d211033 to your computer and use it in GitHub Desktop.
Caret Library Example - R Language
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# caret library example used in blogpost: | |
# https://towardsdatascience.com/a-guide-to-using-caret-in-r-71dec0bda208 | |
library(caTools) | |
library(caret) | |
# Train/test split helper used for both iris and mtcars.
#
# Args:
#   df: a data.frame to split.
# Returns:
#   A named list with elements `train` and `test` (still accessible
#   positionally as [[1]] / [[2]], as existing callers do).
#
# NOTE(review): sample.split() expects a vector of outcome labels, not a
# whole data.frame -- passing `df` relies on caTools coercing it and splits
# on a recycled mask. Kept as-is to preserve the original split; consider
# sample.split(df[[1]], ...) or caret::createDataPartition() instead.
train_test_split <- function(df) {
  set.seed(42)  # fixed seed so repeated calls produce the identical split
  mask <- sample.split(df, SplitRatio = 0.8)
  train <- subset(df, mask)
  test <- subset(df, !mask)
  list(train = train, test = test)
}
# To simplify, recode iris as a binary classification problem:
# 1 = setosa, 0 = everything else.
iris$target <- ifelse(iris$Species == 'setosa', 1, 0)

# Split each dataset ONCE and reuse the result. The original called
# train_test_split() twice per dataset, redoing identical work (the fixed
# seed inside the helper made that correct, but wasteful).
mtcars_split <- train_test_split(mtcars)
mtcars_train <- mtcars_split[[1]]
mtcars_test <- mtcars_split[[2]]

iris_split <- train_test_split(iris)
iris_train <- iris_split[[1]]
iris_test <- iris_split[[2]]
# Linear regression on mtcars: predict mpg from four predictors.
lm_model <- train(
  mpg ~ hp + wt + gear + disp,
  method = "lm",
  data = mtcars_train
)
summary(lm_model)

# Logistic regression on iris: model the binary `target` column.
glm_model <- train(
  target ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  method = "glm",
  family = "binomial",
  data = iris_train
)
summary(glm_model)
# Decision tree (CART via rpart) on mtcars.
d.tree <- train(
  mpg ~ hp + wt + gear + disp,
  method = "rpart",
  data = mtcars_train
)
library(rpart.plot)
rpart.plot(d.tree$finalModel)  # visualize the fitted tree
d.tree  # print resampling results for the tried cp values

# Random forest via the ranger backend.
r.forest <- train(
  mpg ~ hp + wt + gear + disp,
  method = "ranger",
  data = mtcars_train
)
r.forest
# Gradient-boosted trees (xgboost).
xg.boost <- train(
  mpg ~ hp + wt + gear + disp,
  method = "xgbTree",
  data = mtcars_train
)

# k-nearest neighbours.
knn <- train(
  mpg ~ hp + wt + gear + disp,
  method = "knn",
  data = mtcars_train
)

# Neural network (regression on mtcars).
neural.network <- train(
  mpg ~ hp + wt + gear + disp,
  method = "neuralnet",
  data = mtcars_train
)

# Neural network on iris. Note `target` is still numeric at this point,
# so caret fits this as a regression problem -- the factor-based
# classification version comes next in the script.
neural.network.class <- train(
  target ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  method = "nnet",
  data = iris_train
)
neural.network.class
# Convert the target to a factor so caret treats the problem as
# classification rather than regression. The original converted only the
# training set; converting the test set as well keeps the two consistent
# for any downstream predict()/confusionMatrix() usage.
iris_train$target <- factor(iris_train$target)
iris_test$target <- factor(iris_test$target)

# Retrain the neural network, now as a genuine classifier.
neural.network.class.2 <- train(
  target ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris_train,
  method = "nnet"
)
# K-fold cross-validation ----
ctrl <- trainControl(method = "cv", number = 10)

# Decision tree evaluated with 10-fold CV (default selection metric: RMSE).
d.tree.kfold <- train(
  mpg ~ hp + wt + gear + disp,
  data = mtcars_train,
  method = "rpart",
  trControl = ctrl
)
d.tree.kfold
rpart.plot(d.tree.kfold$finalModel)

# Same model, but pick the best cp by R-squared instead of RMSE.
d.tree.kfold.rsquared <- train(
  mpg ~ hp + wt + gear + disp,
  data = mtcars_train,
  method = "rpart",
  trControl = ctrl,
  metric = "Rsquared"
)
d.tree.kfold.rsquared
# Hyperparameter tuning ----
# Explicit cp grid for the decision tree (rpart's complexity parameter).
d.tree.hyperparam <- train(
  mpg ~ hp + wt + gear + disp,
  data = mtcars_train,
  method = "rpart",
  trControl = ctrl,
  metric = "Rsquared",
  tuneGrid = data.frame(cp = c(0.0001, 0.001, 0.35, 0.65))
)
d.tree.hyperparam

# Three hand-picked (mtry, min.node.size) rows for ranger. data.frame()
# builds a row-wise grid of 3 combinations -- not a full cross-product
# (use expand.grid() for that).
r.forest.hyperparam <- train(
  mpg ~ hp + wt + gear + disp,
  data = mtcars_train,
  method = "ranger",
  trControl = ctrl,
  metric = "Rsquared",
  tuneGrid = data.frame(
    mtry = c(2, 3, 4),
    min.node.size = c(2, 4, 10),
    splitrule = "variance"
  )
)
r.forest.hyperparam
# Test-set predictions ----
predict(lm_model, mtcars_test)  # linear regression
predict(d.tree, mtcars_test)    # decision tree
predict(r.forest, mtcars_test)  # random forest
predict(xg.boost, mtcars_test)  # xgboost

# Compare random-forest vs xgboost predictions in a scatter plot;
# points near the diagonal indicate the two models agree.
predictions <- data.frame(
  rf = predict(r.forest, mtcars_test),
  xgboost = predict(xg.boost, mtcars_test)
)
library(ggplot2)
ggplot(predictions, aes(x = rf, y = xgboost)) +
  geom_point()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment