ivopbernardo · February 4, 2022 18:18
diff --git a/rf_demo.R b/rf_demo.R
 # Don't forget to download the train.csv file
 # to make this gist work.

 # Download it at: https://www.kaggle.com/c/titanic/data?select=train.csv

 # You also need to install ROCR and rpart libraries

 # Load the Kaggle Titanic training file from the working directory.
 titanic <- read.csv(file = "./train.csv")

 # Training-set size: 70% of the rows, rounded up.
 size <- ceiling(nrow(titanic) * 0.7)

 # Seed the RNG so the same row positions are drawn on every run,
 # then pick `size` distinct row positions for training.
 set.seed(999)
 train_index <- sample.int(nrow(titanic), size)

 # Drawn rows form the training set; every other row is held out for testing.
 train_df <- titanic[train_index, ]
 test_df <- titanic[-train_index, ]

 # Loading rpart
 library(rpart)

 # Training decision tree 1
 # Fits a classification tree (maxdepth 2, minsplit 30) on 600 randomly
 # drawn rows of train_df. The rows are drawn with base R indexing:
 # sample_n() is a dplyr function and the visible script only attaches
 # rpart and ROCR, so the original call could not be resolved.
 set.seed(9990)  # fixes which 600 rows are drawn for this tree
 oak_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
                   data = train_df[sample.int(nrow(train_df), 600), ],
                   method = 'class',
                   control = list(maxdepth = 2,
                                  minsplit = 30))

 # Training Decision Tree 2
 # Fits a classification tree (maxdepth 3, minsplit 3, minbucket 4,
 # cp 0.01) on 600 randomly drawn rows of train_df. The rows are drawn
 # with base R indexing: sample_n() is a dplyr function and the visible
 # script only attaches rpart and ROCR, so the original call could not
 # be resolved.
 set.seed(9991)  # a different seed gives this tree its own row subset
 pine_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
                    data = train_df[sample.int(nrow(train_df), 600), ],
                    method = 'class',
                    control = list(maxdepth = 3,
                                   minsplit = 3,
                                   minbucket = 4,
                                   cp = 0.01))

 # Training Decision Tree 3
 # Fits a classification tree (maxdepth 2, minsplit 2, minbucket 4,
 # cp 0.01) on 600 randomly drawn rows of train_df. The rows are drawn
 # with base R indexing: sample_n() is a dplyr function and the visible
 # script only attaches rpart and ROCR, so the original call could not
 # be resolved.
 set.seed(9992)  # a different seed gives this tree its own row subset
 elm_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
                   data = train_df[sample.int(nrow(train_df), 600), ],
                   method = 'class',
                   control = list(maxdepth = 2,
                                  minsplit = 2,
                                  minbucket = 4,
                                  cp = 0.01))


 # Building function for AUC
 library(ROCR)
 # Compute the AUC of a single fitted model on a labelled data frame.
 #
 # model: fitted model; predict(model, data) must return a matrix whose
 #        second column is the score to rank by (presumably the
 #        probability of Survived == 1 -- confirm against the label coding).
 # data:  data frame to score, with a Survived column holding the true
 #        labels. Defaults to the global test_df, so existing
 #        obtainauc(model) calls behave as before.
 #
 # Returns the first element of the y.values slot of the ROCR performance
 # object requested with measure = 'auc'.
 obtainauc <- function(model, data = test_df) {
   predictions <- predict(model, data)[, 2]
   pred <- prediction(predictions, data$Survived)
   perf <- performance(pred, measure = 'auc')
   # The source read "[email protected][[1]]" -- an e-mail-obfuscation
   # artifact that does not parse; the intended slot access is restored.
   perf@y.values[[1]]
 }

 # Ensemble score: average, row by row, the second prediction column of
 # the three trees on the held-out rows (oak + pine + elm, divided by 3).
 ensemble <- Reduce(
   `+`,
   lapply(
     list(oak_tree, pine_tree, elm_tree),
     function(tree) predict(tree, test_df)[, 2]
   )
 ) / 3

 # Ensemble Performance
 # AUC of the averaged ensemble scores against the true test labels.
 # Note: the result of prediction() is stored under the name `prediction`;
 # the name is kept so any later code that reads it still works.
 prediction <- prediction(ensemble, test_df$Survived)
 perf <- performance(prediction, measure = 'auc')
 # The source read "[email protected][[1]]" -- an e-mail-obfuscation
 # artifact that does not parse; the intended slot access is restored.
 performance_ensemble <- perf@y.values[[1]]
	# Don't forget to download the train.csv file
	# to make this gist work.

	# Download it at: https://www.kaggle.com/c/titanic/data?select=train.csv

	# You also need to install ROCR and rpart libraries

	# Load the Kaggle Titanic training file from the working directory.
	titanic <- read.csv(file = "./train.csv")

	# Training-set size: 70% of the rows, rounded up.
	size <- ceiling(nrow(titanic) * 0.7)

	# Seed the RNG so the same row positions are drawn on every run,
	# then pick `size` distinct row positions for training.
	set.seed(999)
	train_index <- sample.int(nrow(titanic), size)

	# Drawn rows form the training set; every other row is held out for testing.
	train_df <- titanic[train_index, ]
	test_df <- titanic[-train_index, ]

	# Loading rpart
	library(rpart)

	# Training decision tree 1
	# Fits a classification tree (maxdepth 2, minsplit 30) on 600 randomly
	# drawn rows of train_df. The rows are drawn with base R indexing:
	# sample_n() is a dplyr function and the visible script only attaches
	# rpart and ROCR, so the original call could not be resolved.
	set.seed(9990)  # fixes which 600 rows are drawn for this tree
	oak_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
	                  data = train_df[sample.int(nrow(train_df), 600), ],
	                  method = 'class',
	                  control = list(maxdepth = 2,
	                                 minsplit = 30))

	# Training Decision Tree 2
	# Fits a classification tree (maxdepth 3, minsplit 3, minbucket 4,
	# cp 0.01) on 600 randomly drawn rows of train_df. The rows are drawn
	# with base R indexing: sample_n() is a dplyr function and the visible
	# script only attaches rpart and ROCR, so the original call could not
	# be resolved.
	set.seed(9991)  # a different seed gives this tree its own row subset
	pine_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
	                   data = train_df[sample.int(nrow(train_df), 600), ],
	                   method = 'class',
	                   control = list(maxdepth = 3,
	                                  minsplit = 3,
	                                  minbucket = 4,
	                                  cp = 0.01))

	# Training Decision Tree 3
	# Fits a classification tree (maxdepth 2, minsplit 2, minbucket 4,
	# cp 0.01) on 600 randomly drawn rows of train_df. The rows are drawn
	# with base R indexing: sample_n() is a dplyr function and the visible
	# script only attaches rpart and ROCR, so the original call could not
	# be resolved.
	set.seed(9992)  # a different seed gives this tree its own row subset
	elm_tree <- rpart(Survived ~ Fare + Age + Sex + Pclass,
	                  data = train_df[sample.int(nrow(train_df), 600), ],
	                  method = 'class',
	                  control = list(maxdepth = 2,
	                                 minsplit = 2,
	                                 minbucket = 4,
	                                 cp = 0.01))


	# Building function for AUC
	library(ROCR)
	# Compute the AUC of a single fitted model on a labelled data frame.
	#
	# model: fitted model; predict(model, data) must return a matrix whose
	#        second column is the score to rank by (presumably the
	#        probability of Survived == 1 -- confirm against the label coding).
	# data:  data frame to score, with a Survived column holding the true
	#        labels. Defaults to the global test_df, so existing
	#        obtainauc(model) calls behave as before.
	#
	# Returns the first element of the y.values slot of the ROCR performance
	# object requested with measure = 'auc'.
	obtainauc <- function(model, data = test_df) {
	  predictions <- predict(model, data)[, 2]
	  pred <- prediction(predictions, data$Survived)
	  perf <- performance(pred, measure = 'auc')
	  # The source read "[email protected][[1]]" -- an e-mail-obfuscation
	  # artifact that does not parse; the intended slot access is restored.
	  perf@y.values[[1]]
	}

	# Ensemble score: average, row by row, the second prediction column of
	# the three trees on the held-out rows (oak + pine + elm, divided by 3).
	ensemble <- Reduce(
	  `+`,
	  lapply(
	    list(oak_tree, pine_tree, elm_tree),
	    function(tree) predict(tree, test_df)[, 2]
	  )
	) / 3

	# Ensemble Performance
	# AUC of the averaged ensemble scores against the true test labels.
	# Note: the result of prediction() is stored under the name `prediction`;
	# the name is kept so any later code that reads it still works.
	prediction <- prediction(ensemble, test_df$Survived)
	perf <- performance(prediction, measure = 'auc')
	# The source read "[email protected][[1]]" -- an e-mail-obfuscation
	# artifact that does not parse; the intended slot access is restored.
	performance_ensemble <- perf@y.values[[1]]