Question 7: Survival by fare - Loess
Set the seed to 1. Train a Loess model with the caret gamLoess
method, using fare as the only predictor.
What is the accuracy on the test set for the Loess model?
Note: when training models for Titanic Exercises Part 2, please use the S3 method for class formula rather than the default S3 method of caret train() (see ?caret::train for details).
# Install any missing packages, then attach them ----
# requireNamespace() only reports whether a package can be loaded; unlike
# require() it does not attach anything, so the availability check stays
# separate from the library() calls below. library() stops with an error if
# a package still cannot be loaded after the install attempt.
required_pkgs <- c("titanic", "caret", "gam", "dplyr")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(caret)
library(titanic)
library(gam) # Required for gamLoess
library(dplyr)
# Load the Titanic data sets ----
data(titanic_train)
data(titanic_test)

# Build the modelling table from the training data ----
titanic_clean <- mutate(
  titanic_train,
  # The outcome and the embarkation port are treated as categorical.
  Survived = factor(Survived),
  Embarked = factor(Embarked),
  # Missing ages are filled with the median of the observed ages.
  Age = replace(Age, is.na(Age), median(Age, na.rm = TRUE)),
  # Derived feature: SibSp + Parch plus one.
  FamilySize = SibSp + Parch + 1
)

# Keep only the columns used downstream, in this order.
titanic_clean <- dplyr::select(
  titanic_clean,
  Survived, Sex, Pclass, Age, Fare, SibSp, Parch, FamilySize, Embarked
)
# Reproducible train/test split ----
# sample.kind = "Rounding" requests the sampler that was R's default before
# version 3.6.0.
# NOTE(review): the seed is set ahead of the partition rather than directly
# before model training -- confirm this is the intended setup.
set.seed(seed = 1, sample.kind = "Rounding")

# Row indices of the held-out partition (p = 0.2 of the rows, split on the
# Survived outcome); list = FALSE returns them as a matrix instead of a list.
test_index <- createDataPartition(
  y = titanic_clean$Survived,
  times = 1,
  p = 0.2,
  list = FALSE
)

# Rows not selected form the training set; the selected rows are held out.
train_set <- titanic_clean[-test_index, ]
test_set <- titanic_clean[test_index, ]
# Fit the Loess model ----
# trainControl(method = "none") skips resampling, so train() fits a single
# model on train_set.
no_resampling <- trainControl(method = "none")

# Formula interface of train(): Fare is the only predictor of Survived.
train_gamLoess <- train(
  Survived ~ Fare,
  data = train_set,
  method = "gamLoess",
  trControl = no_resampling
)
# Predict the outcome for the held-out rows ----
gamLoess_preds <- predict(train_gamLoess, newdata = test_set)

# Accuracy is the share of test-set rows whose prediction equals the observed
# Survived value.
is_correct <- gamLoess_preds == test_set$Survived
accuracy <- mean(is_correct)

# Report the accuracy rounded to four decimal places
accuracy_msg <- paste("Accuracy on test set:", round(accuracy, 4))
print(accuracy_msg)
The code returned:
[1] "Accuracy on test set: 0.6592"