Last active
October 10, 2018 08:08
-
-
Save primaryobjects/41c4230e43e11029cff1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quiz 4
# Question 1.
# Fit a random forest and a boosted-tree model to the vowel training data,
# predict on the vowel test data, then stack the two sets of predictions
# with a second random forest and compare the three confusion matrices.
library(ElemStatLearn)
library(randomForest)
library(caret)
data(vowel.train)
data(vowel.test)
# Treat the response as a factor so the models are fit as classifiers.
vowel.train$y <- as.factor(vowel.train$y)
vowel.test$y <- as.factor(vowel.test$y)
set.seed(33833)
fit1 <- train(y ~ ., data = vowel.train, method = "rf")
# verbose = FALSE is forwarded to gbm and only suppresses its per-iteration
# console trace; the fitted model is unaffected.
fit2 <- train(y ~ ., data = vowel.train, method = "gbm", verbose = FALSE)
results1 <- predict(fit1, newdata = vowel.test)
results2 <- predict(fit2, newdata = vowel.test)
# Stacking data: one row per test sample, holding each base model's
# prediction alongside the true label.
combo <- data.frame(results1, results2, y = vowel.test$y)
fit3 <- train(y ~ ., data = combo, method = "rf")
# fit3's predictors are results1 and results2, which are columns of combo and
# not of vowel.test. Predicting on vowel.test only works if those names happen
# to be resolvable as global variables, so predict on combo explicitly.
results3 <- predict(fit3, newdata = combo)
c1 <- confusionMatrix(results1, vowel.test$y)
c2 <- confusionMatrix(results2, vowel.test$y)
# NOTE(review): fit3 is trained and evaluated on the same test rows, so c3 is
# an optimistic estimate of the stacked model's accuracy.
c3 <- confusionMatrix(results3, combo$y)
# Question 2.
# Fit random forest, boosted trees and LDA to the Alzheimer's data, then stack
# the three sets of test predictions with a random forest and compare
# accuracies.
library(caret)
library(gbm)
set.seed(3433)
library(AppliedPredictiveModeling)
# Load data.
data(AlzheimerDisease)
adData <- data.frame(diagnosis, predictors)
inTrain <- createDataPartition(adData$diagnosis, p = 3 / 4)[[1]]
training <- adData[inTrain, ]
testing <- adData[-inTrain, ]
set.seed(62433)
# Train using 3 different models.
fit1 <- train(diagnosis ~ ., data = training, method = "rf")
# verbose = FALSE is forwarded to gbm and only suppresses console output.
fit2 <- train(diagnosis ~ ., data = training, method = "gbm", verbose = FALSE)
fit3 <- train(diagnosis ~ ., data = training, method = "lda")
# Run models on testing data.
results1 <- predict(fit1, newdata = testing)
results2 <- predict(fit2, newdata = testing)
results3 <- predict(fit3, newdata = testing)
# Stack models together and combine with random forests.
combo <- data.frame(
  results1, results2, results3,
  diagnosis = testing$diagnosis
)
fit4 <- train(diagnosis ~ ., data = combo, method = "rf")
# Run stacked model on the stacking data. fit4's predictors are results1,
# results2 and results3, which are columns of combo and not of testing, so
# combo (not testing) is the correct newdata.
results4 <- predict(fit4, newdata = combo)
# random forests = 0.78
c1 <- confusionMatrix(results1, testing$diagnosis)
# boosting = 0.80
c2 <- confusionMatrix(results2, testing$diagnosis)
# lda = 0.77
c3 <- confusionMatrix(results3, testing$diagnosis)
# Stacked models = 0.82
# NOTE(review): fit4 is trained and evaluated on the same test rows, so this
# figure is an optimistic estimate.
c4 <- confusionMatrix(results4, combo$diagnosis)
# Question 3.
# Fit a lasso model to the concrete data and plot the coefficient paths.
# caret is loaded here so this section does not depend on an earlier section
# having attached it (createDataPartition and train both come from caret).
library(caret)
set.seed(3523)
library(AppliedPredictiveModeling)
data(concrete)
inTrain <- createDataPartition(concrete$CompressiveStrength, p = 3 / 4)[[1]]
training <- concrete[inTrain, ]
testing <- concrete[-inTrain, ]
set.seed(233)
fit1 <- train(CompressiveStrength ~ ., data = training, method = "lasso")
# NOTE(review): plot.enet is presumably provided by the elasticnet package
# that caret uses for method = "lasso" -- confirm it is on the search path.
plot.enet(fit1$finalModel, use.color = TRUE)
# Question 4.
# Fit a BATS time-series model to Tumblr visits before 2012, forecast over the
# remaining rows, and report how many true values fall inside the 95%
# prediction interval.
library(lubridate) # For year() function below
library(forecast)
library(quantmod)
url <- "https://d396qusza40orc.cloudfront.net/predmachlearn/gaData.csv"
fileName <- basename(url)
if (!file.exists(fileName)) {
  # Use R's default download method rather than method = "curl", which
  # requires an external curl executable to be installed.
  download.file(url, fileName)
}
dat <- read.csv(fileName, na.strings = c("", "NA"))
training <- dat[year(dat$date) < 2012, ]
testing <- dat[year(dat$date) > 2011, ]
# Create a time-series.
tstrain <- ts(training$visitsTumblr)
# Create a model using bats.
fit <- bats(tstrain)
# Count the length of the test set, so we can predict for this many points
# beyond the training data.
start <- nrow(testing)
# Create forecast model for the remaining points beyond training (up to the
# testing count), use a 95% prediction interval bound.
fcast <- forecast(fit, level = 95, h = start)
# Check accuracy.
accuracy(fcast, testing$visitsTumblr)
# For how many of the testing points is the true value within the 95%
# prediction interval bounds?
# Only one level was requested, so flattening the bounds with as.numeric()
# gives one lower and one upper value per forecast step. The comparison is
# done on whole vectors instead of growing a result inside a 1:l loop, which
# would also iterate over c(1, 0) if the test set were empty.
actual <- testing$visitsTumblr
result <- as.numeric(fcast$lower) < actual & actual < as.numeric(fcast$upper)
l <- length(result)
sum(result) / l * 100
# Question 5.
# Fit a support vector machine to the concrete data and report its error on
# the held-out test set.
# All packages are loaded up front: createDataPartition() needs caret before
# the split, and accuracy() comes from forecast, which this section otherwise
# only gets if Question 4 has already been run.
library(AppliedPredictiveModeling)
library(caret)
library(e1071)
library(forecast) # For accuracy() below
set.seed(3523)
data(concrete)
inTrain <- createDataPartition(concrete$CompressiveStrength, p = 3 / 4)[[1]]
training <- concrete[inTrain, ]
testing <- concrete[-inTrain, ]
set.seed(325)
# Train an svm.
fit <- svm(CompressiveStrength ~ ., data = training)
# Run svm on test set.
results <- predict(fit, testing)
# Check accuracy.
accuracy(results, testing$CompressiveStrength)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment