Created
August 10, 2015 03:28
-
-
Save primaryobjects/554e7c8ae6e8bfadbf95 to your computer and use it in GitHub Desktop.
Predicting with Regression in R.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predicting eruption duration from waiting time on the `faithful` data set,
# first with a plain `lm()` fit and then with a caret train/test workflow.
library(caret)
data(faithful)
set.seed(333)

# Plot data.
plot(x = faithful$waiting, y = faithful$eruptions)

# Calculate linear model on the full data set.
fit <- lm(eruptions ~ waiting, data = faithful)

# Plot trend line.
abline(fit)

# Now do it again, this time with machine learning: hold out 40% of the rows
# so the model can be evaluated on data it was not fitted on.
inTrain <- createDataPartition(y = faithful$eruptions, p = 0.6, list = FALSE)
training <- faithful[inTrain, ]
test <- faithful[-inTrain, ]

# Plot training data.
plot(x = training$waiting, y = training$eruptions)

# Train linear model (the coefficient values should be very similar to the
# first linear model fit that we made above).
fit2 <- train(eruptions ~ ., data = training, method = "lm")

# Plot trend line from trained model. Points are drawn in order of `waiting`
# so the segments run left to right instead of doubling back on themselves.
results <- predict(fit2, newdata = training)
ord <- order(training$waiting)
lines(x = training$waiting[ord], y = results[ord])

# Calculate error with RMSE (root mean square error). The squared errors are
# averaged, not summed: a sum grows with the number of rows, which would make
# the training and test figures incomparable.
trainRMSE <- sqrt(mean((results - training$eruptions)^2))

# Now predict on the test set and draw a new line.
plot(x = test$waiting, y = test$eruptions)

# Draw the predicted regression line on the test set. The model was fitted on
# the training rows only, so how closely the line follows these points shows
# how well it generalizes.
results <- predict(fit2, newdata = test)
ord <- order(test$waiting)
lines(x = test$waiting[ord], y = results[ord])

# Calculate error with RMSE (root mean square error).
testRMSE <- sqrt(mean((results - test$eruptions)^2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment