primaryobjects · August 10, 2015 03:28
diff --git a/regression-machine-learning.R b/regression-machine-learning.R
 library(caret)

 # Seed the random number generator so later random steps are repeatable.
 set.seed(333)

 # Load the built-in Old Faithful data set; the eruptions and waiting
 # columns are used below.
 data(faithful)

 # Scatter plot with waiting on the x axis and eruptions on the y axis.
 plot(x = faithful$waiting, y = faithful$eruptions)

 # Least-squares fit of eruptions as a linear function of waiting.
 fit <- lm(formula = eruptions ~ waiting, data = faithful)

 # Overlay the fitted regression line on the scatter plot.
 abline(reg = fit)

 # Now do it again, this time with machine learning (caret): hold out part of
 # the data, fit on the rest, and measure the error.
 # p = 0.6 puts roughly 60% of the rows in the training set; list = FALSE
 # makes the result usable directly as a row index.
 inTrain <- createDataPartition(y = faithful$eruptions, p = 0.6, list = FALSE)
 training <- faithful[inTrain, ]
 test <- faithful[-inTrain, ]

 # Plot training data.
 plot(x = training$waiting, y = training$eruptions)

 # Train linear model (the coefficient values should be very similar to the
 # first linear model fit that we made above).
 fit2 <- train(eruptions ~ ., data = training, method = "lm")

 # Plot trend line from trained model.
 results <- predict(fit2, newdata = training)
 lines(x = training$waiting, y = results)

 # Calculate error with RMSE (root mean square error): the square root of the
 # MEAN squared residual. Using mean() rather than sum() keeps the value
 # independent of the number of rows, so it is comparable with the error
 # computed on a data set of a different size (e.g. the test set).
 trainRMSE <- sqrt(
   mean((fit2$finalModel$fitted.values - training$eruptions)^2)
 )

 # Now predict on the test set and draw a new line.
 plot(x = test$waiting, y = test$eruptions)
 # Draw the predicted regression line on the test set. It should match the
 # data points pretty closely, even though the model was fitted only on the
 # training set.
 results <- predict(fit2, newdata = test)
 lines(x = test$waiting, y = results)

 # Calculate error with RMSE (root mean square error): the square root of the
 # MEAN squared prediction error. Using mean() rather than sum() keeps the
 # value independent of the number of test rows, so it is comparable with an
 # error computed on a data set of a different size (e.g. the training set).
 testRMSE <- sqrt(mean((results - test$eruptions)^2))
	library(caret)

	# Seed the random number generator so later random steps are repeatable.
	set.seed(333)

	# Load the built-in Old Faithful data set; the eruptions and waiting
	# columns are used below.
	data(faithful)

	# Scatter plot with waiting on the x axis and eruptions on the y axis.
	plot(x = faithful$waiting, y = faithful$eruptions)

	# Least-squares fit of eruptions as a linear function of waiting.
	fit <- lm(formula = eruptions ~ waiting, data = faithful)

	# Overlay the fitted regression line on the scatter plot.
	abline(reg = fit)

	# Now do it again, this time with machine learning (caret): hold out part of
	# the data, fit on the rest, and measure the error.
	# p = 0.6 puts roughly 60% of the rows in the training set; list = FALSE
	# makes the result usable directly as a row index.
	inTrain <- createDataPartition(y = faithful$eruptions, p = 0.6, list = FALSE)
	training <- faithful[inTrain, ]
	test <- faithful[-inTrain, ]

	# Plot training data.
	plot(x = training$waiting, y = training$eruptions)

	# Train linear model (the coefficient values should be very similar to the
	# first linear model fit that we made above).
	fit2 <- train(eruptions ~ ., data = training, method = "lm")

	# Plot trend line from trained model.
	results <- predict(fit2, newdata = training)
	lines(x = training$waiting, y = results)

	# Calculate error with RMSE (root mean square error): the square root of the
	# MEAN squared residual. Using mean() rather than sum() keeps the value
	# independent of the number of rows, so it is comparable with the error
	# computed on a data set of a different size (e.g. the test set).
	trainRMSE <- sqrt(
	  mean((fit2$finalModel$fitted.values - training$eruptions)^2)
	)

	# Now predict on the test set and draw a new line.
	plot(x = test$waiting, y = test$eruptions)
	# Draw the predicted regression line on the test set. It should match the
	# data points pretty closely, even though the model was fitted only on the
	# training set.
	results <- predict(fit2, newdata = test)
	lines(x = test$waiting, y = results)

	# Calculate error with RMSE (root mean square error): the square root of the
	# MEAN squared prediction error. Using mean() rather than sum() keeps the
	# value independent of the number of test rows, so it is comparable with an
	# error computed on a data set of a different size (e.g. the training set).
	testRMSE <- sqrt(mean((results - test$eruptions)^2))