Created
October 21, 2017 20:48
-
-
Save doron2402/1366a9e2849adabf9a4e44093dc3b756 to your computer and use it in GitHub Desktop.
Tip VS Age - Linear regression model using R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict how much tip we will get from a client based on the client's age,
# using a simple linear regression model.

# Load the observations; the model below reads the `Tip` and `Age` columns.
dataset <- read.csv("tip.csv")
# Fail early with a clear message if the expected columns are missing.
stopifnot(all(c("Tip", "Age") %in% names(dataset)))

library(caTools)

# Fix the random seed so the train/test split is reproducible.
set.seed(100)

# Split the dataset between a training set and a test set.
# `split` is TRUE for rows assigned to training (SplitRatio = 2/3).
split <- sample.split(dataset$Tip, SplitRatio = 2 / 3)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Fit Tip as a linear function of Age, using the training rows only.
regressor <- lm(formula = Tip ~ Age, data = training_set)
# ggplot2 is used to visualize the data.
# Install it only when it is not already available, so that re-running the
# script does not reinstall the package (and hit the network) every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Load the library.
library(ggplot2)
# First, plot the training set: the observed (Age, Tip) points in red and the
# fitted regression line in blue. The same fitted line can later be compared
# against the test set.
ggplot(training_set, aes(x = Age)) +
  geom_point(aes(y = Tip), colour = "red") +
  # The line's y values are the model's predictions for the training rows.
  geom_line(
    aes(y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  ggtitle("Tip vs Client Age (Training set)") +
  xlab("Client Age") +
  ylab("Tip")
# Plot the held-out test observations (green) against the regression line
# (red). The line is deliberately drawn from the training set: `regressor` was
# fitted on the training rows, and this shows how that fit lines up with the
# test points.
ggplot() +
  geom_point(
    data = test_set,
    aes(x = Age, y = Tip),
    colour = "green"
  ) +
  geom_line(
    data = training_set,
    aes(x = Age, y = predict(regressor, newdata = training_set)),
    colour = "red"
  ) +
  ggtitle("Tip vs Client Age (Test Set)") +
  xlab("Client Age") +
  ylab("Tip")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment