Skip to content

Instantly share code, notes, and snippets.

@doron2402
Created October 21, 2017 20:48
Show Gist options
  • Save doron2402/1366a9e2849adabf9a4e44093dc3b756 to your computer and use it in GitHub Desktop.
Save doron2402/1366a9e2849adabf9a4e44093dc3b756 to your computer and use it in GitHub Desktop.
Tip VS Age - Linear regression model using R
# Predict the tip amount from the client's age with a simple
# (one-predictor) linear regression model.
library(caTools)

# Expects a CSV with at least the columns `Tip` and `Age` (both are used below).
dataset <- read.csv("tip.csv")

# Fix the random seed so the train/test split is reproducible between runs.
set.seed(100)

# sample.split() returns a logical mask over the rows: TRUE marks rows for the
# training set (2/3 of the data), FALSE marks rows for the test set.
is_train <- sample.split(dataset$Tip, SplitRatio = 2 / 3)
training_set <- subset(dataset, is_train)
test_set <- subset(dataset, !is_train)

# Fit Tip as a linear function of Age on the training rows only.
regressor <- lm(formula = Tip ~ Age, data = training_set)
# ggplot2 is used to visualize the data.
# Install it only when it is not already available, so that re-running the
# script does not reinstall the package (and does not need network access)
# every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Attach the package so ggplot(), aes() and the geom_*() layers can be called
# unqualified below.
library(ggplot2)
# Training-set view: the observed (Age, Tip) points in red and the fitted
# regression line in blue. The same fitted line is what the test-set
# observations are compared against afterwards.
ggplot() +
  geom_point(
    data = training_set,
    mapping = aes(x = Age, y = Tip),
    colour = "red"
  ) +
  geom_line(
    data = training_set,
    # y is the model's prediction for each training row
    mapping = aes(x = Age, y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  labs(
    title = "Tip vs Client Age (Training set)",
    x = "Client Age",
    y = "Tip"
  )
# Test-set view: the held-out (Age, Tip) points in green, drawn against the
# regression line in red.
ggplot() +
  geom_point(
    data = test_set,
    mapping = aes(x = Age, y = Tip),
    colour = "green"
  ) +
  geom_line(
    # The line is intentionally still built from the training rows: it is the
    # same fitted model, shown here so the unseen test points can be judged
    # against it.
    data = training_set,
    mapping = aes(x = Age, y = predict(regressor, newdata = training_set)),
    colour = "red"
  ) +
  labs(
    title = "Tip vs Client Age (Test Set)",
    x = "Client Age",
    y = "Tip"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment