# coding: utf-8
# # Computing Regression Parameters (gradient descent example)
# ## The data
# Consider the following five-point synthetic data set.
# In[1]:
import graphlab
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
# In[2]:
# construct a one-dimensional vector using graphlab
sx = graphlab.SArray([0,1,2,3,4])
# In[3]:
# construct a second one-dimensional vector
sy = graphlab.SArray([1,3,7,13,21])
# In[4]:
sx
# In[5]:
sy
# In[6]:
st = graphlab.SFrame([sx,sy])
# ## which is plotted below
# In[7]:
st.show()
# # What we need
# Now that we have computed the regression line using a closed-form solution, let's do it again, this time with gradient descent.
# This corresponds to slide 70 in this week's notes.
# We are doing the part highlighted in blue at the bottom of the slide.
#
# The point here is that we are computing w0, the intercept, and w1, the slope.
# These two values form a vector [w0, w1], so it has a direction and
# a magnitude. Each step computes a new [w0, w1] from the old one by subtracting an adjustment: the step size times the quantity in brackets on the slide (the derivative).
# In the notes the step is written as 2 * eta; here the factor of 2 is folded into a single value, step_size = 0.05. As mentioned in class, it does not vary as the loop iterates.
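#
# In vector form the update looks like this (a sketch in standard gradient-descent notation, with eta as the step size):
#
# $$\begin{bmatrix} w_0 \\ w_1 \end{bmatrix}^{(t+1)} = \begin{bmatrix} w_0 \\ w_1 \end{bmatrix}^{(t)} - \eta \, \nabla RSS\left(w_0^{(t)}, w_1^{(t)}\right)$$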
#
# Recall that
#
# * The derivative of the cost for the intercept is the sum of the errors
# * The derivative of the cost for the slope is the sum of the product of the errors and the input (derived just below)
#
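# To see where those come from, a short derivation (assuming the usual RSS cost,
# with errors e_i defined as prediction minus y, matching the code below):
#
# $$RSS(w_0, w_1) = \sum_i \left( (w_0 + w_1 x_i) - y_i \right)^2 = \sum_i e_i^2$$
#
# $$\frac{\partial RSS}{\partial w_0} = 2 \sum_i e_i \qquad\qquad \frac{\partial RSS}{\partial w_1} = 2 \sum_i e_i x_i$$
#
# The leading factor of 2 is what gets folded into step_size, per the note above.
#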
# We will need a starting value for the slope and intercept, a step_size, and a tolerance:
#
# * initial_intercept = 0
# * initial_slope = 0
# * step_size = 0.05
# * tolerance = 0.01
#
# ## The algorithm
# In each step of the gradient descent we will do the following:
#
# 1. Compute the predicted values given the current slope and intercept
# 2. Compute the prediction errors (prediction - Y)
# 3. Update the intercept:
#
#    a. compute the derivative: sum(errors)
#    b. compute the adjustment as step_size times the derivative
#    c. decrease the intercept by the adjustment
#
# 4. Update the slope:
#
#    a. compute the derivative: sum(errors*input)
#    b. compute the adjustment as step_size times the derivative
#    c. decrease the slope by the adjustment
#
# 5. Compute the magnitude of the gradient
# 6. Check for convergence (a plain-Python sketch of the full loop follows)
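#
# Before stepping through this with graphlab, here is a minimal plain-Python
# sketch of the whole loop (an illustration only, using the same data and
# parameters; the function name and the max_iterations guard are my own, and
# unlike the walkthrough code below it includes the convergence check of steps 5 and 6):
# In[ ]:
def gradient_descent_sketch(x, y, step_size=0.05, tolerance=0.01, max_iterations=1000):
    intercept, slope = 0.0, 0.0
    for _ in range(max_iterations):
        # 1. predictions from the current line
        predictions = [slope*xi + intercept for xi in x]
        # 2. prediction errors (prediction - y)
        errors = [p - yi for p, yi in zip(predictions, y)]
        # 3a./4a. derivatives for the intercept and the slope
        derivative_intercept = sum(errors)
        derivative_slope = sum(e*xi for e, xi in zip(errors, x))
        # 3b-c./4b-c. decrease each parameter by step_size times its derivative
        intercept = intercept - step_size * derivative_intercept
        slope = slope - step_size * derivative_slope
        # 5./6. magnitude of the gradient and the convergence check
        magnitude = sqrt(derivative_intercept**2 + derivative_slope**2)
        if magnitude < tolerance:
            break
    return intercept, slope
# should approach the closed-form fit for this data, intercept -1 and slope 5
print gradient_descent_sketch([0,1,2,3,4], [1,3,7,13,21])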
# ## The algorithm in Action
# **WARNING** in python notebooks the cell values persist, so if you shift-enter a cell twice
# it is calculated twice. So, if you execute one of the cells below twice to see how it works,
# it will update the values in memory rather than starting from the initial conditions below.
# In[30]:
sx = graphlab.SArray([0,1,2,3,4])
sy = graphlab.SArray([1,3,7,13,21])
step_size = 0.05
intercept = 0
slope = 0
tolerance = 0.01
# ### First Step
# In[31]:
# 1. compute the predicted values given the current slope and intercept; with both still 0, every prediction is 0
spredictions = graphlab.SArray([0,0,0,0,0])
# 2. compute the prediction errors
serrors = spredictions - sy
# 3. Update the intercept
#
# a. compute the derivative: sum(errors)
sderivative_intercept = serrors.sum()
# b. compute the adjustment
sadjustment = step_size * sderivative_intercept
# c. decrease the intercept by the adjustment
intercept = intercept - sadjustment
# 4. update slope
#
# a. compute the derivative: sum(errors*input); here the input is x
sderivative_slope = (serrors*sx).sum()
# b. compute the adjustment = step_size * derivative
sadjustment = step_size * sderivative_slope
# c. decrease the slope by the adjustment
slope = slope - sadjustment
# 5. compute the magnitude of the gradient
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# magnitude > tolerance, so we have not converged yet
#
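# As a hand check of this first step: errors = [0,0,0,0,0] - [1,3,7,13,21]
# = [-1,-3,-7,-13,-21], so sum(errors) = -45 and intercept = 0 - 0.05*(-45) = 2.25.
# Likewise sum(errors*x) = -3 - 14 - 39 - 84 = -140, so slope = 0 - 0.05*(-140) = 7,
# and the magnitude is sqrt(45**2 + 140**2), about 147.05.
#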
# In[32]:
def compute_intercept0():
    global sderivative_intercept
    global sadjustment
    global intercept
    sderivative_intercept = serrors.sum()
    sadjustment = step_size * sderivative_intercept
    intercept = intercept - sadjustment

def compute_slope0():
    global sderivative_slope
    global sadjustment
    global slope
    sderivative_slope = (serrors*sx).sum()
    sadjustment = step_size * sderivative_slope
    slope = slope - sadjustment

# this updates predictions using the current slope and intercept
def f0(x):
    global slope
    global intercept
    return (x*slope + intercept)
# ### Second Step
# at the start of this step,
# intercept should be 2.25
# slope should be 7
# In[33]:
spredictions = f0(sx)
serrors = spredictions - sy
compute_intercept0()
compute_slope0()
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# ### Third Step
# at the start of this step,
# intercept should be 0.4375
# slope should be 2.375
# In[34]:
spredictions = f0(sx)
serrors = spredictions - sy
compute_intercept0()
compute_slope0()
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# #### OK, we see what is going on now,
# so let's do it a little more professionally
# In[154]:
# Initial setup
sx = graphlab.SArray([0,1,2,3,4])
sy = graphlab.SArray([1,3,7,13,21])
step_size = 0.05
intercept = 0.0
slope = 0.0
tolerance = 0.01
def compute_intercept(intercept, step_size, errors):
    derivative_intercept = errors.sum()
    adjustment = step_size * derivative_intercept
    intercept = intercept - adjustment
    return intercept, derivative_intercept

def compute_slope(slope, step_size, errors, x):
    derivative_slope = (errors*x).sum()
    adjustment = step_size * derivative_slope
    slope = slope - adjustment
    return slope, derivative_slope

# this updates predictions using the current slope and intercept
def f(x, slope, intercept):
    return (x*slope + intercept)
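
# A quick sanity check of the refactored helpers (illustrative only;
# errors_check is a throwaway name, not part of the walkthrough): one pass
# from the initial conditions should reproduce the First Step values above.
errors_check = f(sx, 0.0, 0.0) - sy
print compute_intercept(0.0, step_size, errors_check)   # expect (2.25, -45.0)
print compute_slope(0.0, step_size, errors_check, sx)   # expect (7.0, -140.0)
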
# Accumulate the results: calculate_new_estimate (below) takes an sframe and the number
# of iterations as input and outputs an sframe with a new column of predictions for each iteration.
x_y_yhat_SF = graphlab.SFrame({'x':sx, 'y':sy})
# Accumulate the slope and intercept
slope_intercept_SF = graphlab.SFrame({'slope':[slope], 'intercept':[intercept]})
def calculate_new_estimate(x_y_yhat_SF, slope_intercept_SF, step_size, iterations=5):
    i = 0
    slope = slope_intercept_SF['slope'][0]
    intercept = slope_intercept_SF['intercept'][0]
    # Accumulate the magnitude; seed with a dummy row at iteration -1 so the appends below have a frame to build on
    magnitude_SF = graphlab.SFrame({'iteration':[-1], 'magnitude':[0.0]})
    # TODO: break out of the loop early once the magnitude drops below tolerance
    while (i < iterations):
        # our prediction is yhat where yhat = mx + b
        predictions = f(x_y_yhat_SF['x'], slope, intercept)
        # compute the error
        errors = predictions - x_y_yhat_SF['y']
        # compute new w0|intercept|b and w1|slope|m
        intercept, derivative_intercept = compute_intercept(intercept, step_size, errors)
        slope, derivative_slope = compute_slope(slope, step_size, errors, x_y_yhat_SF['x'])
        # determine the magnitude of the gradient vector
        magnitude = sqrt(derivative_intercept**2 + derivative_slope**2)
        #print intercept, slope, magnitude
        # append the predictions to the x,y,yhat table; each iteration adds a new yhat column
        x_y_yhat_SF.add_column(predictions, name="pred {}".format(i))
        # append the slope and intercept
        slope_intercept_SF = slope_intercept_SF.append(graphlab.SFrame({'slope':[slope], 'intercept':[intercept]}))
        # append the magnitude
        magnitude_SF = magnitude_SF.append(graphlab.SFrame({'iteration':[i], 'magnitude':[magnitude]}))
        # keep track of our iterations
        i = i + 1
    # return the sframes of x/y/yhats, slopes/intercepts, and magnitudes
    return (x_y_yhat_SF, slope_intercept_SF, magnitude_SF)
# In[155]:
(x_y_yhat_SF, slope_intercept_SF, magnitude_SF) = calculate_new_estimate(x_y_yhat_SF, slope_intercept_SF, step_size, iterations=78)
# In[156]:
# show any one of these.
#x_y_yhat_SF.show()
#slope_intercept_SF.show()
magnitude_SF.show()
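# For reference, the closed-form least-squares fit for this data (computed by
# hand: mean x = 2, mean y = 9, slope = 50/10 = 5, intercept = 9 - 5*2 = -1)
# is yhat = 5x - 1, so the accumulated slope and intercept columns should be
# approaching 5 and -1.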
# In[152]:
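# sanity check: the magnitude of the first gradient, computed by hand earlier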
sqrt(45**2 + 140**2)
# In[ ]: