# coding: utf-8

# # Computing Regression Parameters (gradient descent example)

# The data
# Consider the following 5-point synthetic data set.

# In[1]:

import graphlab
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
# In[2]:

# construct a one-dimensional vector using graphlab
sx = graphlab.SArray([0,1,2,3,4])

# In[3]:

# construct a second one-dimensional vector
sy = graphlab.SArray([1,3,7,13,21])

# In[4]:

sx

# In[5]:

sy

# In[6]:
# combine the two vectors into a two-column SFrame
st = graphlab.SFrame({'x': sx, 'y': sy})
# ## which is plotted below

# In[7]:

st.show()
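# In[ ]:

# matplotlib is imported above but never used. As a minimal sketch, here is the
# same five-point data set as a plain matplotlib scatter plot, for anyone
# running this without GraphLab Canvas:
plt.scatter(list(sx), list(sy))
plt.xlabel('x')
plt.ylabel('y')
plt.title('5 point synthetic data set')
plt.show()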
# # What we need
# Now that we have computed the regression line using a closed-form solution, let's do it
# again with gradient descent.
# This corresponds to slide 70 in this week's notes.
# We are doing the material in blue at the bottom of that slide.
#
# The point here is that we are computing w0, the intercept, and w1, the slope.
# These two values form a vector [w0, w1], so they have a direction and a magnitude.
# Each step computes a new [w0, w1] from the old one minus an adjustment (the step size
# times the derivative terms in the brackets).
# In the notes the step is written as 2 * eta; here the factor of 2 is folded into the
# single value step_size = 0.05, which, as mentioned in class, does not vary as the
# loop iterates.
#
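# In symbols, one step of the update is
#
#   [w0, w1] <- [w0, w1] - step_size * [ sum(errors), sum(errors * x) ]
#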
# Recall that (each is written out just below)
#
# * The derivative of the cost with respect to the intercept is the sum of the errors.
# * The derivative of the cost with respect to the slope is the sum of the products of the errors and the input.
#
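# Written out, with yhat_i = w0 + w1*x_i (the factor of 2 from differentiating the
# squared error is the 2 that gets folded into step_size, as noted above):
#
#   RSS(w0, w1) = sum_i (yhat_i - y_i)^2
#   dRSS/dw0    = 2 * sum_i (yhat_i - y_i)       = 2 * sum(errors)
#   dRSS/dw1    = 2 * sum_i (yhat_i - y_i) * x_i = 2 * sum(errors * x)
#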
# We will need a starting value for the slope and intercept, a step_size, and a tolerance:
#
# * initial_intercept = 0
# * initial_slope = 0
# * step_size = 0.05
# * tolerance = 0.01
#
# ## The algorithm
# In each step of the gradient descent we will do the following (a compact numpy sketch
# of one full pass follows the list):
#
# 1. Compute the predicted values given the current slope and intercept
# 2. Compute the prediction errors (prediction - y)
# 3. Update the intercept:
#
#     1. compute the derivative: sum(errors)
#     2. compute the adjustment as step_size times the derivative
#     3. decrease the intercept by the adjustment
#
# 4. Update the slope:
#
#     1. compute the derivative: sum(errors * input)
#     2. compute the adjustment as step_size times the derivative
#     3. decrease the slope by the adjustment
#
# 5. Compute the magnitude of the gradient
# 6. Check it against the tolerance for convergence
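# In[ ]:

# A minimal numpy sketch of one full pass through steps 1-6, using the imports
# above. This is illustrative only (the sketch_ names are local to this cell);
# the cells below walk through the very same arithmetic one piece at a time
# with SArrays.
x = np.array([0, 1, 2, 3, 4])
y = np.array([1, 3, 7, 13, 21])
w = np.array([0.0, 0.0])                                  # [intercept, slope] = [w0, w1]
sketch_step_size = 0.05
sketch_tolerance = 0.01
errors = (w[0] + w[1] * x) - y                            # steps 1 and 2
gradient = np.array([errors.sum(), (errors * x).sum()])   # steps 3a and 4a
w = w - sketch_step_size * gradient                       # steps 3b-c and 4b-c
magnitude = np.sqrt((gradient ** 2).sum())                # step 5
print "converged is {}".format(magnitude < sketch_tolerance)   # step 6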
# ## The algorithm in Action
# **WARNING**: in Python notebooks cell values persist, so if you shift-enter a cell twice
# it is calculated twice. If you execute one of the cells below more than once to see how
# it works, it will update the values already in memory rather than starting over from the
# initial conditions below.

# In[30]:

sx = graphlab.SArray([0,1,2,3,4])
sy = graphlab.SArray([1,3,7,13,21])
step_size = 0.05
intercept = 0
slope = 0
tolerance = 0.01
# ### First Step

# In[31]:

# 1. compute the predicted values given the current slope and intercept
spredictions = graphlab.SArray([0,0,0,0,0])
# 2. compute the prediction errors
serrors = spredictions - sy
# 3. update the intercept
#
# a. compute the derivative: sum(errors)
sderivative_intercept = serrors.sum()
# b. compute the adjustment
sadjustment = step_size * sderivative_intercept
# c. decrease the intercept by the adjustment
intercept = intercept - sadjustment
# 4. update the slope
#
# a. compute the derivative: sum(errors * input); the input here is x
sderivative_slope = (serrors*sx).sum()
# b. compute the adjustment = step_size * derivative
sadjustment = step_size * sderivative_slope
# c. decrease the slope by the adjustment
slope = slope - sadjustment
# 5. compute the magnitude of the gradient
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# magnitude > tolerance, so we have not converged
#
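# In[ ]:

# step 6 as code: the magnitude (about 147.05) is far above the tolerance of
# 0.01, so we have not converged and keep iterating
print "converged is {}".format(magnitude <= tolerance)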
# In[32]:

def compute_intercept0():
    global sderivative_intercept
    global sadjustment
    global intercept
    sderivative_intercept = serrors.sum()
    sadjustment = step_size * sderivative_intercept
    intercept = intercept - sadjustment

def compute_slope0():
    global sderivative_slope
    global sadjustment
    global slope
    sderivative_slope = (serrors*sx).sum()
    sadjustment = step_size * sderivative_slope
    slope = slope - sadjustment

# this computes predictions using the current slope and intercept
def f0(x):
    global slope
    global intercept
    return (x*slope + intercept)
# ### Second Step
# at the start of this step, intercept should be 2.25 and slope should be 7
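# (check: the errors start at [-1,-3,-7,-13,-21], so sum(errors) = -45 and
# sum(errors*x) = -140; intercept = 0 - 0.05 * (-45) = 2.25 and
# slope = 0 - 0.05 * (-140) = 7)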
# In[33]:

spredictions = f0(sx)
serrors = spredictions - sy
compute_intercept0()
compute_slope0()
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# ### Third Step
# at the start of this step, intercept should be 0.4375 and slope should be 2.375

# In[34]:

spredictions = f0(sx)
serrors = spredictions - sy
compute_intercept0()
compute_slope0()
magnitude = sqrt(sderivative_intercept**2 + sderivative_slope**2)
print "intercept is {}".format(intercept)
print "slope is {}".format(slope)
print "magnitude is {}".format(magnitude)
# #### OK, now that we see what is going on,
# let's do it a little more professionally.
# In[154]:

# Initial setup
sx = graphlab.SArray([0,1,2,3,4])
sy = graphlab.SArray([1,3,7,13,21])
step_size = 0.05
intercept = 0.0
slope = 0.0
tolerance = 0.01

def compute_intercept(intercept, step_size, errors):
    derivative_intercept = errors.sum()
    adjustment = step_size * derivative_intercept
    intercept = intercept - adjustment
    return intercept, derivative_intercept

def compute_slope(slope, step_size, errors, x):
    derivative_slope = (errors*x).sum()
    adjustment = step_size * derivative_slope
    slope = slope - adjustment
    return slope, derivative_slope

# this computes predictions using the current slope and intercept
def f(x, slope, intercept):
    return (x*slope + intercept)

# Accumulate our results: the function below takes an SFrame and the number of
# iterations as input, and outputs an SFrame with a new column of predictions
# for each iteration.
x_y_yhat_SF = graphlab.SFrame({'x':sx, 'y':sy})
# Accumulate the slope and intercept
slope_intercept_SF = graphlab.SFrame({'slope':[slope], 'intercept':[intercept]})
def calculate_new_estimate(x_y_yhat_SF, slope_intercept_SF, step_size, iterations=5):
    i = 0
    slope = slope_intercept_SF['slope'][0]
    intercept = slope_intercept_SF['intercept'][0]
    # Accumulate the magnitude; seed with a dummy row for iteration -1
    magnitude_SF = graphlab.SFrame({'iteration':[-1], 'magnitude':[0.0]})
    # TODO: break out of the loop early once the tolerance is met
    # (a sketch of this follows the function)
    while (i < iterations):
        # our prediction is yhat, where yhat = m*x + b
        predictions = f(x_y_yhat_SF['x'], slope, intercept)
        # compute the error
        errors = predictions - x_y_yhat_SF['y']
        # compute the new w0 (intercept, b) and w1 (slope, m)
        intercept, derivative_intercept = compute_intercept(intercept, step_size, errors)
        slope, derivative_slope = compute_slope(slope, step_size, errors, x_y_yhat_SF['x'])
        # determine the magnitude of the gradient vector
        magnitude = sqrt(derivative_intercept**2 + derivative_slope**2)
        #print intercept, slope, magnitude
        # append the predictions to the x,y,yhat table; each iteration adds a new yhat column
        x_y_yhat_SF.add_column(predictions, name="pred {}".format(i))
        # append the slope and intercept
        slope_intercept_SF = slope_intercept_SF.append(graphlab.SFrame({'slope':[slope], 'intercept':[intercept]}))
        # append the magnitude
        magnitude_SF = magnitude_SF.append(graphlab.SFrame({'iteration':[i], 'magnitude':[magnitude]}))
        # keep track of our iterations
        i = i + 1
    # return the sframes of (x, y, yhats), (slope, intercept), and magnitudes
    return (x_y_yhat_SF, slope_intercept_SF, magnitude_SF)
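# A sketch of the early break the TODO above mentions: the same loop, but
# stopping as soon as the gradient magnitude drops below the tolerance.
# (calculate_until_converged is an illustrative name, not part of the original
# notebook; it reuses f, compute_intercept, and compute_slope from above.)
def calculate_until_converged(x, y, step_size, tolerance, max_iterations=1000):
    slope = 0.0
    intercept = 0.0
    magnitude = tolerance + 1.0   # anything above tolerance, to enter the loop
    i = 0
    while (i < max_iterations) and (magnitude > tolerance):
        predictions = f(x, slope, intercept)
        errors = predictions - y
        intercept, derivative_intercept = compute_intercept(intercept, step_size, errors)
        slope, derivative_slope = compute_slope(slope, step_size, errors, x)
        magnitude = sqrt(derivative_intercept**2 + derivative_slope**2)
        i = i + 1
    return (slope, intercept, magnitude, i)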
# In[155]:

(x_y_yhat_SF, slope_intercept_SF, magnitude_SF) = calculate_new_estimate(x_y_yhat_SF, slope_intercept_SF, step_size, iterations=78)

# In[156]:

# show any one of the three accumulated tables
#x_y_yhat_SF.show()
#slope_intercept_SF.show()
magnitude_SF.show()

# In[152]:

# sanity check: the gradient magnitude after the first step, sqrt(45^2 + 140^2)
sqrt(45**2 + 140**2)
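# In[ ]:

# For reference, a quick check of the closed-form least-squares fit the
# iterates should be approaching, using SArray arithmetic (ls_slope and
# ls_intercept are new names for this sketch; for these five points they work
# out to 5 and -1):
xbar = sx.mean()
ybar = sy.mean()
ls_slope = ((sx - xbar) * (sy - ybar)).sum() / ((sx - xbar) * (sx - xbar)).sum()
ls_intercept = ybar - ls_slope * xbar
print "least-squares slope is {}, intercept is {}".format(ls_slope, ls_intercept)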
# In[ ]: