Linear Regression with Stochastic Gradient Descent
import numpy as np


def linear_regression(x, y, lr=1.0e-4, n_steps=10000, lr_decay=0.5):
    """Solves the linear regression problem: finds w = (w_1, ..., w_n) such that

        w* = argmin_w L(w),   L(w) = sum_{i=1}^{m} (<w, x_i> - y_i)^2   (sum of squared errors)
        <w, x_i> = w_1*x_{i1} + w_2*x_{i2} + ... + w_n*x_{in}

    Solutions:
    Algo #2: gradient descent (for the closed-form solution one would set dL/dw_j = 0, j = 1, ..., n).
    L(w) is a convex function (can you prove it?*) => every local minimum is a global minimum as well!
    The vector dL/dw points in the direction of function growth, so we step along -dL/dw:

        dL/dw_j = sum_{i=1}^{m} 2*x_{ij}*(<w, x_i> - y_i)
        dL/dw   = 2*X^T@X@w - 2*X^T@y

    and then update the vector w:

        w_t = w_{t-1} - lr*dL/dw

    Questions:
    1. How to select a learning rate?
       (a) simple heuristic: whenever the MSE increases, decrease the rate by some factor
       (b) constant decay: at every step, decrease it a bit:
           lr_t = (eps^t) * lr_ini
    """
    def _get_gradient(w):
        # dL/dw_j = \sum_{i=1}^{m} 2*x_{ij}*(<w, x_i> - y_i)
        # <=> dL/dw = 2*(X^T @ X @ w - X^T @ y)
        dw = 2*(x.T@x@w - x.T@y)
        return dw

    n_samples, n_params = x.shape
    w = np.random.normal(size=(n_params, 1))

    def _mse(x, y, w):
        # sum of squared errors of the current fit
        return np.sum(np.square(x@w - y))

    curr_mse = _mse(x, y, w)
    for i in range(0, n_steps):
        dw = _get_gradient(w)
        w = w - lr*dw
        new_mse = _mse(x, y, w)
        # heuristic (a) from the docstring: if the error increased, the step was too large
        if new_mse > curr_mse:
            lr *= lr_decay
            print("decreasing learning rate, current: %f" % lr)
        curr_mse = new_mse
    return w
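
# The gist title mentions *stochastic* gradient descent, while `linear_regression` above uses
# the full batch at every step. Below is a minimal illustrative sketch of a mini-batch variant
# of the same objective; the name `linear_regression_sgd` and the `batch_size` parameter are
# assumptions added for illustration, not part of the original code.
def linear_regression_sgd(x, y, lr=1.0e-4, n_steps=10000, batch_size=32):
    """Same objective as `linear_regression`, but each step uses a random mini-batch."""
    n_samples, n_params = x.shape
    w = np.random.normal(size=(n_params, 1))
    for i in range(n_steps):
        # sample a mini-batch of rows and compute the gradient on that batch only
        idx = np.random.choice(n_samples, size=min(batch_size, n_samples), replace=False)
        xb, yb = x[idx], y[idx]
        dw = 2*(xb.T@xb@w - xb.T@yb)
        w = w - lr*dw
    return w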
# DEMO
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
%matplotlib inline

# Artificial data: we set up a known regression task, y = 1 + 2*x + noise
x = np.arange(0, 1, 0.01)
x = np.vstack([np.ones(x.shape), x]).T
y = x@np.array([[1], [2]]) + np.random.normal(size=[x.shape[0], 1], scale=0.1)

w = linear_regression(x, y, lr=1.0e-4, n_steps=10000, lr_decay=0.5)

xr = np.arange(0, 1, 0.01)
xr = np.vstack([np.ones(xr.shape), xr]).T
yr = xr@w

ypreds = x@w
mse = np.mean(np.square(y - ypreds))
print("mean squared error: %f" % mse)

plt.title("Artificial data")
plt.scatter(xr[:, 1], yr, s=4)
plt.scatter(x[:, 1], y)
plt.show()
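
# Sanity check (a sketch, not in the original gist): np.linalg.lstsq solves the same
# least-squares objective in closed form, so on this artificial data its solution should
# roughly match the gradient-descent weights.
w_closed, _, _, _ = np.linalg.lstsq(x, y, rcond=None)
print("gradient descent w:", w.ravel())
print("closed-form w:", w_closed.ravel())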
# Regressing the Boston housing target on a single feature (column 12, LSTAT)
ds = load_boston()
x = ds['data'][:, 12]
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)  # standardize the feature
x = np.vstack([np.ones(x.shape), x]).T
y = ds['target'].reshape([-1, 1])

w = linear_regression(x, y, lr=1.0e-2, n_steps=100, lr_decay=0.1)

xr = np.arange(-3, 3, 0.1)
xr = np.vstack([np.ones(xr.shape), xr]).T
yr = xr@w

ypreds = x@w
mse = np.mean(np.square(y - ypreds))
print("mean squared error: %f" % mse)

plt.title("Boston housing data")
plt.scatter(xr[:, 1], yr, s=4, label='learned regressor')
plt.scatter(x[:, 1], y, label='data')
plt.legend()
plt.show()
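
# Optional cross-check (a sketch, assuming scikit-learn's LinearRegression is available):
# fitting the same single-feature model with the library should give similar coefficients
# and MSE to the gradient-descent fit above.
from sklearn.linear_model import LinearRegression
reg = LinearRegression(fit_intercept=False).fit(x, y)  # x already contains the bias column
print("sklearn coefficients:", reg.coef_.ravel())
print("sklearn MSE: %f" % np.mean(np.square(y - reg.predict(x))))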