import numpy as np

# Setting a random seed; feel free to change it and see different solutions.
np.random.seed(42)

# TODO: Fill in code in the function below to implement a gradient descent
# step for linear regression, following a squared error rule. See the docstring
# for parameters and returned variables.
def MSEStep(X, y, W, b, learn_rate=0.005):
""" | |
This function implements the gradient descent step for squared error as a | |
performance metric. | |
Parameters | |
X : array of predictor features | |
y : array of outcome values | |
W : predictor feature coefficients | |
b : regression function intercept | |
learn_rate : learning rate | |
Returns | |
W_new : predictor feature coefficients following gradient descent step | |
b_new : intercept following gradient descent step | |
""" | |
    # compute predictions and errors
    y_pred = np.matmul(X, W) + b
    error = y - y_pred

    # For squared error, the step directions error @ X (for W) and
    # error.sum() (for b) point opposite the gradient of the loss, so the
    # updates below reduce it; the constant factor is folded into learn_rate.
    W_new = W + learn_rate * np.matmul(error, X)
    b_new = b + learn_rate * error.sum()
    return W_new, b_new
# The parts of the script below will be run when you press the "Test Run"
# button. The gradient descent step will be performed multiple times on
# the provided dataset, and the returned list of regression coefficients
# will be plotted.
def miniBatchGD(X, y, batch_size=20, learn_rate=0.005, num_iter=25):
""" | |
This function performs mini-batch gradient descent on a given dataset. | |
Parameters | |
X : array of predictor features | |
y : array of outcome values | |
batch_size : how many data points will be sampled for each iteration | |
learn_rate : learning rate | |
num_iter : number of batches used | |
Returns | |
regression_coef : array of slopes and intercepts generated by gradient | |
descent procedure | |
""" | |
    n_points = X.shape[0]
    W = np.zeros(X.shape[1])  # coefficients
    b = 0  # intercept

    # run iterations
    regression_coef = [np.hstack((W, b))]
    for _ in range(num_iter):
        # sample a mini-batch of row indices (with replacement)
        batch = np.random.choice(range(n_points), batch_size)
        X_batch = X[batch, :]
        y_batch = y[batch]
        W, b = MSEStep(X_batch, y_batch, W, b, learn_rate)
        regression_coef.append(np.hstack((W, b)))
    return regression_coef
if __name__ == "__main__":
    # perform gradient descent
    data = np.loadtxt('data.csv', delimiter=',')
    X = data[:, :-1]
    y = data[:, -1]
    regression_coef = miniBatchGD(X, y)

    # plot the results
    import matplotlib.pyplot as plt
    plt.figure()
    X_min = X.min()
    X_max = X.max()
    counter = len(regression_coef)
    for W, b in regression_coef:
        counter -= 1
        # fade from light gray (early iterations) to black (final fit)
        color = [1 - 0.92 ** counter for _ in range(3)]
        plt.plot([X_min, X_max], [X_min * W + b, X_max * W + b], color=color)
    plt.scatter(X, y, zorder=3)
    plt.show()
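Since the update in MSEStep is the negative gradient of the halved squared-error loss L = 0.5 * sum((y - XW - b)**2) scaled by learn_rate, it can be sanity-checked numerically. The sketch below is a minimal check, not part of the original gist: it reuses the MSEStep defined above on small synthetic data, and the names, sizes, and tolerances are arbitrary assumptions.

import numpy as np

def half_sse(X, y, W, b):
    # halved sum of squared errors; its negative gradient is exactly the
    # step direction used in MSEStep (the factor of 2 cancels)
    error = y - (np.matmul(X, W) + b)
    return 0.5 * np.sum(error ** 2)

rng = np.random.default_rng(0)
X_toy = rng.normal(size=(10, 1))
y_toy = 2.0 * X_toy[:, 0] + 1.0 + rng.normal(scale=0.1, size=10)
W0, b0 = np.array([0.3]), -0.2

# learn_rate=1.0 isolates the raw step direction
W1, b1 = MSEStep(X_toy, y_toy, W0, b0, learn_rate=1.0)
step_W, step_b = W1 - W0, b1 - b0

# central-difference estimate of the negative gradient
eps = 1e-6
num_W = -(half_sse(X_toy, y_toy, W0 + eps, b0) - half_sse(X_toy, y_toy, W0 - eps, b0)) / (2 * eps)
num_b = -(half_sse(X_toy, y_toy, W0, b0 + eps) - half_sse(X_toy, y_toy, W0, b0 - eps)) / (2 * eps)
print(np.allclose(step_W, num_W), np.isclose(step_b, num_b))  # expect: True True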
data.csv, the dataset loaded by the script above:
-0.72407,2.23863
-2.40724,-0.00156
2.64837,3.01665
0.36092,2.31019
0.67312,2.05950
-0.45460,1.24736
2.20168,2.82497
1.15605,2.21802
0.50694,1.43644
-0.85952,1.74980
-0.59970,1.63259
1.46804,2.43461
-1.05659,1.02226
1.29177,3.11769
-0.74565,0.81194
0.15033,2.81910
-1.49627,0.53105
-0.72071,1.64845
0.32924,1.91416
-0.28053,2.11376
-1.36115,1.70969
0.74678,2.92253
0.10621,3.29827
0.03256,1.58565
-0.98290,2.30455
-1.15661,1.79169
0.09024,1.54723
-1.03816,1.06893
-0.00604,1.78802
0.16278,1.84746
-0.69869,1.58732
1.03857,1.94799
-0.11783,3.09324
-0.95409,1.86155
-0.81839,1.88817
-1.28802,1.39474
0.62822,1.71526
-2.29674,1.75695
-0.85601,1.12981
-1.75223,1.67000
-1.19662,0.66711
0.97781,3.11987
-1.17110,0.56924
0.15835,2.28231
-0.58918,1.23798
-1.79678,1.35803
-0.95727,1.75579
0.64556,1.91470
0.24625,2.33029
0.45917,3.25263
1.21036,2.07602
-0.60116,1.54254
0.26851,2.79202
0.49594,1.96178
-2.67877,0.95898
0.49402,1.96690
1.18643,3.06144
-0.17741,1.85984
0.57938,1.82967
-2.14926,0.62285
2.27700,3.63838
-1.05695,1.11807
1.68288,2.91735
-1.53513,1.99668
0.00099,1.76149
0.45520,2.31938
-0.37855,0.90172
1.35638,3.49432
0.01763,1.87838
2.21725,2.61171
-0.44442,2.06623
0.89583,3.04041
1.30499,2.42824
0.10883,0.63190
1.79466,2.95265
-0.00733,1.87546
0.79862,3.44953
-0.12353,1.53740
-1.34999,1.59958
-0.67825,1.57832
-0.17901,1.73312
0.12577,2.00244
1.11943,2.08990
-3.02296,1.09255
0.64965,1.28183
1.05994,2.32358
0.53360,1.75136
-0.73591,1.43076
-0.09569,2.81376
1.04694,2.56597
0.46511,2.36401
-0.75463,2.30161
-0.94159,1.94500
-0.09314,1.87619
-0.98641,1.46602
-0.92159,1.21538
0.76953,2.39377
0.03283,1.55730
-1.07619,0.70874
0.20174,1.76894
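With only 100 points, the mini-batch result can be compared against the exact least-squares fit. A minimal sketch, an addition rather than part of the gist, assuming the table above is saved as data.csv:

import numpy as np

data = np.loadtxt('data.csv', delimiter=',')
x, y = data[:, 0], data[:, 1]

# exact ordinary-least-squares fit; polyfit returns [slope, intercept]
slope, intercept = np.polyfit(x, y, 1)
print(slope, intercept)  # the last entry of regression_coef should be close to this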
A second, completed copy of MSEStep as a standalone snippet (note the smaller default learning rate of 0.001):
import numpy as np

def MSEStep(X, y, W, b, learn_rate=0.001):
    """
    This function implements the gradient descent step for squared error as a
    performance metric.

    Parameters
    X : array of predictor features
    y : array of outcome values
    W : predictor feature coefficients
    b : regression function intercept
    learn_rate : learning rate

    Returns
    W_new : predictor feature coefficients following gradient descent step
    b_new : intercept following gradient descent step
    """
    # compute errors
    y_pred = np.matmul(X, W) + b
    error = y - y_pred

    # compute steps
    W_new = W + learn_rate * np.matmul(error, X)
    b_new = b + learn_rate * error.sum()
    return W_new, b_new
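As a quick usage check (an illustrative addition, not part of the snippet above), a single step on one hand-picked point is easy to verify by hand: with W = 0 and b = 0 the error equals y itself, so both parameters move by learn_rate * error.

X_one = np.array([[1.0]])
y_one = np.array([2.0])
W0, b0 = np.array([0.0]), 0.0

# error = 2.0, so W moves by 0.001 * 2.0 * 1.0 and b by 0.001 * 2.0
W1, b1 = MSEStep(X_one, y_one, W0, b0)
print(W1, b1)  # [0.002] 0.002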