|
# |
|
# check_costfun.py
#
# Gradient-checking utilities for the collaborative filtering cost function:
# compares the analytical gradient against a central-difference estimate.
#
# date: 11/8/2016
|
# |
|
|
|
import numpy as np |
|
|
|
## =============== cofiCostFunc.m part ================================== |
|
def cofi_costfunc(Y, R, shapes, lambda_c, params=None): |
|
    # J = cofi_costfunc(Y, R, shapes, lambda_c, params) returns the
    # regularized cost for the collaborative filtering problem, where
    # shapes = [num_users, num_movies, num_features]. The gradient is
    # computed separately by cofi_costfunc_grad below.

    # Unfold the X and Theta matrices from the flat params vector
|
num_users = shapes[0] |
|
num_movies = shapes[1] |
|
num_features = shapes[2] |
|
boundary = num_movies * num_features |
|
params_c1 = np.copy(params[:boundary]) |
|
params_c2 = np.copy(params[boundary:]) |
|
X = params_c1.reshape((num_movies, num_features)) |
|
Theta = params_c2.reshape((num_users, num_features)) |
|
|
|
    # Regularized squared-error cost over the rated entries (R == 1)
    Jcost = 0.5 * (((np.dot(X, Theta.T) - Y) * R) ** 2).sum() \
        + 0.5 * lambda_c * ((X ** 2).sum() + (Theta ** 2).sum())
|
|
|
return Jcost |
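
# A minimal sanity check of cofi_costfunc (illustrative values, not from any
# exercise data): with all parameters zero the predictions are zero, so the
# unregularized cost is just 0.5 * the sum of the squared observed ratings.
def _demo_cost():
    Y = np.array([[5., 0.],
                  [0., 3.]])            # 2 movies x 2 users
    R = (Y != 0.).astype(float)         # 1 where a rating exists
    shapes = [2, 2, 2]                  # [num_users, num_movies, num_features]
    params = np.zeros(2 * 2 + 2 * 2)    # X (2x2) and Theta (2x2), flattened
    print(cofi_costfunc(Y, R, shapes, 0.0, params))   # 0.5 * (25 + 9) = 17.0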
|
|
|
def cofi_costfunc_grad(Y, R, shapes, lambda_c, params=None): |
|
    # Jgrad = cofi_costfunc_grad(Y, R, shapes, lambda_c, params) returns the
    # gradient of the regularized collaborative filtering cost with respect
    # to every entry of X and Theta, flattened into a single vector with the
    # same layout as params.

    # Unfold the X and Theta matrices from the flat params vector
|
num_users = shapes[0] |
|
num_movies = shapes[1] |
|
num_features = shapes[2] |
|
boundary = num_movies * num_features |
|
params_c1 = np.copy(params[:boundary]) |
|
params_c2 = np.copy(params[boundary:]) |
|
X = params_c1.reshape((num_movies, num_features)) |
|
Theta = params_c2.reshape((num_users, num_features)) |
|
|
|
    # Prediction error on the rated entries; both gradients reuse it
    err = (np.dot(X, Theta.T) - Y) * R
    X_grad = np.dot(err, Theta) + lambda_c * X
    Theta_grad = np.dot(err.T, X) + lambda_c * Theta
|
Jgrad = np.concatenate((X_grad.ravel(), Theta_grad.ravel())) |
|
|
|
return Jgrad |
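
# Quick structural check (illustrative sizes only): the gradient vector must
# line up entry-for-entry with params so that it can be compared against the
# numerical estimate produced by compute_numerical_grad below.
def _demo_grad_shape():
    num_users, num_movies, num_features = 5, 4, 3
    Y = np.random.rand(num_movies, num_users)
    R = np.ones_like(Y)                   # pretend every entry is rated
    params = np.random.randn(num_movies * num_features
                             + num_users * num_features)
    g = cofi_costfunc_grad(Y, R, [num_users, num_movies, num_features],
                           0.0, params)
    assert g.shape == params.shape        # one partial derivative per parameter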
|
|
|
def compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta): |
|
    # compute_numerical_grad computes the gradient using "finite differences"
|
    # and gives us a numerical estimate of the gradient.
|
    # numgrad = compute_numerical_grad(cost_fn, Y, R, shapes, lambda_c, theta)
    # computes the numerical gradient of cost_fn around theta. Calling
    # y = cost_fn(Y, R, shapes, lambda_c, theta) should return the cost at theta.
|
|
|
    # Notes: the following code implements numerical gradient checking and
    # returns the numerical gradient. It sets numgrad[p] to a numerical
    # approximation of the partial derivative of the cost with respect to
    # the p-th parameter, evaluated at theta (i.e., numgrad[p] should be
    # approximately the partial derivative with respect to theta[p]).
|
|
|
numgrad = np.zeros_like(theta) |
|
perturb = np.zeros_like(theta) |
|
eps = 1.e-4 |
|
|
|
for p in range(len(theta)): |
|
# Set perturbation vector |
|
perturb[p] = eps |
|
params1 = theta - perturb |
|
params2 = theta + perturb |
|
loss1 = cost_fn(Y, R, shapes, lambda_c, params1) |
|
loss2 = cost_fn(Y, R, shapes, lambda_c, params2) |
|
# Compute Numerical Gradient |
|
numgrad[p] = (loss2 - loss1) / (2. * eps) |
|
perturb[p] = 0. |
|
|
|
return numgrad |
|
# |
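
# A toy check of the central-difference estimate (made-up cost function; the
# Y/R/shapes arguments are unused here and exist only to match the expected
# cost_fn signature): for J(theta) = sum(theta**2) the exact gradient is
# 2 * theta, and the estimate should agree to roughly 1e-8 with eps = 1e-4.
def _demo_numgrad():
    def toy_cost(Y, R, shapes, lam, params):
        return (params ** 2).sum()
    theta = np.array([1., -2., 3.])
    print(compute_numerical_grad(toy_cost, None, None, None, 0.0, theta))
    # expected: approximately [ 2. -4.  6.]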
|
|
|
def check_costfn(lambda_c=0.0): |
|
|
|
# Create small problem |
|
X_t = np.random.rand(4, 3) |
|
Theta_t = np.random.rand(5, 3) |
|
|
|
# Zap out most entries |
|
Y = np.dot(X_t, Theta_t.T) |
|
Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0. |
|
R = np.zeros_like(Y) |
|
R[Y != 0.] = 1. |
|
|
|
# Run Gradient Checking |
|
X = np.random.randn(4, 3) |
|
Theta = np.random.randn(5, 3) |
|
num_users = Y.shape[1] |
|
num_movies = Y.shape[0] |
|
num_features = Theta_t.shape[1] |
|
theta_arg = np.concatenate((X.ravel(), Theta.ravel())) |
|
shapes = [num_users, num_movies, num_features] |
|
|
|
numgrad = compute_numerical_grad( |
|
cofi_costfunc, Y, R, shapes, lambda_c, theta_arg |
|
) |
|
|
|
grad_ret = cofi_costfunc_grad(Y, R, shapes, lambda_c, theta_arg) |
|
grad = grad_ret.ravel() |
|
|
|
print(' [numgrad : grad] ') |
|
    for i in range(len(numgrad)):
        print(' {0:>10.4f} : {1:>10.4f} '.format(numgrad[i], grad[i]))
|
|
|
    print('The above two columns you get should be very similar.\n',
          '(Left: your numerical gradient, Right: analytical gradient)\n\n')
|
|
|
diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad) |
|
|
|
    print('If your cost function and gradient implementation is correct,\n'
          'then the relative difference will be small (less than 1e-9).\n'
          '\nRelative Difference: ', diff)
|
# |
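
# Typical invocation (the lambda value below is an example; checking both an
# unregularized and a regularized case is a sensible default):
if __name__ == '__main__':
    check_costfn()        # lambda = 0: gradient of the unregularized cost
    check_costfn(1.5)     # with regularization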