Created
October 9, 2018 01:51
-
-
Save srikarplus/1572f7c06b44ad402ec9583e9b97beee to your computer and use it in GitHub Desktop.
gradient for neural network
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nnGrad(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    """Return the unrolled, regularized gradient of a one-hidden-layer network.

    The weights are unpacked from ``nn_params`` (column-major), each row of
    ``X`` is pushed forward through the network and its error propagated
    back, and the accumulated gradients are averaged, regularized and
    flattened again (column-major) in the same order as ``nn_params``.

    Args:
        nn_params: 1-D array holding both weight matrices, flattened in
            column-major ('F') order: first the
            ``(hidden_layer_size, input_layer_size + 1)`` matrix, then the
            ``(num_labels, hidden_layer_size + 1)`` matrix.
        input_layer_size: Number of input features (bias excluded).
        hidden_layer_size: Number of hidden units (bias excluded).
        num_labels: Number of output units.
        X: 2-D array with one sample per row; ``X[i]`` must have
            ``input_layer_size`` entries.
        y: Labels, one per row of ``X``. They are one-hot encoded with
            ``pandas.get_dummies``, i.e. one column per distinct value.
        lmbda: Regularization strength; bias columns are not regularized.

    Returns:
        1-D array: both gradient matrices flattened in column-major order and
        concatenated (first-layer gradient, then second-layer gradient).

    Raises:
        ValueError: If the number of distinct values in ``y`` differs from
            ``num_labels``, since the one-hot targets could then not be
            matched against the output layer.

    Note:
        Relies on ``sigmoid`` and ``sigmoidGrad`` being defined at module
        level; both are called with 1-D arrays.
    """
    split = hidden_layer_size * (input_layer_size + 1)
    initial_theta1 = np.reshape(
        nn_params[:split], (hidden_layer_size, input_layer_size + 1), order='F')
    initial_theta2 = np.reshape(
        nn_params[split:], (num_labels, hidden_layer_size + 1), order='F')

    # Materialize the one-hot targets as a plain float array once. Indexing a
    # pandas Series with np.newaxis per sample is not supported by current
    # pandas releases, and get_dummies may return a boolean frame.
    y_onehot = np.asarray(pd.get_dummies(np.asarray(y).ravel()), dtype=float)
    if y_onehot.shape[1] != num_labels:
        raise ValueError(
            "y contains %d distinct label(s) but num_labels is %d; every "
            "class must appear in y for the one-hot targets to line up with "
            "the output layer" % (y_onehot.shape[1], num_labels))

    delta1 = np.zeros(initial_theta1.shape)
    delta2 = np.zeros(initial_theta2.shape)
    m = len(y)
    bias = np.ones(1)  # loop-invariant bias unit

    for i in range(X.shape[0]):
        # Forward pass for sample i.
        a1 = np.hstack((bias, X[i]))
        z2 = a1 @ initial_theta1.T
        a2 = np.hstack((bias, sigmoid(z2)))
        z3 = a2 @ initial_theta2.T
        a3 = sigmoid(z3)

        # Backward pass. z2 is padded with the bias entry so it lines up with
        # the rows of initial_theta2.T; the bias row is dropped again below.
        d3 = a3 - y_onehot[i]
        z2_padded = np.hstack((bias, z2))
        d2 = np.multiply(initial_theta2.T @ d3, sigmoidGrad(z2_padded))

        # Outer products accumulate each sample's contribution.
        delta1 += np.outer(d2[1:], a1)
        delta2 += np.outer(d3, a2)

    delta1 /= m
    delta2 /= m

    # Regularize every weight except the bias column (column 0).
    delta1[:, 1:] += initial_theta1[:, 1:] * lmbda / m
    delta2[:, 1:] += initial_theta2[:, 1:] * lmbda / m

    return np.hstack((delta1.ravel(order='F'), delta2.ravel(order='F')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment