backward_propagation_with_dropout
# GRADED FUNCTION: backward_propagation_with_dropout

import numpy as np


def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    """
    Implements the backward propagation of our baseline model to which we added dropout.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    cache -- cache output from forward_propagation_with_dropout()
    keep_prob -- probability of keeping a neuron active during drop-out, scalar

    Returns:
    gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
    """

    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T)
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(W3.T, dZ3)
    ### START CODE HERE ### (≈ 2 lines of code)
    dA2 = np.multiply(dA2, D2)  # Step 1: Apply mask D2 to shut down the same neurons as during the forward propagation
    dA2 = dA2 / keep_prob       # Step 2: Scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T)
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    ### START CODE HERE ### (≈ 2 lines of code)
    dA1 = np.multiply(dA1, D1)  # Step 1: Apply mask D1 to shut down the same neurons as during the forward propagation
    dA1 = dA1 / keep_prob       # Step 2: Scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T)
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients
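

# --- Minimal usage sketch (not part of the graded function) ---
# A quick, self-contained check of backward_propagation_with_dropout. The layer
# sizes (2 -> 3 -> 3 -> 1), random weights, and the hand-rolled forward pass
# below are illustrative assumptions; in the course notebook the cache would
# instead come from forward_propagation_with_dropout().
if __name__ == "__main__":
    np.random.seed(1)
    keep_prob = 0.8
    m = 5                                          # number of examples
    X = np.random.randn(2, m)                      # inputs, shape (2, m)
    Y = (np.random.rand(1, m) > 0.5).astype(int)   # binary labels, shape (1, m)

    # Assumed parameter shapes for a 2 -> 3 -> 3 -> 1 network
    W1, b1 = np.random.randn(3, 2), np.zeros((3, 1))
    W2, b2 = np.random.randn(3, 3), np.zeros((3, 1))
    W3, b3 = np.random.randn(1, 3), np.zeros((1, 1))

    relu = lambda z: np.maximum(0, z)
    sigmoid = lambda z: 1. / (1. + np.exp(-z))

    # Forward pass with inverted dropout on the two hidden layers,
    # mirroring the cache layout the backward pass expects.
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(*A1.shape) < keep_prob     # dropout mask, layer 1
    A1 = A1 * D1 / keep_prob                       # inverted dropout scaling
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(*A2.shape) < keep_prob     # dropout mask, layer 2
    A2 = A2 * D2 / keep_prob
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)                               # output layer: no dropout

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    gradients = backward_propagation_with_dropout(X, Y, cache, keep_prob)
    print(gradients["dW1"].shape, gradients["dW2"].shape, gradients["dW3"].shape)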