backward_propagation_with_dropout
# GRADED FUNCTION: backward_propagation_with_dropout

import numpy as np


def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    """
    Implements the backward propagation of our baseline model to which we added dropout.

    Arguments:
    X -- input dataset, of shape (2, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    cache -- cache output from forward_propagation_with_dropout()
    keep_prob -- probability of keeping a neuron active during drop-out, scalar

    Returns:
    gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
    """

    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T)
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(W3.T, dZ3)
    ### START CODE HERE ### (≈ 2 lines of code)
    dA2 = np.multiply(dA2, D2)  # Step 1: Apply mask D2 to shut down the same neurons as during the forward propagation
    dA2 = dA2 / keep_prob       # Step 2: Scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T)
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    ### START CODE HERE ### (≈ 2 lines of code)
    dA1 = np.multiply(dA1, D1)  # Step 1: Apply mask D1 to shut down the same neurons as during the forward propagation
    dA1 = dA1 / keep_prob       # Step 2: Scale the value of neurons that haven't been shut down
    ### END CODE HERE ###
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T)
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients
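

# --- Minimal usage sketch (not part of the graded function) ---
# A quick, self-contained check of backward_propagation_with_dropout. The layer
# sizes (2 -> 3 -> 3 -> 1), random weights, and the hand-rolled forward pass
# below are illustrative assumptions; in the course notebook the cache would
# instead come from forward_propagation_with_dropout().
if __name__ == "__main__":
    np.random.seed(1)
    keep_prob = 0.8
    m = 5                                          # number of examples
    X = np.random.randn(2, m)                      # inputs, shape (2, m)
    Y = (np.random.rand(1, m) > 0.5).astype(int)   # binary labels, shape (1, m)

    # Assumed parameter shapes for a 2 -> 3 -> 3 -> 1 network
    W1, b1 = np.random.randn(3, 2), np.zeros((3, 1))
    W2, b2 = np.random.randn(3, 3), np.zeros((3, 1))
    W3, b3 = np.random.randn(1, 3), np.zeros((1, 1))

    relu = lambda z: np.maximum(0, z)
    sigmoid = lambda z: 1. / (1. + np.exp(-z))

    # Forward pass with inverted dropout on the two hidden layers,
    # mirroring the cache layout the backward pass expects.
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    D1 = np.random.rand(*A1.shape) < keep_prob     # dropout mask, layer 1
    A1 = A1 * D1 / keep_prob                       # inverted dropout scaling
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(*A2.shape) < keep_prob     # dropout mask, layer 2
    A2 = A2 * D2 / keep_prob
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)                               # output layer: no dropout

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    gradients = backward_propagation_with_dropout(X, Y, cache, keep_prob)
    print(gradients["dW1"].shape, gradients["dW2"].shape, gradients["dW3"].shape)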