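# This gist implements the backward pass only. The gradient helpers below call
# sigmoid, tanh, and relu and expect each to return the pair (A, Z), as in the
# companion forward-propagation helpers. The definitions here are a minimal
# sketch of those assumed helpers so the file runs on its own.
import numpy as np


def sigmoid(Z):
    # Logistic activation; returns the activation and the pre-activation input.
    A = 1 / (1 + np.exp(-Z))
    return A, Z


def tanh(Z):
    # Hyperbolic tangent activation; returns the activation and its input.
    A = np.tanh(Z)
    return A, Z


def relu(Z):
    # Rectified linear unit; returns the activation and its input.
    A = np.maximum(0, Z)
    return A, Z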
def sigmoid_gradient(dA, Z):
    # Backward pass through the sigmoid: dZ = dA * s(Z) * (1 - s(Z)).
    A, Z = sigmoid(Z)
    dZ = dA * A * (1 - A)
    return dZ


def tanh_gradient(dA, Z):
    # Backward pass through tanh: dZ = dA * (1 - tanh(Z)^2).
    A, Z = tanh(Z)
    dZ = dA * (1 - np.square(A))
    return dZ


def relu_gradient(dA, Z):
    # Backward pass through ReLU: pass dA through where Z > 0, zero elsewhere.
    A, Z = relu(Z)
    dZ = np.multiply(dA, np.int64(A > 0))
    return dZ
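# A finite-difference sanity check for the activation gradients above. This
# helper is illustrative only (its name and tolerance are not part of the
# original gist); it compares the analytic gradient, obtained by passing
# dA = 1, against a central-difference estimate.
def _check_activation_gradient(gradient_fn, activation_fn, Z, eps=1e-6):
    analytic = gradient_fn(np.ones_like(Z), Z)
    numeric = (activation_fn(Z + eps)[0] - activation_fn(Z - eps)[0]) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-4)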
# Helper functions used in the L-layer model's backward propagation.
def linear_backward(dZ, cache):
    # Given dZ for the current layer and the cached (A_prev, W, b) from the
    # forward pass, compute gradients w.r.t. the layer's parameters and input.
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = (1 / m) * np.dot(dZ, A_prev.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    assert dA_prev.shape == A_prev.shape
    assert dW.shape == W.shape
    assert db.shape == b.shape

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation_fn):
    # Backward pass for one (linear -> activation) layer: first undo the
    # activation to get dZ, then the linear step to get dA_prev, dW, db.
    linear_cache, activation_cache = cache

    if activation_fn == "sigmoid":
        dZ = sigmoid_gradient(dA, activation_cache)
    elif activation_fn == "tanh":
        dZ = tanh_gradient(dA, activation_cache)
    elif activation_fn == "relu":
        dZ = relu_gradient(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def L_model_backward(AL, y, caches, hidden_layers_activation_fn="relu"):
    # Full backward pass for an L-layer network with a sigmoid output layer
    # and the given activation on the hidden layers.
    y = y.reshape(AL.shape)
    L = len(caches)
    grads = {}

    # Derivative of the binary cross-entropy loss w.r.t. the output AL.
    dAL = np.divide(AL - y, np.multiply(AL, 1 - AL))

    # Output layer (sigmoid).
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads[
        "db" + str(L)] = linear_activation_backward(
            dAL, caches[L - 1], "sigmoid")

    # Hidden layers, from layer L-1 down to layer 1.
    for l in range(L - 1, 0, -1):
        current_cache = caches[l - 1]
        grads["dA" + str(l - 1)], grads["dW" + str(l)], grads[
            "db" + str(l)] = linear_activation_backward(
                grads["dA" + str(l)], current_cache,
                hidden_layers_activation_fn)

    return grads
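# Minimal usage sketch on a toy two-layer network. The shapes and data are
# hypothetical, and the caches are assembled by hand in the
# ((A_prev, W, b), Z) layout that linear_activation_backward expects, standing
# in for the companion forward-propagation code.
if __name__ == "__main__":
    np.random.seed(1)
    X = np.random.randn(4, 3)                      # 4 features, 3 examples
    y = np.array([[1, 0, 1]])

    W1, b1 = np.random.randn(5, 4) * 0.01, np.zeros((5, 1))
    W2, b2 = np.random.randn(1, 5) * 0.01, np.zeros((1, 1))

    # Hand-rolled forward pass: relu hidden layer, sigmoid output layer.
    Z1 = np.dot(W1, X) + b1
    A1, _ = relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    AL, _ = sigmoid(Z2)

    caches = [((X, W1, b1), Z1), ((A1, W2, b2), Z2)]
    grads = L_model_backward(AL, y, caches, hidden_layers_activation_fn="relu")
    print({name: grad.shape for name, grad in grads.items()})

    # Sanity-check the analytic activation gradients on the cached Z values.
    _check_activation_gradient(sigmoid_gradient, sigmoid, Z2)
    _check_activation_gradient(tanh_gradient, tanh, Z1)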