# library
import numpy as np

# b_size is batch size
# inp_dim is input dimension
# hid_dim is hidden dimension
# out_dim is output dimension
b_size, inp_dim, hid_dim, out_dim = 64, 1000, 100, 10

# Create a random dataset
X = np.random.randn(b_size, inp_dim)
y = np.random.randn(b_size, out_dim)

# Randomly initialise the weights
w1 = np.random.randn(inp_dim, hid_dim)
w2 = np.random.randn(hid_dim, out_dim)

# Learning rate
lr = 1e-5

# 20 is the number of epochs
for t in range(20):
    # Forward pass: linear layer, ReLU, linear layer
    h = X.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute the loss (sum of squared errors)
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backpropagation to compute gradients of w1 and w2 with respect to the loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = X.T.dot(grad_h)

    # Update weights with gradient descent
    w1 -= lr * grad_w1
    w2 -= lr * grad_w2
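
# --- Optional sanity check (not part of the original gist): a minimal
# finite-difference gradient check on a single entry of w2, to verify that
# the backpropagation formulas above match a numerical estimate. The names
# introduced here (loss_fn, eps, i, j) are illustrative assumptions.
eps = 1e-6
i, j = 0, 0

def loss_fn(w1_, w2_):
    h_ = np.maximum(X.dot(w1_), 0)
    return np.square(h_.dot(w2_) - y).sum()

# Analytic gradient for w2[i, j], recomputed with the current weights
h_relu = np.maximum(X.dot(w1), 0)
y_pred = h_relu.dot(w2)
grad_w2_analytic = h_relu.T.dot(2.0 * (y_pred - y))[i, j]

# Central-difference numerical estimate of the same entry
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[i, j] += eps
w2_minus[i, j] -= eps
grad_w2_numeric = (loss_fn(w1, w2_plus) - loss_fn(w1, w2_minus)) / (2 * eps)

print("analytic:", grad_w2_analytic, "numeric:", grad_w2_numeric)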