Skip to content

Instantly share code, notes, and snippets.

@shaybensasson
Created November 27, 2016 12:13
Show Gist options
  • Select an option

  • Save shaybensasson/79eca42fdd04e2a993af67e16cda74b2 to your computer and use it in GitHub Desktop.

Select an option

Save shaybensasson/79eca42fdd04e2a993af67e16cda74b2 to your computer and use it in GitHub Desktop.
Pure NumPy implementation of ANNs with 1 hidden layer solving the XOR and donut classification problems
# revisiting the XOR and donut problems to show how features
# can be learned automatically using neural networks.
#
# the notes for this class can be found at:
# https://www.udemy.com/data-science-deep-learning-in-python
import numpy as np
import matplotlib.pyplot as plt
# for binary classification! no softmax here
def sig(x, cw=1, ch=1):
    """Logistic sigmoid with optional width (cw) and height (ch) scaling.

    Returns ch / (1 + exp(-cw * x)); the defaults give the standard sigmoid.
    """
    return ch / (1.0 + np.exp(-cw * x))
def sig_d(x):
    """Sigmoid derivative expressed in terms of the sigmoid's OUTPUT.

    Here x is assumed to already be sig(z), so d/dz sig(z) = x * (1 - x).
    """
    return x * (1.0 - x)
def tanh(x):
    """Hyperbolic tangent activation.

    Implemented via np.tanh for numerical stability: the naive
    (e^x - e^-x) / (e^x + e^-x) form overflows to inf/inf = nan
    once |x| exceeds ~710.
    """
    return np.tanh(x)
def tanh_d(x):
    """Tanh derivative in terms of the tanh OUTPUT x (x = tanh(z)): 1 - x^2."""
    return 1.0 - x ** 2
def forward(X, W1, b1, W2, b2):
    """One forward pass of the 1-hidden-layer network.

    The hidden layer uses the module-level `act` nonlinearity; the output
    layer uses a plain sigmoid (binary classification, no softmax).

    Returns (Y, Z): output probabilities and the hidden-layer activations.
    """
    hidden = act(X.dot(W1) + b1)   # e.g. (4, 5) for the XOR problem
    out = sig(hidden.dot(W2) + b2) # e.g. (4,)
    return out, hidden
def predict(X, W1, b1, W2, b2):
    """Hard 0/1 predictions: the forward pass rounded at the 0.5 threshold."""
    probs, _hidden = forward(X, W1, b1, W2, b2)
    return np.round(probs)
def derivative_w2(Z, T, Y):
    """Log-likelihood gradient w.r.t. the output weights W2.

    Z is the (N, M) hidden activation matrix; T and Y are the (N,) targets
    and predicted probabilities. Returns an (M,) vector.
    """
    residual = T - Y
    return Z.T.dot(residual)
def derivative_b2(T, Y):
    """Gradient w.r.t. the output bias: the summed residual (T - Y), a scalar."""
    return np.sum(T - Y, axis=0)
def derivative_w1(X, Z, T, Y, W2):
    """Gradient w.r.t. the input weights W1, by backprop through one layer.

    Shapes for the XOR case: X (4, 2), Z (4, 5), T/Y (4,), W2 (5,);
    the result matches W1's shape, (2, 5).
    """
    residual = T - Y
    # propagate the output error back through W2, then through the hidden
    # nonlinearity's derivative (module-level `act_d`, applied to the
    # activation OUTPUT Z)
    hidden_err = np.outer(residual, W2) * act_d(Z)
    return X.T.dot(hidden_err)
def derivative_b1(Z, T, Y, W2):
    """Gradient w.r.t. the hidden bias b1; returns a vector of hidden size.

    NOTE(review): `act_d` here receives the activation OUTPUT Z, which is
    the convention both sig_d and tanh_d in this file expect — make sure
    any new activation's derivative follows the same convention.
    """
    hidden_err = np.outer(T - Y, W2) * act_d(Z)
    return np.sum(hidden_err, axis=0)
def cost(T, Y):
    """Mean binary cross-entropy between targets T and predicted probabilities Y.

    Y is clipped away from exactly 0 and 1 so np.log never produces
    -inf/nan when the network saturates (the original crashed the loss
    with nan in that case).
    """
    eps = 1e-12
    Yc = np.clip(Y, eps, 1.0 - eps)
    return -np.mean(T * np.log(Yc) + (1.0 - T) * np.log(1.0 - Yc))
def test_xor():
    """Train a 2-5-1 network on XOR with full-batch gradient ascent.

    Uses the module-level `act`/`act_d` activation pair, prints progress,
    and plots the cross-entropy history. No return value.
    """
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # (4, 2) inputs
    Y = np.array([0, 1, 1, 0])                      # (4,) XOR targets
    W1 = np.random.randn(2, 5)  # input -> hidden weights, (2, 5)
    b1 = np.zeros(5)
    W2 = np.random.randn(5)     # hidden -> output weights, (5,)
    b2 = 0
    LL = []  # cross-entropy history for the final plot
    learning_rate = 10e-3
    regularization = 0.
    last_error_rate = None
    for i in range(30000):  # range, not Python-2 xrange
        pY, Z = forward(X, W1, b1, W2, b2)
        ll = cost(Y, pY)
        prediction = predict(X, W1, b1, W2, b2)
        er = np.mean(prediction != Y)
        if er != last_error_rate:
            last_error_rate = er
            print("error rate:", er)
            print("true:", Y)
            print("pred:", prediction)
        LL.append(ll)
        # Compute EVERY gradient from the same parameter snapshot before
        # applying any update: the original updated W2/b2 first and then
        # fed the already-updated W2 into derivative_w1/derivative_b1,
        # so W1's gradient was computed against the wrong weights.
        gW2 = derivative_w2(Z, Y, pY) - regularization * W2
        gb2 = derivative_b2(Y, pY) - regularization * b2
        gW1 = derivative_w1(X, Z, Y, pY, W2) - regularization * W1
        gb1 = derivative_b1(Z, Y, pY, W2) - regularization * b1
        W2 += learning_rate * gW2
        b2 += learning_rate * gb2
        W1 += learning_rate * gW1
        b1 += learning_rate * gb1
        if i % 1000 == 0:
            print(ll)
    print("final classification rate:", np.mean(prediction == Y))
    plt.plot(LL)
    plt.show()
def test_donut():
    """Train a 2-8-1 network on the concentric-ring ("donut") problem.

    Generates two noisy rings (inner class 0, outer class 1), trains with
    full-batch gradient ascent plus L2 regularization, prints progress,
    and plots the cross-entropy history. No return value.
    """
    N = 1000
    R_inner = 5
    R_outer = 10
    # distance from origin is the ring radius + unit gaussian noise;
    # angle theta is uniform on (0, 2*pi).
    # N // 2 (integer division): np.random.randn requires an int size —
    # the original's N/2 is a float (TypeError) on Python 3.
    R1 = np.random.randn(N // 2) + R_inner
    theta = 2 * np.pi * np.random.random(N // 2)
    X_inner = np.concatenate([[R1 * np.cos(theta)], [R1 * np.sin(theta)]]).T
    R2 = np.random.randn(N // 2) + R_outer
    theta = 2 * np.pi * np.random.random(N // 2)
    X_outer = np.concatenate([[R2 * np.cos(theta)], [R2 * np.sin(theta)]]).T
    X = np.concatenate([X_inner, X_outer])
    Y = np.array([0] * (N // 2) + [1] * (N // 2))
    n_hidden = 8
    W1 = np.random.randn(2, n_hidden)
    b1 = np.random.randn(n_hidden)
    W2 = np.random.randn(n_hidden)
    b2 = np.random.randn(1)
    LL = []  # cross-entropy history for the final plot
    learning_rate = 0.00005
    regularization = 0.2
    for i in range(int(1E05)):
        pY, Z = forward(X, W1, b1, W2, b2)
        ll = cost(Y, pY)
        prediction = predict(X, W1, b1, W2, b2)
        er = np.abs(prediction - Y).mean()
        LL.append(ll)
        # gradients from one consistent parameter snapshot, then apply
        # (the original updated W2 before derivative_w1/derivative_b1 read it)
        gW2 = derivative_w2(Z, Y, pY) - regularization * W2
        gb2 = derivative_b2(Y, pY) - regularization * b2
        gW1 = derivative_w1(X, Z, Y, pY, W2) - regularization * W1
        gb1 = derivative_b1(Z, Y, pY, W2) - regularization * b1
        W2 += learning_rate * gW2
        b2 += learning_rate * gb2
        W1 += learning_rate * gW1
        b1 += learning_rate * gb1
        if i % 10000 == 0:
            print("i:", i, "ll:", ll, "classification rate:", 1 - er)
    plt.plot(LL)
    plt.show()
if __name__ == '__main__':
    # Run the XOR experiment once per hidden activation; the backprop
    # helpers read the module-level `act` / `act_d` pair set here.
    for act, act_d in ((tanh, tanh_d), (sig, sig_d)):
        test_xor()
        # test_donut()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment