Created
November 27, 2016 12:13
-
-
Save shaybensasson/79eca42fdd04e2a993af67e16cda74b2 to your computer and use it in GitHub Desktop.
Pure NumPy implementation of ANNs with one hidden layer, solving the XOR and donut classification problems
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # revisiting the XOR and donut problems to show how features | |
| # can be learned automatically using neural networks. | |
| # | |
| # the notes for this class can be found at: | |
| # https://www.udemy.com/data-science-deep-learning-in-python | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
# binary-classification output: plain logistic sigmoid, no softmax
def sig(x, cw=1, ch=1):
    """Logistic sigmoid with optional width (cw) and height (ch) scaling.

    sig(x) = ch / (1 + exp(-cw * x)); defaults give the standard sigmoid.
    """
    decay = np.exp(-cw * x)
    return ch * 1 / (1 + decay)
def sig_d(x):
    """Sigmoid derivative expressed in terms of the sigmoid OUTPUT x.

    If s = sig(a), then d(sig)/da = s * (1 - s); callers pass the
    activation value, not the pre-activation.
    """
    complement = 1 - x
    return x * complement
def tanh(x):
    """Hyperbolic tangent activation.

    Delegates to np.tanh, which is numerically stable for all inputs.
    The original hand-rolled (e^x - e^-x)/(e^x + e^-x) form overflows
    np.exp for |x| > ~709 and returns nan (inf/inf), which would poison
    the whole training run; np.tanh saturates cleanly to +/-1 instead.
    """
    return np.tanh(x)
def tanh_d(x):
    """Tanh derivative expressed in terms of the tanh OUTPUT x.

    If t = tanh(a), then d(tanh)/da = 1 - t^2; callers pass the
    activation value, not the pre-activation.
    """
    squared = x * x
    return 1 - squared
def forward(X, W1, b1, W2, b2):
    """Forward pass of the 1-hidden-layer net.

    Hidden layer uses the module-level `act` (set in __main__); the
    output layer is a sigmoid for binary classification.
    Returns (output probabilities, hidden activations).
    """
    hidden = act(np.dot(X, W1) + b1)      # (N, M) hidden activations
    output = sig(np.dot(hidden, W2) + b2)  # (N,) output probabilities
    return output, hidden
def predict(X, W1, b1, W2, b2):
    """Hard 0/1 predictions: round the forward-pass output probabilities."""
    probs, _hidden = forward(X, W1, b1, W2, b2)
    return np.round(probs)
def derivative_w2(Z, T, Y):
    """Gradient of the log-likelihood w.r.t. the output weights W2.

    Z is the (N, M) hidden-activation matrix; T and Y are the (N,)
    targets and predictions. Returns an (M,) gradient.
    """
    residual = T - Y
    return np.dot(residual, Z)
def derivative_b2(T, Y):
    """Gradient of the log-likelihood w.r.t. the output bias b2 (scalar)."""
    residual = T - Y
    return np.sum(residual, axis=0)
def derivative_w1(X, Z, T, Y, W2):
    """Gradient of the log-likelihood w.r.t. the hidden weights W1.

    Shapes: X (N, D), Z (N, M), T/Y (N,), W2 (M,); returns (D, M).
    Backpropagates the output residual through W2, then through the
    hidden activation via the module-level `act_d`.
    """
    delta_hidden = np.outer(T - Y, W2) * act_d(Z)
    return np.dot(X.T, delta_hidden)
def derivative_b1(Z, T, Y, W2):
    """Gradient of the log-likelihood w.r.t. the hidden bias b1 (shape (M,)).

    Same backprop term as derivative_w1, summed over the batch axis.
    """
    delta_hidden = np.outer(T - Y, W2) * act_d(Z)
    return np.sum(delta_hidden, axis=0)
def cost(T, Y):
    """Mean binary cross-entropy between targets T and predictions Y.

    Assumes Y is strictly inside (0, 1); log(0) would yield -inf.
    """
    per_sample = T * np.log(Y) + (1 - T) * np.log(1 - Y)
    return -np.mean(per_sample)
def test_xor():
    """Train the 1-hidden-layer net on the 4-point XOR problem and plot cost.

    NOTE(review): Python 2 script — `print` statements and `xrange`;
    porting to Python 3 needs print() and range().
    """
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) #4x2
    Y = np.array([0, 1, 1, 0]) #4x1  XOR truth table
    W1 = np.random.randn(2, 5) #2,5  input -> 5 hidden units
    b1 = np.zeros(5) #1,5
    W2 = np.random.randn(5) #1,5  hidden -> scalar output
    b2 = 0 #1x1  starts as int; first += promotes it to float
    LL = [] # keep track of likelihoods
    learning_rate = 10e-3  # i.e. 0.01
    regularization = 0.  # L2 penalty disabled for XOR
    last_error_rate = None
    for i in xrange(30000):
        pY, Z = forward(X, W1, b1, W2, b2)
        ll = cost(Y, pY)
        prediction = predict(X, W1, b1, W2, b2)
        er = np.mean(prediction != Y)
        # only print when the error rate actually changes
        if er != last_error_rate:
            last_error_rate = er
            print "error rate:", er
            print "true:", Y
            print "pred:", prediction
        # if LL and ll < LL[-1]:
        #     print "early exit"
        #     break
        LL.append(ll)
        # Gradient ASCENT on the log-likelihood with optional L2 shrinkage.
        # NOTE(review): W2 is updated in place BEFORE derivative_w1/b1 read
        # it, so the hidden-layer gradients use the new W2, not the one that
        # produced pY — confirm this ordering is intended.
        W2 += learning_rate * (derivative_w2(Z, Y, pY) - regularization * W2)
        b2 += learning_rate * (derivative_b2(Y, pY) - regularization * b2)
        W1 += learning_rate * (derivative_w1(X, Z, Y, pY, W2) - regularization * W1)
        b1 += learning_rate * (derivative_b1(Z, Y, pY, W2) - regularization * b1)
        if i % 1000 == 0:
            print ll
    print "final classification rate:", np.mean(prediction == Y)
    plt.plot(LL)
    plt.show()
def test_donut():
    """Train the same net on the concentric-donut classification problem.

    NOTE(review): Python 2 script — `N/2` relies on integer division and
    `print`/`xrange` are Python 2 forms; a Python 3 port needs N//2,
    print(), and range().
    """
    # donut example
    N = 1000
    R_inner = 5
    R_outer = 10
    # distance from origin is radius + random normal
    # angle theta is uniformly distributed between (0, 2pi)
    R1 = np.random.randn(N/2) + R_inner
    theta = 2*np.pi*np.random.random(N/2)
    X_inner = np.concatenate([[R1 * np.cos(theta)], [R1 * np.sin(theta)]]).T
    R2 = np.random.randn(N/2) + R_outer
    theta = 2*np.pi*np.random.random(N/2)
    X_outer = np.concatenate([[R2 * np.cos(theta)], [R2 * np.sin(theta)]]).T
    X = np.concatenate([ X_inner, X_outer ])
    Y = np.array([0]*(N/2) + [1]*(N/2))  # labels: inner ring 0, outer ring 1
    n_hidden = 8
    W1 = np.random.randn(2, n_hidden)
    b1 = np.random.randn(n_hidden)
    W2 = np.random.randn(n_hidden)
    b2 = np.random.randn(1)
    LL = [] # keep track of likelihoods
    learning_rate = 0.00005
    regularization = 0.2
    last_error_rate = None  # set up but never compared in this variant
    #for i in xrange(160000):
    for i in xrange(int(1E05)):
        pY, Z = forward(X, W1, b1, W2, b2)
        ll = cost(Y, pY)
        prediction = predict(X, W1, b1, W2, b2)
        er = np.abs(prediction - Y).mean()  # misclassification rate
        LL.append(ll)
        # Gradient ASCENT on the log-likelihood with L2 shrinkage.
        # NOTE(review): as in test_xor, W2 is updated in place before
        # derivative_w1/b1 read it — confirm this ordering is intended.
        W2 += learning_rate * (derivative_w2(Z, Y, pY) - regularization * W2)
        b2 += learning_rate * (derivative_b2(Y, pY) - regularization * b2)
        W1 += learning_rate * (derivative_w1(X, Z, Y, pY, W2) - regularization * W1)
        b1 += learning_rate * (derivative_b1(Z, Y, pY, W2) - regularization * b1)
        if i % 10000 == 0:
            print "i:", i, "ll:", ll, "classification rate:", 1 - er
    plt.plot(LL)
    plt.show()
if __name__ == '__main__':
    # `act` / `act_d` are module-level globals consumed by forward() and the
    # hidden-layer derivative functions; each run below swaps the hidden
    # activation to compare tanh vs sigmoid on the same problem.
    act = tanh
    act_d = tanh_d
    test_xor()
    #test_donut()
    act = sig
    act_d = sig_d
    test_xor()
    #test_donut()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment