@cozek
Created June 4, 2019 07:03
A basic Python/NumPy implementation of a neural network
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

class myNN(object):
    def __init__(self):
        self.params = None
        self.layer_dims = None

    def activate(self, A, activation):
        if activation == 'sigmoid':
            return self.sigmoid(A)
        if activation == 'tanh':
            return np.tanh(A)
        if activation == 'relu':
            return self.relu(A)

    def init_layers(self, layer_dims):
        # He-style initialisation: weights scaled by sqrt(2 / fan_in)
        np.random.seed(1)
        params = {}
        for i in range(1, len(layer_dims)):
            params['W' + str(i)] = np.random.randn(layer_dims[i]['size'], layer_dims[i - 1]['size']) * np.sqrt(2. / layer_dims[i - 1]['size'])
            params['b' + str(i)] = np.zeros((layer_dims[i]['size'], 1))
        self.layer_dims = layer_dims
        self.params = params

    def sigmoid(self, A):
        return 1 / (1 + np.exp(-A))

    def relu(self, A):
        return np.maximum(0, A)

    def cost_func(self, AL, Y):
        # Binary cross-entropy averaged over the m examples
        m = AL.shape[1]
        cost = -(1. / m) * (np.dot(np.log(AL), Y.T) + np.dot(np.log(1 - AL), 1 - Y.T))
        return np.squeeze(cost)

    def get_grad(self, A, Z, activation):
        # Derivative of each activation, written in terms of its output A
        if activation == 'tanh':
            return 1 - (A ** 2)
        if activation == 'sigmoid':
            return A * (1 - A)
        if activation == 'relu':
            return np.int64(A > 0)

    def forward(self, X, y):
        self.Y = y
        forward_cache = {}
        forward_cache['A0'] = X
        for i in range(1, len(self.layer_dims)):
            W = 'W' + str(i)
            b = 'b' + str(i)
            Z = 'Z' + str(i)
            A = 'A' + str(i)
            forward_cache[Z] = np.dot(self.params[W], forward_cache['A' + str(i - 1)]) + self.params[b]
            forward_cache[A] = self.activate(forward_cache[Z], self.layer_dims[i]['activation'])
        self.forward_cache = forward_cache
        cost = self.cost_func(forward_cache[A], y)  # A names the last layer after the loop
        self.costs.append(cost)

    def backward(self):
        grads = {}
        AL = self.forward_cache['A' + str(len(self.layer_dims) - 1)]
        # Gradient of the cross-entropy cost w.r.t. the network output
        grads['dA' + str(len(self.layer_dims) - 1)] = -(np.divide(self.Y, AL) - np.divide(1 - self.Y, 1 - AL))
        for i in reversed(range(1, len(self.layer_dims))):
            dW = 'dW' + str(i)
            db = 'db' + str(i)
            dA = 'dA' + str(i)
            dZ = 'dZ' + str(i)
            W = 'W' + str(i)
            Z = 'Z' + str(i)
            A = 'A' + str(i)
            A_prev = 'A' + str(i - 1)
            m = self.forward_cache[A].shape[1]
            grads[dZ] = grads[dA] * self.get_grad(self.forward_cache[A], self.forward_cache[Z], self.layer_dims[i]['activation'])
            grads[dW] = (1 / m) * np.dot(grads[dZ], self.forward_cache[A_prev].T)
            grads[db] = (1 / m) * np.sum(grads[dZ], axis=1, keepdims=True)
            if i != 1:
                grads['dA' + str(i - 1)] = np.dot(self.params[W].T, grads[dZ])
        self.grads = grads

    def update_params(self):
        # Vanilla gradient descent step
        for i in range(1, len(self.layer_dims)):
            self.params['W' + str(i)] = self.params['W' + str(i)] - self.lr * self.grads['dW' + str(i)]
            self.params['b' + str(i)] = self.params['b' + str(i)] - self.lr * self.grads['db' + str(i)]

    def predict(self, X, Y):
        # Runs a forward pass, thresholds the output at 0.5, and
        # returns the accuracy (in percent) against the labels Y
        self.forward(X, Y)
        predictions = np.where(self.forward_cache['A' + str(len(self.layer_dims) - 1)] > 0.5, 1, 0)
        return float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100)

    def start_model(self, X, Y, num_iter=1000, lr=0.01, print_cost=False):
        self.costs = []
        self.lr = lr
        for i in tqdm(range(num_iter)):
            self.forward(X, Y)
            if print_cost and i % 100 == 0:
                print('Cost at iter {} is {}'.format(i, self.costs[i]))
            self.backward()
            self.update_params()

    def plot_cost(self):
        plt.plot(np.squeeze(self.costs))
        plt.ylabel('cost')
        plt.xlabel('iteration')
        plt.title('Learning rate = ' + str(self.lr))
        plt.show()
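
# --- Optional sanity check (added sketch, not part of the original gist) ---
# Backprop implementations are commonly verified by comparing the analytic
# gradient from backward() against a centred finite difference of the cost.
# Assumes start_model() has already been called (so self.costs exists);
# the layer/row/col indices and eps are arbitrary choices for illustration.
def grad_check_single(model, X, Y, layer=1, row=0, col=0, eps=1e-7):
    model.forward(X, Y)
    model.backward()  # analytic gradients at the current weights
    analytic = model.grads['dW' + str(layer)][row, col]
    key = 'W' + str(layer)
    orig = model.params[key][row, col]
    model.params[key][row, col] = orig + eps
    model.forward(X, Y)
    cost_plus = model.costs[-1]
    model.params[key][row, col] = orig - eps
    model.forward(X, Y)
    cost_minus = model.costs[-1]
    model.params[key][row, col] = orig  # restore the weight
    numeric = (cost_plus - cost_minus) / (2 * eps)
    return numeric, analytic  # these should agree to several decimal places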
cozek commented Jun 4, 2019

# quick testing

def load_planar_dataset():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum radius of the flower

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j
        
    X = X.T
    Y = Y.T

    return X, Y


X, Y = load_planar_dataset()
# layer 0 is the input layer, so its 'activation' entry is never used
layer_dims = ({'size':X.shape[0],'activation':'none'},
              {'size':5,'activation':'tanh'},
              {'size':4,'activation':'tanh'},
              {'size':Y.shape[0],'activation':'sigmoid'},
             )

testNN = myNN()

testNN.init_layers(layer_dims)

testNN.start_model(X,Y,num_iter=10000,lr=0.5,print_cost = False)
testNN.plot_cost()
print('Training accuracy: {}%'.format(testNN.predict(X, Y)))
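
As a follow-up sketch (not in the original comment), the learned decision boundary can be visualised by evaluating the network on a grid of points. The grid step h = 0.01 and the dummy zero labels passed to forward() (labels are only used to log a cost; just the activations are read back) are assumptions of this example.

h = 0.01
x_min, x_max = X[0].min() - 1, X[0].max() + 1
y_min, y_max = X[1].min() - 1, X[1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
grid = np.c_[xx.ravel(), yy.ravel()].T

# forward() needs labels only to compute a cost, so dummy zeros suffice here
testNN.forward(grid, np.zeros((1, grid.shape[1])))
Z = np.where(testNN.forward_cache['A' + str(len(layer_dims) - 1)] > 0.5, 1, 0)

plt.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.Spectral, alpha=0.5)
plt.scatter(X[0, :], X[1, :], c=Y.ravel(), cmap=plt.cm.Spectral, s=10)
plt.title('Decision boundary')
plt.show()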
