@cozek
Created June 4, 2019 07:03
A basic Python/NumPy implementation of a neural network
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

class myNN(object):
    def __init__(self):
        self.params = None
        self.layer_dims = None

    def activate(self, A, activation):
        if activation == 'sigmoid':
            return self.sigmoid(A)
        if activation == 'tanh':
            return np.tanh(A)
        if activation == 'relu':
            return self.relu(A)

    def init_layers(self, layer_dims):
        # He-style initialisation: weights scaled by sqrt(2 / fan_in)
        np.random.seed(1)
        params = {}
        for i in range(1, len(layer_dims)):
            params['W' + str(i)] = np.random.randn(layer_dims[i]['size'], layer_dims[i - 1]['size']) * np.sqrt(2. / layer_dims[i - 1]['size'])
            params['b' + str(i)] = np.zeros((layer_dims[i]['size'], 1))
        self.layer_dims = layer_dims
        self.params = params

    def sigmoid(self, A):
        return 1 / (1 + np.exp(-A))

    def relu(self, A):
        return np.maximum(0, A)

    def cost_func(self, AL, Y):
        # Binary cross-entropy averaged over the m examples
        m = AL.shape[1]
        cost = -(1. / m) * (np.dot(np.log(AL), Y.T) + np.dot(np.log(1 - AL), 1 - Y.T))
        return np.squeeze(cost)

    def get_grad(self, A, Z, activation):
        # Derivative of each activation, written in terms of its output A
        if activation == 'tanh':
            return 1 - (A ** 2)
        if activation == 'sigmoid':
            return A * (1 - A)
        if activation == 'relu':
            return np.int64(A > 0)

    def forward(self, X, y):
        self.Y = y
        forward_cache = {}
        forward_cache['A0'] = X
        for i in range(1, len(self.layer_dims)):
            W = 'W' + str(i)
            b = 'b' + str(i)
            Z = 'Z' + str(i)
            A = 'A' + str(i)
            forward_cache[Z] = np.dot(self.params[W], forward_cache['A' + str(i - 1)]) + self.params[b]
            forward_cache[A] = self.activate(forward_cache[Z], self.layer_dims[i]['activation'])
        self.forward_cache = forward_cache
        cost = self.cost_func(forward_cache[A], y)  # A names the last layer after the loop
        self.costs.append(cost)

    def backward(self):
        grads = {}
        AL = self.forward_cache['A' + str(len(self.layer_dims) - 1)]
        # Gradient of the cross-entropy cost w.r.t. the network output
        grads['dA' + str(len(self.layer_dims) - 1)] = -(np.divide(self.Y, AL) - np.divide(1 - self.Y, 1 - AL))
        for i in reversed(range(1, len(self.layer_dims))):
            dW = 'dW' + str(i)
            db = 'db' + str(i)
            dA = 'dA' + str(i)
            dZ = 'dZ' + str(i)
            W = 'W' + str(i)
            Z = 'Z' + str(i)
            A = 'A' + str(i)
            A_prev = 'A' + str(i - 1)
            m = self.forward_cache[A].shape[1]
            grads[dZ] = grads[dA] * self.get_grad(self.forward_cache[A], self.forward_cache[Z], self.layer_dims[i]['activation'])
            grads[dW] = (1 / m) * np.dot(grads[dZ], self.forward_cache[A_prev].T)
            grads[db] = (1 / m) * np.sum(grads[dZ], axis=1, keepdims=True)
            if i != 1:
                grads['dA' + str(i - 1)] = np.dot(self.params[W].T, grads[dZ])
        self.grads = grads

    def update_params(self):
        # Vanilla gradient descent step
        for i in range(1, len(self.layer_dims)):
            self.params['W' + str(i)] = self.params['W' + str(i)] - self.lr * self.grads['dW' + str(i)]
            self.params['b' + str(i)] = self.params['b' + str(i)] - self.lr * self.grads['db' + str(i)]

    def predict(self, X, Y):
        # Runs a forward pass, thresholds the output at 0.5, and
        # returns the accuracy (in percent) against the labels Y
        self.forward(X, Y)
        predictions = np.where(self.forward_cache['A' + str(len(self.layer_dims) - 1)] > 0.5, 1, 0)
        return float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100)

    def start_model(self, X, Y, num_iter=1000, lr=0.01, print_cost=False):
        self.costs = []
        self.lr = lr
        for i in tqdm(range(num_iter)):
            self.forward(X, Y)
            if print_cost and i % 100 == 0:
                print('Cost at iter {} is {}'.format(i, self.costs[i]))
            self.backward()
            self.update_params()

    def plot_cost(self):
        plt.plot(np.squeeze(self.costs))
        plt.ylabel('cost')
        plt.xlabel('iteration')
        plt.title('Learning rate = ' + str(self.lr))
        plt.show()
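
# --- Optional sanity check (added sketch, not part of the original gist) ---
# Backprop implementations are commonly verified by comparing the analytic
# gradient from backward() against a centred finite difference of the cost.
# Assumes start_model() has already been called (so self.costs exists);
# the layer/row/col indices and eps are arbitrary choices for illustration.
def grad_check_single(model, X, Y, layer=1, row=0, col=0, eps=1e-7):
    model.forward(X, Y)
    model.backward()  # analytic gradients at the current weights
    analytic = model.grads['dW' + str(layer)][row, col]
    key = 'W' + str(layer)
    orig = model.params[key][row, col]
    model.params[key][row, col] = orig + eps
    model.forward(X, Y)
    cost_plus = model.costs[-1]
    model.params[key][row, col] = orig - eps
    model.forward(X, Y)
    cost_minus = model.costs[-1]
    model.params[key][row, col] = orig  # restore the weight
    numeric = (cost_plus - cost_minus) / (2 * eps)
    return numeric, analytic  # these should agree to several decimal places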
cozek commented Jun 4, 2019

# quick testing

def load_planar_dataset():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum radius of the flower

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j
        
    X = X.T
    Y = Y.T

    return X, Y


X, Y = load_planar_dataset()
# layer 0 is the input layer, so its 'activation' entry is never used
layer_dims = ({'size':X.shape[0],'activation':'none'},
              {'size':5,'activation':'tanh'},
              {'size':4,'activation':'tanh'},
              {'size':Y.shape[0],'activation':'sigmoid'},
             )

testNN = myNN()

testNN.init_layers(layer_dims)

testNN.start_model(X,Y,num_iter=10000,lr=0.5,print_cost = False)
testNN.plot_cost()
print('Training accuracy: {}%'.format(testNN.predict(X, Y)))
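
As a follow-up sketch (not in the original comment), the learned decision boundary can be visualised by evaluating the network on a grid of points. The grid step h = 0.01 and the dummy zero labels passed to forward() (labels are only used to log a cost; just the activations are read back) are assumptions of this example.

h = 0.01
x_min, x_max = X[0].min() - 1, X[0].max() + 1
y_min, y_max = X[1].min() - 1, X[1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
grid = np.c_[xx.ravel(), yy.ravel()].T

# forward() needs labels only to compute a cost, so dummy zeros suffice here
testNN.forward(grid, np.zeros((1, grid.shape[1])))
Z = np.where(testNN.forward_cache['A' + str(len(layer_dims) - 1)] > 0.5, 1, 0)

plt.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.Spectral, alpha=0.5)
plt.scatter(X[0, :], X[1, :], c=Y.ravel(), cmap=plt.cm.Spectral, s=10)
plt.title('Decision boundary')
plt.show()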
