Python module containing basic neural network functions.
# -*- coding: utf-8 -*-
'''
A module containing artificial neural network functions.
Created by Hamza Abbad.
'''
from numpy import exp, ones, hstack, ndarray, array, zeros, sum, log, zeros_like, any, abs, sqrt
from numbers import Real, Integral
def sigmoid(Z):
    # Z is a 2 dimensional array.
    '''Calculates the sigmoid of the elements of the array'''
    return 1 / (1 + exp(-Z))
def sigmoid_gradient(Z):
    # Z is a 2 dimensional array.
    '''Calculates the gradient of the sigmoid function for an array'''
    sig = sigmoid(Z)
    return sig * (1 - sig)
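# A small illustrative check of the two functions above (not part of the module's API):
# sigmoid(0) = 0.5 and its gradient is 0.5 * (1 - 0.5) = 0.25, so for example
#     sigmoid(array([[0.0, 2.0]]))           # -> approximately [[0.5, 0.8808]]
#     sigmoid_gradient(array([[0.0, 2.0]]))  # -> approximately [[0.25, 0.105]]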
def one_of_m(y, possible_labels):
    # y is a 1 dimensional labels array, possible_labels is the number of possible labels.
    '''
    Returns a M*K array, M is the length of y (the number of examples), K is the number of possible labels.
    '''
    Y = zeros((y.shape[0], possible_labels), dtype=int)
    for i in range(y.shape[0]):
        Y[i, y[i]] = 1
    return Y
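# Illustrative example of the one-of-m (one-hot) encoding above:
#     one_of_m(array([2, 0]), 3)
# would return
#     [[0, 0, 1],
#      [1, 0, 0]]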
def feedforward(X, Thetas, activation_function = sigmoid, keep_Z = False):
    # X is a 2 dimensional array of data, Thetas is a list of 2 dimensional arrays of weights.
    '''
    Performs the forward propagation of an artificial neural network and calculates the output from :
    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Thetas : List of L arrays, L is the number of layers of the network minus one, each array has a dimension of Ui*Vi
    where Ui is the number of neurons in the layer i excluding the bias unit and Vi is the number of neurons in the layer i-1
    including the bias. For the weights of the first hidden layer, Vi = N + 1 (the number of inputs plus the bias unit).
    For the weights of the output layer, Ui = the number of outputs.
    '''
    A = hstack((ones((X.shape[0], 1)), X)) # Add the column X0
    if keep_Z :
        Zs = []
    for i in range(len(Thetas)):
        Z = A.dot(Thetas[i].transpose())
        if keep_Z :
            Zs.append(Z)
        A = hstack((ones((A.shape[0], 1)), activation_function(Z))) # Column of ones represents the bias unit
    if keep_Z :
        return (A[:, 1:], Zs)
    return A[:, 1:]
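# Shape sketch for feedforward (illustrative only, assuming a small 2-4-3 network):
# with M = 5 examples, X is 5*2, Thetas[0] is 4*3 (4 hidden units, 2 inputs + bias)
# and Thetas[1] is 3*5 (3 outputs, 4 hidden units + bias), so
#     feedforward(zeros((5, 2)), [zeros((4, 3)), zeros((3, 5))]).shape  # -> (5, 3)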
def backpropagation(X, Y, H, Thetas, Zs, activation_function = sigmoid, activation_gradient = sigmoid_gradient, reg_lambda = 0):
    # X, Y and H are 2 dimensional arrays; Thetas and Zs are lists of 2 dimensional arrays.
    '''
    Performs a backpropagation pass and calculates the gradient of an artificial neural network from :
    - X : M*N array of examples, M is the number of examples, N is the number of features.
    - Y : M*K array of labels, M is the number of examples, K is the number of units in the output layer.
    - H : M*K array of predicted values returned by the feedforward pass.
    - Thetas : List of L arrays, L is the number of layers of the network minus one, in other words, the number of weight arrays.
    - Zs : List of L arrays returned by the feedforward pass.
    - reg_lambda : The regularization constant.
    '''
    X = hstack((ones((X.shape[0], 1)), X)) # Add the column X0
    Deltas = [zeros_like(Theta) for Theta in Thetas]
    As = [hstack((ones((X.shape[0], 1)), activation_function(Z))) for Z in Zs] # Activation values
    for m in range(X.shape[0]): # For each example
        delta = (H[m:m+1, :] - Y[m:m+1, :]).transpose()
        for i in range(len(Deltas)-1, 0, -1) :
            Deltas[i] += delta.dot(As[i-1][m:m+1, :])
            delta = Thetas[i].transpose().dot(delta)[1:, :] * activation_gradient(Zs[i-1][m:m+1, :]).transpose()
        Deltas[0] += delta.dot(X[m:m+1, :])
    for i in range(len(Deltas)):
        Deltas[i] /= X.shape[0]
        Deltas[i][:, 1:] += reg_lambda / X.shape[0] * Thetas[i][:, 1:]
    return Deltas
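# Note on the result above (illustrative): each array in the returned Deltas has the same
# shape as the corresponding weight array in Thetas, so a gradient step is simply
# Thetas[i] -= alpha * Deltas[i] for every layer i, as done in gradient_descent below.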
def cost(X, Y, H, Thetas, reg_lambda = 0):
    '''
    Calculates the cost of the weights in an artificial neural network.
    '''
    J = -1 / X.shape[0] * sum(Y * log(H) + (1 - Y) * log(1 - H)) # Original cost function.
    if reg_lambda != 0:
        regularization = 0 # Add the regularization.
        for Theta in Thetas:
            regularization += sum(Theta[:, 1:] ** 2)
        regularization *= reg_lambda / (2 * X.shape[0])
        J += regularization
    return J
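# The cost above is the regularized cross-entropy; written out for reference:
#     J = -1/M * sum(Y * log(H) + (1 - Y) * log(1 - H))
#         + reg_lambda / (2 * M) * (sum of Theta[:, 1:] ** 2 over every Theta)
# where M is the number of examples and the bias columns are excluded from the
# regularization term. For a single output with Y = 1 and H = 0.8, the
# unregularized cost is -log(0.8), roughly 0.223.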
def _numerical_gradient_check(cost_function, Thetas, epsilon = 1e-4):
    '''
    Calculates the numerical gradient of an artificial neural network.
    Used only for checking the implementation of the backpropagation.
    '''
    numerical_gradient = [zeros_like(Theta) for Theta in Thetas]
    for i in range(len(Thetas)):
        Theta = Thetas[i]
        perturb = zeros_like(Theta)
        for p1 in range(Theta.shape[0]):
            for p2 in range(Theta.shape[1]):
                perturb[p1, p2] = epsilon
                Thetas[i] = Theta - perturb
                loss1 = cost_function(Thetas)
                Thetas[i] = Theta + perturb
                loss2 = cost_function(Thetas)
                numerical_gradient[i][p1, p2] = (loss2 - loss1) / (2 * epsilon)
                perturb[p1, p2] = 0
        Thetas[i] = Theta
    return numerical_gradient
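# The check above uses the central difference (J(Theta + eps) - J(Theta - eps)) / (2 * eps)
# for every single weight. An illustrative way to compare it with the backpropagation
# gradient, reusing the names defined in the __main__ block below:
#     numerical = _numerical_gradient_check(lambda t: cost(X, Y, feedforward(X, t), t, reg), Thetas)
#     max(abs(n - g).max() for n, g in zip(numerical, grad))  # should be tiny, around 1e-8 or less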
def _check_parameters(X = None, y = None, possible_labels = None, Y = None, H = None, Thetas = None, Zs = None, reg_lambda = None, epsilon = None):
    '''
    Checks the parameters sent to other functions.
    '''
    if not (X is None) :
        if not isinstance(X, ndarray) :
            raise TypeError("X must be a NumPy array")
        if X.ndim != 2 :
            raise ValueError("X must be a 2 dimensional array")
    if not (y is None) :
        if not isinstance(y, ndarray) :
            raise TypeError("y must be a NumPy array")
        if y.ndim != 1 :
            raise ValueError("y must be a 1 dimensional array")
        if not (possible_labels is None) :
            if not isinstance(possible_labels, Integral) :
                raise TypeError("possible_labels must be an integer")
            if possible_labels <= 0 :
                raise ValueError("possible_labels must be positive")
            if any(y < 0) or any(y >= possible_labels) :
                raise ValueError("All labels in y must be between 0 inclusive and possible_labels exclusive")
    if not (Y is None) :
        if not isinstance(Y, ndarray) :
            raise TypeError("Y must be a NumPy array")
        if Y.ndim != 2 :
            raise ValueError("Y must be a 2 dimensional array")
        if not (H is None) :
            if not isinstance(H, ndarray) :
                raise TypeError("H must be a NumPy array")
            if H.shape != Y.shape :
                raise ValueError("H and Y must have the same dimensions")
    if not (Thetas is None) :
        if not isinstance(Thetas, list) :
            raise TypeError("Thetas must be a list")
        for i in range(len(Thetas)):
            if not isinstance(Thetas[i], ndarray) :
                raise TypeError("Thetas["+str(i)+"] must be a NumPy array")
            if Thetas[i].ndim != 2 :
                raise ValueError("Thetas["+str(i)+"] must be a 2 dimensional array")
        if not (X is None) :
            if Thetas[0].shape[1]-1 != X.shape[1] :
                raise ValueError("The second dimension of X must be equal to the second dimension of Thetas[0] minus one")
        for i in range(1, len(Thetas)):
            if Thetas[i].shape[1]-1 != Thetas[i-1].shape[0] :
                raise ValueError("The first dimension of Thetas["+str(i-1)+"] must be equal to the second dimension of Thetas["+str(i)+"] minus one")
        if not (Y is None) and Thetas[-1].shape[0] != Y.shape[1] :
            raise ValueError("The first dimension of Thetas["+str(len(Thetas)-1)+"] must be equal to the second dimension of Y")
    if not (Zs is None) :
        if not isinstance(Zs, list) :
            raise TypeError("Zs must be a list")
        if not (Thetas is None) and len(Zs) != len(Thetas) :
            raise ValueError("Zs and Thetas must have the same length")
        for i in range(len(Zs)):
            if not isinstance(Zs[i], ndarray) :
                raise TypeError("Zs["+str(i)+"] must be a NumPy array")
            if Zs[i].ndim != 2 :
                raise ValueError("Zs["+str(i)+"] must be a 2 dimensional array")
            if not (X is None) and Zs[i].shape[0] != X.shape[0] :
                raise ValueError("The first dimension of Zs["+str(i)+"] must be equal to the first dimension of X")
            if not (Thetas is None) and Zs[i].shape[1] != Thetas[i].shape[0] :
                raise ValueError("The second dimension of Zs["+str(i)+"] must be equal to the first dimension of Thetas["+str(i)+"]")
    if not (reg_lambda is None) :
        if not isinstance(reg_lambda, Real) :
            raise TypeError("reg_lambda must be a real number")
        if reg_lambda < 0 :
            raise ValueError("reg_lambda can't be negative")
    if not (epsilon is None) :
        if not isinstance(epsilon, Real) :
            raise TypeError("epsilon must be a real number")
        if epsilon <= 0 :
            raise ValueError("epsilon must be positive")
def gradient_descent(X, Y, Thetas, regularization_const, alpha, max_iterations, max_cost, max_gradient):
    '''
    Optimizes the weights in Thetas (modified in place) by batch gradient descent with learning rate alpha.
    Returns the number of iterations performed.
    '''
    H, Zs = feedforward(X, Thetas, keep_Z = True)
    J = cost(X, Y, H, Thetas, regularization_const)
    gradient = backpropagation(X, Y, H, Thetas, Zs, reg_lambda = regularization_const)
    for j in range(len(Thetas)):
        Thetas[j] -= alpha * gradient[j]
    i = 1
    gradient_value = sqrt(sum(array([sum(g**2) for g in gradient])))
    while i < max_iterations and J > max_cost and gradient_value > max_gradient :
        H, Zs = feedforward(X, Thetas, keep_Z = True)
        gradient = backpropagation(X, Y, H, Thetas, Zs, reg_lambda = regularization_const)
        for j in range(len(Thetas)):
            Thetas[j] -= alpha * gradient[j]
        J = cost(X, Y, H, Thetas, regularization_const)
        gradient_value = sqrt(sum(array([sum(g**2) for g in gradient])))
        i += 1
    return i
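# Every iteration above applies the plain batch update Thetas[j] -= alpha * gradient[j] to each
# weight array; the loop stops as soon as max_iterations is reached, the cost falls to max_cost
# or below, or the L2 norm of the full gradient falls to max_gradient or below. The __main__
# block below gives a complete usage example on a small hand-built network.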
if __name__ == '__main__':
    X = array([
        [0.54030, -0.41615],
        [-0.98999, -0.65364],
        [0.28366, 0.96017]
    ])
    Theta1 = array([
        [ 0.66294134, 0.5955722 , 0.55686872],
        [ 0.89952311, -0.25349201, 0.20393105]
    ])
    Theta2 = array([
        [-0.12538753, 0.71265549, 0.70997811],
        [ 1.07216431, 0.02318498, 0.39948136],
        [ 1.01592775, -0.20193439, -0.2109316 ],
        [ 1.17180091, -0.21056782, 0.63941167]
    ])
    Theta3 = array([
        [-0.12275993, 1.3893123 , 1.46562893, 0.29220495, 1.45709864],
        [-0.40565099, 1.05983036, 1.04174971, 0.27491176, 1.27062198],
        [ 0.99863495, -0.27216449, 1.39228261, 1.30152133, -0.03507692],
        [ 0.96217801, 0.62635206, -0.00421884, 1.37500325, -0.33949522]
    ])
    Thetas = [Theta1, Theta2, Theta3]
    y = array([3, 1, 2])
    reg = 1
    # _check_parameters(Thetas=Thetas, y=y, reg_lambda=reg)
    H, Zs = feedforward(X, Thetas, keep_Z = True)
    print("H", H)
    Y = one_of_m(y, Thetas[-1].shape[0])
    print("Y", Y)
    # _check_parameters(X=X, Y=Y, H=H, Zs=Zs, Thetas=Thetas)
    grad = backpropagation(X, Y, H, Thetas, Zs, reg_lambda = reg)
    J = lambda t: cost(X, Y, feedforward(X, t), t, reg)
    print("Cost before optimization :", J(Thetas))
    iterations = gradient_descent(X, Y, Thetas, reg, alpha=0.1, max_iterations=5000, max_cost=1e-3, max_gradient=0)
    print("Cost after optimization :", J(Thetas), end='\n'*2)
    print("Iterations :", iterations)
    print("Thetas :", end='\n'*2)
    for Theta in Thetas :
        print(Theta, end='\n'*2)