implementation of the backpropagation algorithm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# implementation of the backpropagation algorithm based on
# http://neuralnetworksanddeeplearning.com/chap1.html
#
# note that mini-batches are calculated in matrix form
import numpy as np
import random
def chunks(lst, n):
    """Split lst into evenly sized chunks of n elements."""
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
class Network(object):
    """A neural network."""

    def __init__(self, sizes):
        """sizes is a List[int] giving the number of units in each layer, starting from the input layer."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(self.sizes, self.sizes[1:])]

    def feed_forward(self, X):
        """Return the output of the network given input X."""
        activation = X
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(w.dot(activation) + b)
        return activation
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None, matrix=True):
        """Train the network using mini-batch stochastic gradient descent.
        :training_data: a list of tuples "(x, y)" where "x" is the input and "y" is the label
        :epochs: number of training epochs
        :mini_batch_size: number of training examples in each mini-batch
        :eta: learning rate
        :test_data: if provided, the network is evaluated on it after each epoch
        :matrix: if True, process each mini-batch as a single matrix (update_mini_batch_matrix)
        :returns: None
        """
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        update_mini_batch = self.update_mini_batch_matrix if matrix else self.update_mini_batch
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = list(chunks(training_data, mini_batch_size))
            for mini_batch in mini_batches:
                update_mini_batch(mini_batch, eta)
            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)}/{n_test}")
            else:
                print(f"Epoch {j} complete")
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying gradient descent using backpropagation.
        :mini_batch: List[(x, y)] where x is the input and y is the label
        :eta: learning rate
        :returns: None
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]
    def update_mini_batch_matrix(self, mini_batch, eta):
        """Matrix version of update_mini_batch: the whole mini-batch is processed in one pass.
        :mini_batch: List[(x, y)] where x is the input and y is the label
        :eta: learning rate
        :returns: None
        """
        # stack the inputs and labels column-wise into single matrices
        X = np.concatenate([x for x, _ in mini_batch], axis=1)
        Y = np.concatenate([y for _, y in mini_batch], axis=1)
        nabla_b, nabla_w = self.backprop(X, Y)
        # nabla_w is already summed over the batch by the matrix product inside backprop;
        # nabla_b has one column per example, so sum across columns before updating
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*np.sum(nb, axis=1).reshape(b.shape) for b, nb in zip(self.biases, nabla_b)]
    def evaluate(self, test_data):
        """Evaluate the network on test_data using the current weights/biases.
        :test_data: List[(x, y)]
        :returns: number of test examples that are correctly predicted
        """
        predictions = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
        return sum([int(x == y) for x, y in predictions])

    def cost_derivative(self, output_activations, y):
        r"""
        :output_activations: activations of the output layer (the network's output)
        :y: the desired output (label vector)
        :returns: the vector of partial derivatives \partial C_x / \partial a for the output activations.
        """
        return (output_activations - y)
    def backprop(self, x, y):
        """Backpropagation.
        :x: a column vector, or a matrix of column vectors [col, col, ...]
        :y: a column vector, or a matrix of column vectors [col, col, ...]
        :returns: a tuple (nabla_b, nabla_w) of layer-by-layer lists of numpy arrays
            representing the gradient of the cost function C_x
        """
        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)
        # feed forward
        activation = x
        activations = [x]  # list to store activations layer by layer
        zs = []            # list to store all z values, i.e. the linear combinations
        for b, w in zip(self.biases, self.weights):
            z = np.add(w.dot(activation), b)
            activation = sigmoid(z)
            zs.append(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book. Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on. It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

# train a [784, 30, 10] network on MNIST: 30 epochs, mini-batches of 10, learning rate 3.0
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
The MNIST dataset can be fetched from https://github.com/mnielsen/neural-networks-and-deep-learning.git.
Note that the code above is written in Python 3, while mnist_loader in that repository is written in Python 2, so you may need to apply changes along the following lines to make it work with Python 3.
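The exact patch isn't reproduced here; the sketch below shows the edits that are usually needed, assuming the loader still uses cPickle, gzip and zip as in the book's repository (names and paths follow that file and may need adjusting).

# hypothetical mnist_loader patch for Python 3 -- adjust to the actual file contents
import pickle   # Python 3: replaces "import cPickle"
import gzip

def load_data():
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # the pickle was written by Python 2, so decode it with latin1
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)

# in load_data_wrapper(), wrap the zip(...) calls in list(...), e.g.
#     training_data = list(zip(training_inputs, training_results))
# so that len() and random.shuffle() in the SGD code above keep working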