implementation of the backpropagation algorithm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# implementation of the backpropagation algorithm based on
# http://neuralnetworksanddeeplearning.com/chap1.html
#
# note that mini-batches are calculated in matrix form
import numpy as np
import random
def chunks(lst, n):
    """Split lst into evenly sized chunks of n elements."""
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
class Network(object):
    """A neural network."""

    def __init__(self, sizes):
        """sizes is a List[int] giving the number of units in each layer, starting from the input layer."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(self.sizes, self.sizes[1:])]

    def feed_forward(self, X):
        """Return the output of the network given input X."""
        activation = X
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(w.dot(activation) + b)
        return activation
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None, matrix=True):
        """Train the network using mini-batch stochastic gradient descent.
        :training_data: a list of tuples "(x, y)" where "x" is the input and "y" is the label
        :epochs: number of training epochs
        :mini_batch_size: number of training examples in each mini-batch
        :eta: learning rate
        :test_data: if provided, the network is evaluated on it after each epoch
        :matrix: if True, process each mini-batch as a single matrix (update_mini_batch_matrix)
        :returns: None
        """
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        update_mini_batch = self.update_mini_batch_matrix if matrix else self.update_mini_batch
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = list(chunks(training_data, mini_batch_size))
            for mini_batch in mini_batches:
                update_mini_batch(mini_batch, eta)
            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)}/{n_test}")
            else:
                print(f"Epoch {j} complete")
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying gradient descent using backpropagation.
        :mini_batch: List[(x, y)] where x is the input and y is the label
        :eta: learning rate
        :returns: None
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]
    def update_mini_batch_matrix(self, mini_batch, eta):
        """Matrix version of update_mini_batch: the whole mini-batch is processed in one pass.
        :mini_batch: List[(x, y)] where x is the input and y is the label
        :eta: learning rate
        :returns: None
        """
        # stack the inputs and labels column-wise into single matrices
        X = np.concatenate([x for x, _ in mini_batch], axis=1)
        Y = np.concatenate([y for _, y in mini_batch], axis=1)
        nabla_b, nabla_w = self.backprop(X, Y)
        # nabla_w is already summed over the batch by the matrix product inside backprop;
        # nabla_b has one column per example, so sum across columns before updating
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*np.sum(nb, axis=1).reshape(b.shape) for b, nb in zip(self.biases, nabla_b)]
    def evaluate(self, test_data):
        """Evaluate the network on test_data using the current weights/biases.
        :test_data: List[(x, y)]
        :returns: number of test examples that are correctly predicted
        """
        predictions = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
        return sum([int(x == y) for x, y in predictions])

    def cost_derivative(self, output_activations, y):
        r"""
        :output_activations: activations of the output layer (the network's output)
        :y: the desired output (label vector)
        :returns: the vector of partial derivatives \partial C_x / \partial a for the output activations.
        """
        return (output_activations - y)
    def backprop(self, x, y):
        """Backpropagation.
        :x: a column vector, or a matrix of column vectors [col, col, ...]
        :y: a column vector, or a matrix of column vectors [col, col, ...]
        :returns: a tuple (nabla_b, nabla_w) of layer-by-layer lists of numpy arrays
            representing the gradient of the cost function C_x
        """
        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)
        # feed forward
        activation = x
        activations = [x]  # list to store activations layer by layer
        zs = []            # list to store all z values, i.e. the linear combinations
        for b, w in zip(self.biases, self.weights):
            z = np.add(w.dot(activation), b)
            activation = sigmoid(z)
            zs.append(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book. Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on. It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

# train a [784, 30, 10] network on MNIST: 30 epochs, mini-batches of 10, learning rate 3.0
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net = Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
The MNIST dataset can be fetched from https://github.com/mnielsen/neural-networks-and-deep-learning.git.
Note that the code above is written in Python 3, while mnist_loader in that repository is written in Python 2, so you may need to apply changes along the following lines to make it work with Python 3.
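The exact patch isn't reproduced here; the sketch below shows the edits that are usually needed, assuming the loader still uses cPickle, gzip and zip as in the book's repository (names and paths follow that file and may need adjusting).

# hypothetical mnist_loader patch for Python 3 -- adjust to the actual file contents
import pickle   # Python 3: replaces "import cPickle"
import gzip

def load_data():
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # the pickle was written by Python 2, so decode it with latin1
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
    return (training_data, validation_data, test_data)

# in load_data_wrapper(), wrap the zip(...) calls in list(...), e.g.
#     training_data = list(zip(training_inputs, training_results))
# so that len() and random.shuffle() in the SGD code above keep working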