What does a minimal implementation of a multi-layer dense neural net (with backpropagation) look like?
"""A minimal implementation of a dense neural net with an arbitrary | |
number of layers, backpropagation, and a few different activation functions.""" | |
import numpy as np | |

# Activation and cost functions (with gradients)

def sigmoid(x):
    y = 1.0 / (1.0 + np.exp(-x))
    return y, y * (1.0 - y)

def relu(x):
    y = np.maximum(0.0, x)
    grad = (x > 0.0).astype(np.float64)
    return y, grad

def tanh(x):
    y = np.tanh(x)
    return y, 1.0 - y**2

def cross_entropy_cost(A, Y):
    m = Y.shape[1]
    cost = (1.0 / m) * np.sum(-Y * np.log(A) - (1.0 - Y) * np.log(1.0 - A))
    dA = (1.0 / m) * (-(Y / A) + (1.0 - Y) / (1.0 - A))  # dcost/dA
    return cost, dA
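
# Note (not in the original gist): for a sigmoid output layer, the product
# dA * sigmoid'(Z) computed in Layer.backward below algebraically simplifies
# to (1/m) * (A - Y), which is why many implementations fuse the sigmoid and
# cross-entropy gradients; they are kept separate here for clarity.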

class Layer:
    def __init__(self, n_in: int, n_out: int, activation):
        self.g = activation
        self.W = np.random.normal(scale=0.01, size=(n_out, n_in))
        self.b = np.zeros((n_out, 1))
        self._cache = {}

    def __call__(self, X):
        """Forward propagation (and cache intermediate results)."""
        Z = self.W @ X + self.b
        A, dAdZ = self.g(Z)
        self._cache['X'] = X
        self._cache['Z'] = Z
        self._cache['dAdZ'] = dAdZ
        return A

    def backward(self, dA, alpha):
        """Backward propagation and update parameters."""
        dZ = dA * self._cache['dAdZ']
        dW = dZ @ self._cache['X'].T
        db = np.sum(dZ, axis=1, keepdims=True)
        dX = self.W.T @ dZ

        # update
        self.W -= alpha * dW
        self.b -= alpha * db
        return dX
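
# Shape conventions (a note added here, not part of the original gist):
# columns are examples. For Layer(n_in, n_out), X is (n_in, m), W is
# (n_out, n_in) and b is (n_out, 1), so W @ X + b broadcasts b across the
# m columns and the activations A come out as (n_out, m).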

class NeuralNetwork:
    def __init__(self, layer_sizes, activations):
        assert len(activations) == len(layer_sizes) - 1
        self.layers = [Layer(layer_sizes[i], layer_sizes[i+1], activations[i])
                       for i in range(len(activations))]
        self.costs = []

    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
        return X

    def train(self, X, Y, niter=100, alpha=0.05):
        for i in range(niter):
            A = self(X)
            cost, dA = cross_entropy_cost(A, Y)

            # backprop and update parameters via gradient descent
            for layer in reversed(self.layers):
                dA = layer.backward(dA, alpha)

            self.costs.append(cost)
            if not (i % 10):
                print(i, "cost =", cost)

        return self
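
# A finite-difference sanity check for the backprop gradients (a sketch added
# for illustration; _check_gradients is not part of the original gist and is
# never called by the script). Since Layer.backward folds the parameter update
# into the same call, the analytic dW is recovered from the change in W.
def _check_gradients(eps=1e-5, alpha=1.0):
    rng = np.random.RandomState(0)
    X = rng.rand(3, 5)                          # 3 features, 5 examples
    Y = (rng.rand(1, 5) > 0.5).astype(float)    # binary targets
    layer = Layer(3, 1, sigmoid)
    W0, b0 = layer.W.copy(), layer.b.copy()

    def cost_at(W):
        """Cost as a function of W alone (b held at its initial value)."""
        layer.W, layer.b = W, b0
        cost, _ = cross_entropy_cost(layer(X), Y)
        return cost

    # analytic gradient: one forward/backward pass, then recover
    # dW = (W_before - W_after) / alpha from the in-place update
    _, dA = cross_entropy_cost(layer(X), Y)
    layer.backward(dA, alpha)
    dW = (W0 - layer.W) / alpha

    # numerical gradient for the single entry W[0, 0]
    Wp, Wm = W0.copy(), W0.copy()
    Wp[0, 0] += eps
    Wm[0, 0] -= eps
    numerical = (cost_at(Wp) - cost_at(Wm)) / (2.0 * eps)
    print("analytic:", dW[0, 0], "numerical:", numerical)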

# Testing

import os
import gzip
from urllib.request import urlopen

def download_gzip_file(url, file_name):
    response = gzip.GzipFile(fileobj=urlopen(url))
    with open(file_name, 'wb') as f:
        f.write(response.read())

def read_idx(fname):
    """Read an IDX format file into a numpy array. IDX is a very simple
    binary format described here: http://yann.lecun.com/exdb/mnist/"""
    with open(fname, 'rb') as f:
        # read magic bytes: dtype and ndim
        magic = f.read(4)
        assert magic[0:2] == b'\x00\x00'
        dtypes = {8: np.uint8, 9: np.int8, 11: np.int16,
                  12: np.int32, 13: np.float32, 14: np.float64}
        dtype = np.dtype(dtypes[magic[2]]).newbyteorder('>')
        ndim = magic[3]

        # read dimensions
        dims = []
        for i in range(ndim):
            b = f.read(4)
            dims.append(int.from_bytes(b, byteorder='big'))

        # read data
        data = np.fromfile(f, dtype=dtype, count=np.prod(dims))
        data.shape = dims
        return data
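
# For reference (a note, not in the original gist), the MNIST IDX headers
# give: train images -> (60000, 28, 28) uint8, train labels -> (60000,)
# uint8, and 10000 of each for the test set.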

if __name__ == '__main__':
    # get some data
    root_url = "http://yann.lecun.com/exdb/mnist/"
    urls = {"train_images": root_url + "train-images-idx3-ubyte.gz",
            "train_labels": root_url + "train-labels-idx1-ubyte.gz",
            "test_images": root_url + "t10k-images-idx3-ubyte.gz",
            "test_labels": root_url + "t10k-labels-idx1-ubyte.gz"}
    fnames = {key: url.split('/')[-1][:-3] for key, url in urls.items()}
    for key in urls:
        if not os.path.exists(fnames[key]):
            download_gzip_file(urls[key], fnames[key])

    # Read the data
    data = {key: read_idx(fname) for key, fname in fnames.items()}

    # Munge data: flatten each image to a column and scale to [0, 1]
    X = {}
    for k in ('train', 'test'):
        images = data[k + '_images']
        images = images.reshape((images.shape[0], -1)).T
        X[k] = images / images.max(axis=0)

    # Munge data: one-hot encode Y
    Y = {}
    for k in ('train', 'test'):
        labels = data[k + '_labels']
        y = np.zeros((labels.size, labels.max() + 1))
        y[np.arange(labels.size), labels] = 1.0
        Y[k] = y.T

    # Run training
    network = NeuralNetwork([28*28, 100, 10], [relu, sigmoid])
    network.train(X['train'], Y['train'], niter=300, alpha=0.2)

    # show training cost
    import matplotlib.pyplot as plt
    plt.plot(network.costs)
    plt.ylim(bottom=0.0)
    plt.ylabel("cost")
    plt.xlabel("iteration")
    plt.savefig("costs.png")

    # Validate
    for key in ('train', 'test'):
        print(key, 'set')
        Ypred = network(X[key])
        labels = np.argmax(Ypred, axis=0)
        print("Truth:     ", data[key + '_labels'][:30])
        print("Prediction:", labels[:30])
        correct = data[key + '_labels'] == labels
        print("Correct: {:6.2f}%\n".format(100.0 * correct.mean()))