Last active
March 5, 2020 11:41
-
-
Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Bare-bones hard-coded vanilla shallow feedforward neural network (sigmoid activation, cross entropy cost function, stochastic gradient descent, backpropagation, regularization) for the MNIST dataset yielding ~98% accuracy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import gzip | |
import io | |
import numpy as np | |
import random | |
import requests | |
# reproducibility: fix the NumPy RNG seed so weight init and the per-epoch
# shuffle below are deterministic across runs
np.random.seed(1337)
# parse data | |
def _fetch_gz(filename):
    """Download one gzipped MNIST file and return its decompressed bytes.

    Raises requests.HTTPError on a non-2xx response instead of silently
    gunzipping an error page.
    NOTE(review): yann.lecun.com has started rejecting unauthenticated
    downloads; a mirror URL may be needed — confirm before relying on this.
    """
    r = requests.get(f"http://yann.lecun.com/exdb/mnist/{filename}", stream=True)
    r.raise_for_status()
    # undo any transport Content-Encoding so we gunzip the actual .gz payload
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        return f.read()

def get_Xy(imagefile, labelfile):
    """Download one MNIST split and return (images, labels).

    images: float32 array of shape (n, 784) with pixel values in [0, 1].
    labels: float64 array of shape (n, 10), one-hot encoded.
    """
    # labels: skip the 8-byte IDX header (magic + count), one byte per label;
    # np.eye(10)[digits] builds the same float64 one-hot rows the original
    # list comprehension produced
    raw = _fetch_gz(labelfile)
    digits = np.frombuffer(raw[8:], dtype=np.uint8)
    labels = np.eye(10)[digits]
    # images: skip the 16-byte IDX header (magic + count + rows + cols),
    # flatten each 28x28 image and scale into [0, 1]
    raw = _fetch_gz(imagefile)
    imgs = np.frombuffer(raw[16:], dtype=np.uint8).reshape(len(labels), 28 * 28).astype(np.float32) / 255
    return imgs, labels
# download MNIST: the 60k training images are split 50k train / 10k validation;
# the 10k held-out test set is fetched separately
X, y = get_Xy("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz")
X_tr, y_tr = X[:50000], y[:50000]
X_va, y_va = X[50000:], y[50000:]
X_te, y_te = get_Xy("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz")
# sigmoid activation function | |
def sigmoid(x, prime=False):
    """Elementwise logistic sigmoid 1/(1+e^-x); with prime=True, its derivative.

    BUG FIX: the original accepted `prime` but ignored it and always returned
    the plain sigmoid; callers worked around that by computing a*(1-a) inline.
    Implementing the derivative branch is backward-compatible since no caller
    passes prime=True.
    """
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s) if prime else s
# weights + biases for the 784 -> 100 -> 10 network; biases are unit Gaussians,
# weights are Gaussians scaled by 1/sqrt(fan_in) (28 = sqrt(28*28),
# 10 = sqrt(100)) to keep initial pre-activations from saturating the sigmoid
b2 = np.random.randn(1, 100)
b3 = np.random.randn(1, 10)
w2 = np.random.randn(28 * 28, 100) / 28
w3 = np.random.randn(100, 10) / 10
# mini batch size
mbsz = 10
# learning rate for the SGD updates
eta = .1
# regularization parameter (L2 weight decay strength)
lam = 5.0
# feed forward with trace of individual activation layers | |
def feed_forward(X):
    """Run one forward pass through the 784 -> 100 -> 10 network.

    Returns every intermediate layer — (input, hidden pre-activation,
    hidden activation, output pre-activation, output activation) — so
    backpropagation can reuse them without recomputing.
    """
    hidden_pre = X @ w2 + b2
    hidden = sigmoid(hidden_pre)
    out_pre = hidden @ w3 + b3
    out = sigmoid(out_pre)
    return X, hidden_pre, hidden, out_pre, out
# costs + correct classifications | |
def evaluate(X, y):
    """Return (regularized cross-entropy cost, number of correct predictions)."""
    *_, preds = feed_forward(X)
    # mean cross-entropy; nan_to_num guards against log(0) when an output saturates
    ce = -y * np.log(preds) - (1 - y) * (np.log(1 - preds))
    cost = np.sum(np.nan_to_num(ce)) / len(X)
    # L2 penalty is normalized by the training-set size, matching the weight-decay
    # factor used in the update loop
    sq_norms = np.linalg.norm(w2) ** 2 + np.linalg.norm(w3) ** 2
    cost += (.5 * lam * sq_norms) / len(X_tr)
    # a prediction counts as correct when its argmax matches the one-hot target
    correct = sum(int(np.argmax(p) == np.argmax(t)) for p, t in zip(preds, y))
    return cost, correct
# learn | |
# learn: 50 epochs of mini-batch SGD with L2 weight decay and backpropagation.
# NOTE: this loop deliberately mutates the global weights/biases in place and
# rebinds X_tr/y_tr (and X/y) each epoch; statement order matters for the RNG.
for epoch in range(50):
    # create mini batches: shuffle (image, label) pairs together, rebuild arrays
    Xy_tr = list(zip(X_tr, y_tr))
    np.random.shuffle(Xy_tr)
    X_tr, y_tr = map(np.array, zip(*Xy_tr))
    for (X, y) in [(X_tr[i:i+mbsz], y_tr[i:i+mbsz]) for i in range(0, len(X_tr), mbsz)]:
        # feed forward, keeping all intermediate layers for backprop
        a1, z2, a2, z3, a3 = feed_forward(X)
        # calculate error: delta3 is the cross-entropy/sigmoid output error;
        # a2 * (1 - a2) is sigmoid'(z2) applied to the backpropagated signal
        delta3 = (a3 - y)
        delta2 = np.dot(delta3, w3.T) * (a2 * (1 - a2))
        # update weights + biases: first shrink weights by the L2 decay factor
        # (normalized by the full training-set size), then take the gradient
        # step averaged over the mini batch; sum(delta) adds the rows, i.e.
        # sums the per-example bias gradients
        w3 *= 1 - (eta * lam) / len(X_tr)
        w3 -= (eta / len(X)) * np.dot(a2.T, delta3)
        b3 -= (eta / len(X)) * sum(delta3)
        w2 *= 1 - (eta * lam) / len(X_tr)
        w2 -= (eta / len(X)) * np.dot(a1.T, delta2)
        b2 -= (eta / len(X)) * sum(delta2)
    # status: cost and accuracy on train / validation / test splits
    print(f"Epoch {epoch + 1}")
    for l, X, y in (("tr", X_tr, y_tr), ("va", X_va, y_va), ("te", X_te, y_te)):
        cost, correct = evaluate(X, y)
        print(f"{l}: cost {cost:.5f} acc {correct} / {len(X)} ({float(correct) / len(X) * 100:.2f}%)")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment