Skip to content

Instantly share code, notes, and snippets.

@mbrengel
Last active March 5, 2020 11:41
Show Gist options
  • Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Bare-bones hard-coded vanilla shallow feedforward neural network (sigmoid activation, cross entropy cost function, stochastic gradient descent, backpropagation, regularization) for the MNIST dataset yielding ~98% accuracy.
#!/usr/bin/env python3
import gzip
import io
import numpy as np
import random
import requests
# reproducibility: fix the global NumPy RNG so weight initialization and
# mini-batch shuffling are deterministic across runs
np.random.seed(1337)
# parse data
# parse data
def get_Xy(imagefile, labelfile):
    """Download and parse one MNIST image/label file pair (IDX format).

    Parameters:
        imagefile: gzipped IDX image file name on the MNIST server.
        labelfile: gzipped IDX label file name on the MNIST server.

    Returns:
        (imgs, labels) where imgs is a float32 array of shape (n, 784)
        scaled to [0, 1] and labels is a float array of shape (n, 10)
        holding one-hot encodings of the digits.

    Raises:
        requests.HTTPError: if either download fails (previously a 404
        error page would have been handed to gzip and failed cryptically).
    """
    # labels
    r = requests.get(f"http://yann.lecun.com/exdb/mnist/{labelfile}", stream=True)
    r.raise_for_status()  # fail loudly on a bad HTTP response
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        f.read(8)  # skip IDX header: magic number + item count
        digits = np.frombuffer(f.read(), dtype=np.uint8)
    # one-hot encode via row indexing into the identity matrix
    labels = np.eye(10)[digits]
    # images
    r = requests.get(f"http://yann.lecun.com/exdb/mnist/{imagefile}", stream=True)
    r.raise_for_status()
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        f.read(16)  # skip IDX header: magic, count, rows, cols
        # flatten each 28x28 image and normalize pixel values to [0, 1]
        imgs = np.frombuffer(f.read(), dtype=np.uint8).reshape(len(labels), 28 * 28).astype(np.float32) / 255
    return imgs, labels
# download MNIST: the 60k training set is split into 50k train / 10k validation;
# the 10k test set is loaded separately
X, y = get_Xy("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz")
X_tr, y_tr = X[:50000], y[:50000]
X_va, y_va = X[50000:], y[50000:]
X_te, y_te = get_Xy("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz")
# sigmoid activation function
def sigmoid(x, prime=False):
return 1 / (1 + np.exp(-x))
# weights + biases
# layer sizes: 784 inputs -> 100 hidden units -> 10 outputs (one per digit)
b2 = np.random.randn(1, 100)
b3 = np.random.randn(1, 10)
# weights drawn from N(0, 1) scaled by 1/sqrt(fan-in): 28 = sqrt(784),
# 10 = sqrt(100) — keeps pre-activations small so sigmoid doesn't saturate
w2 = np.random.randn(28 * 28, 100) / 28
w3 = np.random.randn(100, 10) / 10
# mini batch size
mbsz = 10
# learning rate
eta = .1
# regularization parameter
lam = 5.0
# feed forward with trace of individual activation layers
def feed_forward(X):
a1 = X
z2 = np.dot(a1, w2) + b2
a2 = sigmoid(z2)
z3 = np.dot(a2, w3) + b3
a3 = sigmoid(z3)
return a1, z2, a2, z3, a3
# costs + correct classifications
def evaluate(X, y):
a3 = feed_forward(X)[-1]
cost = np.sum(np.nan_to_num(-y * np.log(a3) - (1 - y) * (np.log(1 - a3)))) / len(X)
cost += (.5 * lam * (np.linalg.norm(w2) ** 2 + np.linalg.norm(w3) ** 2)) / len(X_tr)
correct = sum(1 if np.argmax(a) == np.argmax(b) else 0 for a, b in zip(a3, y))
return cost, correct
# learn
# learn: stochastic gradient descent with backpropagation
for epoch in range(50):
    # reshuffle the training set each epoch, then carve it into mini batches
    Xy_tr = list(zip(X_tr, y_tr))
    np.random.shuffle(Xy_tr)
    X_tr, y_tr = map(np.array, zip(*Xy_tr))
    for (X, y) in [(X_tr[i:i+mbsz], y_tr[i:i+mbsz]) for i in range(0, len(X_tr), mbsz)]:
        # feed forward
        a1, z2, a2, z3, a3 = feed_forward(X)
        # output error: cross-entropy loss + sigmoid output simplifies to (a3 - y)
        delta3 = (a3 - y)
        # backpropagate through the hidden layer; a2 * (1 - a2) is sigmoid'(z2)
        # (computed before w3 is updated below, which the math requires)
        delta2 = np.dot(delta3, w3.T) * (a2 * (1 - a2))
        # weight decay (L2 regularization, scaled by training-set size),
        # then the averaged mini-batch gradient step
        w3 *= 1 - (eta * lam) / len(X_tr)
        w3 -= (eta / len(X)) * np.dot(a2.T, delta3)
        b3 -= (eta / len(X)) * np.sum(delta3, axis=0)
        w2 *= 1 - (eta * lam) / len(X_tr)
        w2 -= (eta / len(X)) * np.dot(a1.T, delta2)
        b2 -= (eta / len(X)) * np.sum(delta2, axis=0)
    # status: cost and accuracy on train / validation / test sets
    print(f"Epoch {epoch + 1}")
    for l, X, y in (("tr", X_tr, y_tr), ("va", X_va, y_va), ("te", X_te, y_te)):
        cost, correct = evaluate(X, y)
        print(f"{l}: cost {cost:.5f} acc {correct} / {len(X)} ({float(correct) / len(X) * 100:.2f}%)")
    # bug fix: a bare `print` is a no-op in Python 3; call it to emit the blank line
    print()
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment