Skip to content

Instantly share code, notes, and snippets.

@mbrengel
Last active March 5, 2020 11:41
Show Gist options
  • Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Bare-bones hard-coded vanilla shallow feedforward neural network (sigmoid activation, cross entropy cost function, stochastic gradient descent, backpropagation, regularization) for the MNIST dataset yielding ~98% accuracy.
#!/usr/bin/env python3
import gzip
import io
import numpy as np
import random
import requests
# reproducibility
np.random.seed(1337)
# parse data
def get_Xy(imagefile, labelfile):
    """Download and parse one MNIST image/label file pair (IDX format).

    Returns (imgs, labels): imgs is (n, 784) float32 scaled to [0, 1],
    labels is (n, 10) one-hot float64.
    """
    base = "http://yann.lecun.com/exdb/mnist"
    # labels: 8-byte header (magic + count), then one uint8 label per sample
    r = requests.get(f"{base}/{labelfile}", stream=True)
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        f.read(8)  # skip IDX header
        raw = np.frombuffer(f.read(), dtype=np.uint8)
    # one-hot encode via an identity-matrix lookup instead of the original
    # per-element Python loop (same float64 0.0/1.0 result, vectorized)
    labels = np.eye(10)[raw]
    # images: 16-byte header (magic, count, rows, cols), then 28x28 uint8 pixels
    r = requests.get(f"{base}/{imagefile}", stream=True)
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        f.read(16)  # skip IDX header
        imgs = np.frombuffer(f.read(), dtype=np.uint8).reshape(len(labels), 28 * 28).astype(np.float32) / 255
    return imgs, labels
# download the 60k-example MNIST training set and split it 50k train / 10k
# validation; the 10k test set ships as a separate file pair
X, y = get_Xy("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz")
X_tr, y_tr = X[:50000], y[:50000]
X_va, y_va = X[50000:], y[50000:]
X_te, y_te = get_Xy("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz")
# sigmoid activation function
def sigmoid(x, prime=False):
    """Element-wise logistic sigmoid; with prime=True, its derivative.

    The original accepted `prime` but ignored it; now sigmoid(x, prime=True)
    returns s * (1 - s) where s = sigmoid(x). Default behavior is unchanged.
    """
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s) if prime else s
# weights + biases
b2 = np.random.randn(1, 100)  # hidden-layer bias (100 units)
b3 = np.random.randn(1, 10)  # output-layer bias (10 classes)
# weights are divided by sqrt(fan_in) (28 = sqrt(784), 10 = sqrt(100)) so the
# initial weighted sums stay small and the sigmoids don't start saturated
w2 = np.random.randn(28 * 28, 100) / 28
w3 = np.random.randn(100, 10) / 10
# mini batch size
mbsz = 10
# learning rate
eta = .1
# regularization parameter (L2 weight decay strength)
lam = 5.0
# feed forward with trace of individual activation layers
def feed_forward(X):
    """Propagate X through the 784-100-10 network.

    Returns every intermediate layer (input, hidden pre-activation, hidden
    activation, output pre-activation, output activation) so that
    backpropagation can reuse them.
    """
    input_act = X
    hidden_pre = input_act @ w2 + b2
    hidden_act = sigmoid(hidden_pre)
    output_pre = hidden_act @ w3 + b3
    output_act = sigmoid(output_pre)
    return input_act, hidden_pre, hidden_act, output_pre, output_act
# costs + correct classifications
def evaluate(X, y):
    """Return (regularized cross-entropy cost, number of correct predictions)."""
    predictions = feed_forward(X)[-1]
    # cross-entropy; nan_to_num guards the 0 * log(0) case at saturated outputs
    ce = -y * np.log(predictions) - (1 - y) * (np.log(1 - predictions))
    cost = np.sum(np.nan_to_num(ce)) / len(X)
    # L2 weight penalty, scaled by the training-set size
    cost += (.5 * lam * (np.linalg.norm(w2) ** 2 + np.linalg.norm(w3) ** 2)) / len(X_tr)
    # a prediction counts as correct when its argmax matches the one-hot label
    correct = sum(1 for p, t in zip(predictions, y) if np.argmax(p) == np.argmax(t))
    return cost, correct
# learn: 50 epochs of mini-batch stochastic gradient descent
for epoch in range(50):
    # shuffle the training set each epoch, keeping images and labels aligned
    Xy_tr = list(zip(X_tr, y_tr))
    np.random.shuffle(Xy_tr)
    X_tr, y_tr = map(np.array, zip(*Xy_tr))
    for (X, y) in [(X_tr[i:i+mbsz], y_tr[i:i+mbsz]) for i in range(0, len(X_tr), mbsz)]:
        # feed forward, keeping every layer for backpropagation
        a1, z2, a2, z3, a3 = feed_forward(X)
        # output error: with cross-entropy + sigmoid the gradient simplifies to (a3 - y)
        delta3 = (a3 - y)
        # backpropagate through the hidden layer (sigmoid derivative is a2 * (1 - a2))
        delta2 = np.dot(delta3, w3.T) * (a2 * (1 - a2))
        # L2 weight decay first, then a gradient step averaged over the mini batch
        w3 *= 1 - (eta * lam) / len(X_tr)
        w3 -= (eta / len(X)) * np.dot(a2.T, delta3)
        b3 -= (eta / len(X)) * sum(delta3)
        w2 *= 1 - (eta * lam) / len(X_tr)
        w2 -= (eta / len(X)) * np.dot(a1.T, delta2)
        b2 -= (eta / len(X)) * sum(delta2)
    # status: cost and accuracy on train / validation / test sets
    print(f"Epoch {epoch + 1}")
    for l, X, y in (("tr", X_tr, y_tr), ("va", X_va, y_va), ("te", X_te, y_te)):
        cost, correct = evaluate(X, y)
        print(f"{l}: cost {cost:.5f} acc {correct} / {len(X)} ({float(correct) / len(X) * 100:.2f}%)")
    # blank line between epochs; the original ended with a bare `print`, a
    # Python 2 leftover that is a no-op expression in Python 3
    print()
Display the source blob
Display the rendered blob
Raw
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment