Skip to content

Instantly share code, notes, and snippets.

Last active March 5, 2020 11:41
Show Gist options
  • Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Save mbrengel/ca651d42cad36b773831b1f20f41a634 to your computer and use it in GitHub Desktop.
Bare-bones hard-coded vanilla shallow feedforward neural network (sigmoid activation, cross entropy cost function, stochastic gradient descent, backpropagation, regularization) for the MNIST dataset yielding ~98% accuracy.
#!/usr/bin/env python3
import gzip
import io
import numpy as np
import random
import requests
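# reproducibility -- NOTE(review): no seeding call follows this heading in the source as shown, so `random` and `np.random` are unseeded and runs are not repeatable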
# parse data
def _fetch_idx(url, header_size):
    """Download a gzip-compressed IDX file and return its payload bytes.

    Args:
        url: Location handed to ``requests.get``.
        header_size: Number of leading header bytes to skip after
            decompression.

    Returns:
        A read-only 1-D ``uint8`` array holding everything after the header.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(f"{url}", stream=True)
    # Fail on the HTTP error itself instead of a confusing gzip error later.
    r.raise_for_status()
    # Let urllib3 undo any transfer-level Content-Encoding before we gunzip.
    r.raw.decode_content = True
    with gzip.GzipFile(fileobj=io.BytesIO(r.raw.read())) as f:
        return np.frombuffer(f.read(), dtype=np.uint8, offset=header_size)


def get_Xy(imagefile, labelfile):
    """Download an image/label file pair and decode it into training arrays.

    NOTE(review): both arguments are passed to ``requests.get`` verbatim. The
    callers visible in this file pass bare file names, so a base-URL prefix
    appears to have been lost from the source; confirm and restore it, or
    pass complete URLs.

    Args:
        imagefile: Location of the gzip-compressed IDX image file.
        labelfile: Location of the gzip-compressed IDX label file.

    Returns:
        ``(imgs, labels)`` where ``imgs`` is a ``float32`` array of shape
        ``(n, 28 * 28)`` scaled to ``[0, 1]`` and ``labels`` is a ``float64``
        one-hot array of shape ``(n, 10)``.
    """
    # labels: the IDX label header is 8 bytes (magic number + item count)
    digits = _fetch_idx(labelfile, 8)
    # Row d of the 10x10 identity matrix is the one-hot vector for digit d.
    labels = np.eye(10)[digits]
    # images: the IDX image header is 16 bytes (magic, count, rows, columns)
    pixels = _fetch_idx(imagefile, 16)
    imgs = pixels.reshape(len(labels), 28 * 28).astype(np.float32) / 255
    return imgs, labels
# Load the training files, keep the first 50000 samples for training and hold
# out the remainder for validation; the test files are loaded separately.
X, y = get_Xy("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz")
X_tr, X_va = np.split(X, [50000])
y_tr, y_va = np.split(y, [50000])
X_te, y_te = get_Xy("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz")
# sigmoid activation function
def sigmoid(x, prime=False):
    """Evaluate the logistic sigmoid, or its derivative, element-wise.

    Args:
        x: Scalar or NumPy array of pre-activations.
        prime: When true, return the derivative ``s * (1 - s)`` instead of
            the sigmoid value ``s`` itself. Previously this flag was accepted
            but ignored.

    Returns:
        ``1 / (1 + exp(-x))`` or, with ``prime=True``, its derivative; same
        shape as ``x``.
    """
    s = 1 / (1 + np.exp(-x))
    if prime:
        return s * (1 - s)
    return s
# weights + biases
# Layer 2 is the hidden layer (100 units), layer 3 the output layer (10 units).
# The order of the randn draws below determines the initial parameters, so it
# must not be rearranged if runs are to be comparable.
b2 = np.random.randn(1, 100)
b3 = np.random.randn(1, 10)
# Weights are standard-normal draws scaled down by the square root of the
# number of inputs feeding the layer: 28 == sqrt(28 * 28), 10 == sqrt(100).
w2 = np.random.randn(28 * 28, 100) / 28
w3 = np.random.randn(100, 10) / 10
# mini batch size
mbsz = 10
# learning rate
eta = .1
# regularization parameter
# (scales the squared-weight-norm penalty in the cost and the per-step weight
# shrink factor 1 - eta * lam / len(X_tr) in the training loop)
lam = 5.0
# feed forward with trace of individual activation layers
def feed_forward(X):
    """Propagate a batch through the network, keeping every layer's values.

    Reads the module-level parameters ``w2``, ``b2``, ``w3`` and ``b3``.

    Args:
        X: Input batch with one sample per row; the column count must match
            the row count of ``w2``.

    Returns:
        The tuple ``(a1, z2, a2, z3, a3)``: the input itself, then the
        pre-activation ``z`` and sigmoid activation ``a`` of the hidden layer
        and of the output layer.
    """
    a1 = X
    # Affine map into the hidden layer; b2 has one row and broadcasts over it.
    z2 = np.dot(a1, w2) + b2
    a2 = sigmoid(z2)
    # Affine map into the output layer.
    z3 = np.dot(a2, w3) + b3
    a3 = sigmoid(z3)
    return a1, z2, a2, z3, a3
# costs + correct classifications
def evaluate(X, y):
    """Score the current network on a labelled data set.

    Args:
        X: Inputs, one sample per row.
        y: One-hot targets aligned row-for-row with ``X``.

    Returns:
        ``(cost, correct)``: the mean cross-entropy over ``X`` plus the
        squared-weight-norm penalty (scaled by ``lam`` and divided by the
        training-set size), and the number of rows whose strongest output
        matches the target's hot index.
    """
    *_, output = feed_forward(X)
    # nan_to_num zeroes the 0 * log(0) terms that appear for saturated outputs.
    per_unit = np.nan_to_num(-y * np.log(output) - (1 - y) * (np.log(1 - output)))
    cost = np.sum(per_unit) / len(X)
    squared_norms = np.linalg.norm(w2) ** 2 + np.linalg.norm(w3) ** 2
    cost += (.5 * lam * squared_norms) / len(X_tr)
    hits = np.argmax(output, axis=1) == np.argmax(y, axis=1)
    correct = int(np.count_nonzero(hits))
    return cost, correct
# learn
# Train for 50 epochs of mini-batch stochastic gradient descent with
# backpropagation, printing cost and accuracy on every data set per epoch.
for epoch in range(50):
    # create mini batches
    # NOTE(review): the source as shown zips and unzips the training pairs
    # with nothing in between, which is a no-op. A per-epoch shuffle is
    # restored here because `random` is imported but otherwise unused;
    # confirm against the upstream script.
    order = list(range(len(X_tr)))
    random.shuffle(order)
    X_tr, y_tr = X_tr[order], y_tr[order]
    for i in range(0, len(X_tr), mbsz):
        X, y = X_tr[i:i + mbsz], y_tr[i:i + mbsz]
        # feed forward
        a1, z2, a2, z3, a3 = feed_forward(X)
        # calculate error
        # With sigmoid outputs and the cross-entropy cost the output-layer
        # error reduces to (a3 - y); the hidden-layer error is pulled back
        # through w3 and scaled by the sigmoid derivative a2 * (1 - a2).
        # delta2 must be computed before w3 is modified below.
        delta3 = (a3 - y)
        delta2 = np.dot(delta3, w3.T) * (a2 * (1 - a2))
        # update weights + biases
        # Each weight matrix is first shrunk by the regularization factor,
        # then moved against the gradient averaged over the mini batch.
        w3 *= 1 - (eta * lam) / len(X_tr)
        w3 -= (eta / len(X)) * np.dot(a2.T, delta3)
        b3 -= (eta / len(X)) * delta3.sum(axis=0)
        w2 *= 1 - (eta * lam) / len(X_tr)
        w2 -= (eta / len(X)) * np.dot(a1.T, delta2)
        b2 -= (eta / len(X)) * delta2.sum(axis=0)
    # status
    print(f"Epoch {epoch + 1}")
    for l, X, y in (("tr", X_tr, y_tr), ("va", X_va, y_va), ("te", X_te, y_te)):
        cost, correct = evaluate(X, y)
        print(f"{l}: cost {cost:.5f} acc {correct} / {len(X)} ({float(correct) / len(X) * 100:.2f}%)")
Display the source blob
Display the rendered blob
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment