@cqfd
Created August 27, 2017 21:07
Little MNIST classifier from scratch: a 784→512→10 multilayer perceptron (affine → ReLU → affine → softmax) trained with minibatch SGD on cross-entropy loss.
import numpy as np


class Classifier(object):
    def __init__(self):
        self.net = Composed([
            Affine.randomized(input_dim=28*28, output_dim=512),
            ReLU(),
            Affine.randomized(input_dim=512, output_dim=10),
            Softmax()
        ])

    def __call__(self, X):
        return self.net(X)

    def train(self, X, Y, eta):
        batch_size = X.shape[0]
        Yhat = self.net(X)
        # Mean cross-entropy loss over the batch.
        cost = -np.sum(Y * np.log(Yhat)) / batch_size
        # Gradient of the loss w.r.t. Yhat; composed with Softmax.backprop
        # below, this works out to the familiar (Yhat - Y) / batch_size.
        d = -(Y / Yhat) / batch_size
        self.net.backprop(d, eta=eta)
        return cost
class Composed(object):
    def __init__(self, layers):
        self.layers = layers

    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
        return X

    def backprop(self, d, eta):
        for layer in reversed(self.layers):
            d = layer.backprop(d, eta)
        return d
class Affine(object):
    @classmethod
    def randomized(cls, input_dim, output_dim):
        W = np.random.randn(input_dim, output_dim) * 0.01
        b = np.zeros(output_dim)
        return cls(W, b)

    def __init__(self, W, b):
        self.W = W
        self.b = b

    def __call__(self, X):
        self._X = X  # cache the input for backprop
        return X @ self.W + self.b

    def backprop(self, d, eta):
        # Compute the gradient w.r.t. the input *before* updating W, so that
        # dX uses the weights the forward pass actually saw.
        dX = d @ self.W.T
        dW = self._X.T @ d
        db = np.sum(d, axis=0)
        self.W -= eta * dW
        self.b -= eta * db
        return dX
class ReLU(object):
    def __call__(self, X):
        self._X = X  # cache the input for backprop
        return np.maximum(0, X)

    def backprop(self, d, eta):
        # Pass gradients through only where the input was positive.
        return (self._X > 0) * d
class Softmax(object):
    def __call__(self, X):
        # Subtract the row max for numerical stability (this doesn't change
        # the softmax); avoid mutating the caller's array in place.
        X = X - np.max(X, axis=1, keepdims=True)
        self._Y = np.exp(X) / np.sum(np.exp(X), axis=1, keepdims=True)
        return self._Y

    def backprop(self, d, eta):
        # Jacobian-vector product of softmax:
        # dX_j = Y_j * (d_j - sum_k Y_k * d_k).
        return self._Y * (d - np.sum(self._Y * d, axis=1, keepdims=True))
if __name__ == '__main__':
    from keras.datasets import mnist
    from keras.utils import to_categorical

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_train = x_train.astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], -1)
    x_test = x_test.astype('float32') / 255
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    m = Classifier()
    batch_size = 100
    # Step-decay schedule: (learning rate, number of epochs).
    for eta, epochs in [(0.05, 20), (0.03, 10), (0.02, 20)]:
        for epoch in range(epochs):
            for i in range(x_train.shape[0] // batch_size):
                x_batch = x_train[i*batch_size:(i+1)*batch_size]
                y_batch = y_train[i*batch_size:(i+1)*batch_size]
                loss = m.train(x_batch, y_batch, eta)
            print('loss (eta=%f): %f' % (eta, loss))

    predictions = np.argmax(m(x_test), axis=1)
    actuals = np.argmax(y_test, axis=1)
    accuracy = np.sum(predictions == actuals) / predictions.shape[0]
    print('accuracy: %f' % accuracy)
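
Addendum (not part of the original gist): a minimal finite-difference sanity check of the backprop math above. It reuses the Composed/Affine/ReLU/Softmax classes from this file; the helper numerical_grad and the toy shapes are my own illustration, not part of the original. With eta=0 the parameter updates are no-ops, so Composed.backprop simply returns the gradient of the loss with respect to the network input, which we can compare against central differences.

import numpy as np

def numerical_grad(f, x, eps=1e-5):
    # Central-difference estimate of df/dx, perturbing one entry at a time.
    g = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + eps
        fp = f()
        x[ix] = old - eps
        fm = f()
        x[ix] = old
        g[ix] = (fp - fm) / (2 * eps)
        it.iternext()
    return g

np.random.seed(0)
X = np.random.randn(5, 4)
Y = np.eye(3)[np.random.randint(3, size=5)]  # one-hot targets
net = Composed([Affine.randomized(4, 8), ReLU(),
                Affine.randomized(8, 3), Softmax()])

Yhat = net(X)
d = -(Y / Yhat) / X.shape[0]
analytic = net.backprop(d, eta=0.0)  # eta=0: no parameter updates

numeric = numerical_grad(
    lambda: -np.sum(Y * np.log(net(X))) / X.shape[0], X)

rel_err = np.max(np.abs(analytic - numeric)
                 / (np.abs(analytic) + np.abs(numeric) + 1e-12))
print('max relative error: %e' % rel_err)  # should be tiny, ~1e-7 or less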