cqfd · August 27, 2017 21:07
diff --git a/deminimnist.py b/deminimnist.py
 import numpy as np

 class Classifier(object):
    """Softmax classifier with one hidden layer.

    The network is Affine(28*28 -> 512), ReLU, Affine(512 -> 10), Softmax.
    """

    def __init__(self):
        self.net = Composed([
            Affine.randomized(input_dim=28*28, output_dim=512),
            ReLU(),
            Affine.randomized(input_dim=512, output_dim=10),
            Softmax()
        ])

    def __call__(self, X):
        """Return the network output for the batch ``X``."""
        return self.net(X)

    def train(self, X, Y, eta):
        """Take one gradient step on a batch and return the batch cost.

        Args:
            X: input batch; ``X.shape[0]`` is used as the batch size.
            Y: targets with the same shape as the network output (the
                script in this file passes one-hot rows).
            eta: learning rate forwarded to every layer's ``backprop``.

        Returns:
            The cross-entropy ``-sum(Y * log(Yhat)) / batch_size`` computed
            before the parameters are updated.
        """
        batch_size = X.shape[0]
        Yhat = self.net(X)
        # A prediction that underflowed to exactly 0 would give 0 * -inf in
        # the cost and 0 / 0 in the gradient, i.e. NaN that then poisons every
        # weight.  Flooring at the smallest normal float leaves all ordinary
        # predictions untouched.
        Yhat = np.maximum(Yhat, np.finfo(Yhat.dtype).tiny)
        cost = - np.sum(Y * np.log(Yhat)) / batch_size
        # Derivative of the cost with respect to the network output.
        d = - (Y / Yhat) / batch_size
        self.net.backprop(d, eta=eta)
        return cost

 class Composed(object):
    """Chain of layers applied one after another.

    Each layer is called like a function on the way forward and must offer
    ``backprop(d, eta)`` for the way back.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, X):
        """Feed ``X`` through the layers, first to last, and return the result."""
        activation = X
        for stage in self.layers:
            activation = stage(activation)
        return activation

    def backprop(self, d, eta):
        """Feed ``d`` back through the layers, last to first.

        Every layer receives the value returned by the layer after it, plus
        ``eta``; the value returned by the first layer is returned.
        """
        grad = d
        for stage in reversed(self.layers):
            grad = stage.backprop(grad, eta)
        return grad

 class Affine(object):
    """Layer computing ``X @ W + b`` whose ``backprop`` also applies an SGD step."""

    @classmethod
    def randomized(cls, input_dim, output_dim):
        """Build a layer with ``randn * 0.01`` weights and a zero bias."""
        W = np.random.randn(input_dim, output_dim) * 0.01
        b = np.zeros(output_dim)
        return cls(W, b)

    def __init__(self, W, b):
        self.W = W
        self.b = b

    def __call__(self, X):
        """Return ``X @ W + b`` and remember ``X`` for the backward pass."""
        self._X = X
        return X @ self.W + self.b

    def backprop(self, d, eta):
        """Update ``W`` and ``b`` in place and return the gradient w.r.t. the input.

        Args:
            d: gradient of the cost with respect to this layer's output.
            eta: learning rate.

        Returns:
            ``d @ W.T`` evaluated with the weights used in the forward pass.
        """
        # All three gradients must come from the pre-update parameters;
        # computing dX after stepping W would hand upstream layers a gradient
        # for weights that never produced the forward output.
        dX = d @ self.W.T
        dW = self._X.T @ d
        db = np.sum(d, axis=0)
        self.W -= eta * dW
        self.b -= eta * db
        return dX

 class ReLU(object):
    """Element-wise ``max(0, x)`` layer; it has no parameters to train."""

    def __call__(self, X):
        """Return ``X`` with negative entries replaced by 0, remembering ``X``."""
        self._X = X
        rectified = np.maximum(0, X)
        return rectified

    def backprop(self, d, eta):
        """Let ``d`` through only where the remembered input was positive.

        ``eta`` is accepted for interface parity with other layers and unused.
        """
        active = self._X > 0
        return np.multiply(active, d)

 class Softmax(object):
    """Row-wise softmax layer; it has no parameters to train."""

    def __call__(self, X):
        """Return the softmax of each row of ``X`` and remember it.

        The row maximum is subtracted before exponentiating so large inputs
        do not overflow.  ``X`` itself is left untouched.
        """
        # Build a new array rather than using ``X -= ...``, which would
        # silently rewrite the caller's data.
        shifted = X - np.max(X, axis=1, keepdims=True)
        exps = np.exp(shifted)
        self._Y = exps / np.sum(exps, axis=1, keepdims=True)
        return self._Y

    def backprop(self, d, eta):
        """Return the gradient w.r.t. the input given ``d`` w.r.t. the output.

        ``eta`` is accepted for interface parity with other layers and unused.
        """
        return self._Y * (d - np.sum(self._Y * d, axis=1, keepdims=True))

 if __name__ == '__main__':
    # Train the classifier on MNIST with a stepped learning rate, then report
    # accuracy on the test split.
    from keras.datasets import mnist
    from keras.utils import to_categorical

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten every image to one row and divide the pixel values by 255.
    x_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255
    x_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    m = Classifier()

    # (learning rate, number of passes over the training set)
    for eta, passes in ((0.05, 20), (0.03, 10), (0.02, 20)):
        for _ in range(passes):
            for i in range(60000 // 100):
                lo, hi = i * 100, (i + 1) * 100
                loss = m.train(x_train[lo:hi], y_train[lo:hi], eta)
            # Only the final mini-batch's loss of each pass is reported.
            print('loss (eta=%f): %f' % (eta, loss))

    hits = np.argmax(m(x_test), axis=1) == np.argmax(y_test, axis=1)
    accuracy = np.sum(hits) / hits.shape[0]

    print('accuracy: %f' % accuracy)
	import numpy as np

	class Classifier(object):
	    """Softmax classifier with one hidden layer.

	    The network is Affine(28*28 -> 512), ReLU, Affine(512 -> 10), Softmax.
	    """

	    def __init__(self):
	        self.net = Composed([
	            Affine.randomized(input_dim=28*28, output_dim=512),
	            ReLU(),
	            Affine.randomized(input_dim=512, output_dim=10),
	            Softmax()
	        ])

	    def __call__(self, X):
	        """Return the network output for the batch ``X``."""
	        return self.net(X)

	    def train(self, X, Y, eta):
	        """Take one gradient step on a batch and return the batch cost.

	        Args:
	            X: input batch; ``X.shape[0]`` is used as the batch size.
	            Y: targets with the same shape as the network output.
	            eta: learning rate forwarded to every layer's ``backprop``.

	        Returns:
	            The cross-entropy ``-sum(Y * log(Yhat)) / batch_size`` computed
	            before the parameters are updated.
	        """
	        batch_size = X.shape[0]
	        Yhat = self.net(X)
	        # A prediction of exactly 0 would give 0 * -inf in the cost and
	        # 0 / 0 in the gradient (NaN).  Flooring at the smallest normal
	        # float leaves all ordinary predictions untouched.
	        Yhat = np.maximum(Yhat, np.finfo(Yhat.dtype).tiny)
	        cost = - np.sum(Y * np.log(Yhat)) / batch_size
	        # Derivative of the cost with respect to the network output.
	        d = - (Y / Yhat) / batch_size
	        self.net.backprop(d, eta=eta)
	        return cost

	class Composed(object):
	    """Chain of layers applied one after another.

	    Each layer is called like a function on the way forward and must offer
	    ``backprop(d, eta)`` for the way back.
	    """

	    def __init__(self, layers):
	        self.layers = layers

	    def __call__(self, X):
	        """Feed ``X`` through the layers, first to last, and return the result."""
	        for layer in self.layers:
	            X = layer(X)
	        return X

	    def backprop(self, d, eta):
	        """Feed ``d`` back through the layers, last to first, and return the result."""
	        for layer in reversed(self.layers):
	            d = layer.backprop(d, eta)
	        return d

	class Affine(object):
	    """Layer computing ``X @ W + b`` whose ``backprop`` also applies an SGD step."""

	    @classmethod
	    def randomized(cls, input_dim, output_dim):
	        """Build a layer with ``randn * 0.01`` weights and a zero bias."""
	        W = np.random.randn(input_dim, output_dim) * 0.01
	        b = np.zeros(output_dim)
	        return cls(W, b)

	    def __init__(self, W, b):
	        self.W = W
	        self.b = b

	    def __call__(self, X):
	        """Return ``X @ W + b`` and remember ``X`` for the backward pass."""
	        self._X = X
	        return X @ self.W + self.b

	    def backprop(self, d, eta):
	        """Update ``W`` and ``b`` in place and return the gradient w.r.t. the input.

	        The returned ``d @ W.T`` uses the weights from the forward pass.
	        """
	        # Take the input gradient before stepping W; afterwards it would be
	        # computed from weights that never produced the forward output.
	        dX = d @ self.W.T
	        dW = self._X.T @ d
	        db = np.sum(d, axis=0)
	        self.W -= eta * dW
	        self.b -= eta * db
	        return dX

	class ReLU(object):
	    """Element-wise ``max(0, x)`` layer; it has no parameters to train."""

	    def __call__(self, X):
	        """Return ``X`` with negative entries replaced by 0, remembering ``X``."""
	        self._X = X
	        return np.maximum(0, X)

	    def backprop(self, d, eta):
	        """Let ``d`` through only where the remembered input was positive.

	        ``eta`` is accepted for interface parity with other layers and unused.
	        """
	        return (self._X > 0) * d

	class Softmax(object):
	    """Row-wise softmax layer; it has no parameters to train."""

	    def __call__(self, X):
	        """Return the softmax of each row of ``X`` and remember it.

	        The row maximum is subtracted before exponentiating so large inputs
	        do not overflow.  ``X`` itself is left untouched.
	        """
	        # Build a new array rather than using ``X -= ...``, which would
	        # silently rewrite the caller's data.
	        shifted = X - np.max(X, axis=1, keepdims=True)
	        exps = np.exp(shifted)
	        self._Y = exps / np.sum(exps, axis=1, keepdims=True)
	        return self._Y

	    def backprop(self, d, eta):
	        """Return the gradient w.r.t. the input given ``d`` w.r.t. the output.

	        ``eta`` is accepted for interface parity with other layers and unused.
	        """
	        return self._Y * (d - np.sum(self._Y * d, axis=1, keepdims=True))

	if __name__ == '__main__':
	    # Train the classifier on MNIST with a stepped learning rate, then
	    # report accuracy on the test split.
	    from keras.datasets import mnist
	    from keras.utils import to_categorical

	    (x_train, y_train), (x_test, y_test) = mnist.load_data()

	    # Flatten every image to one row and divide the pixel values by 255.
	    x_train = x_train.reshape(x_train.shape[0], -1)
	    x_train = x_train.astype('float32') / 255

	    x_test = x_test.reshape(x_test.shape[0], -1)
	    x_test = x_test.astype('float32') / 255

	    y_train = to_categorical(y_train)
	    y_test = to_categorical(y_test)

	    m = Classifier()

	    # (learning rate, number of passes over the training set)
	    for eta, passes in ((0.05, 20), (0.03, 10), (0.02, 20)):
	        for _ in range(passes):
	            for i in range(60000 // 100):
	                # Mini-batch i covers rows [i*100, (i+1)*100).
	                x_batch, y_batch = x_train[i*100:(i+1)*100], y_train[i*100:(i+1)*100]
	                loss = m.train(x_batch, y_batch, eta)
	            # Only the final mini-batch's loss of each pass is reported.
	            print('loss (eta=%f): %f' % (eta, loss))

	    predictions = np.argmax(m(x_test), axis=1)
	    actuals = np.argmax(y_test, axis=1)
	    accuracy = np.sum(predictions == actuals) / predictions.shape[0]

	    print('accuracy: %f' % accuracy)