fashion_mnist_theano.py - another MNIST-style (Fashion-MNIST) classification example using Theano. (Goodbye, Theano!)
#
#   fashion_mnist_theano.py
#   date. 10/2/2017
#
#   REM: I read the article announcing that development of Theano is
#   stopping. The deep learning framework stimulated me and made me
#   write this code. I'd like to say thank you to the Theano support team.
#

import os
import numpy as np

import theano
import theano.tensor as T

# using the tensorflow mnist loader to load Fashion-MNIST
#   ref. https://github.com/zalandoresearch/fashion-mnist
#
from tensorflow.examples.tutorials.mnist import input_data
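
# NOTE: input_data.read_data_sets() falls back to downloading the original
# MNIST digits when the directory is empty, so the four Fashion-MNIST .gz
# files from the repository referenced above must already be placed in the
# data directory (they use the same filenames as MNIST:
# train-images-idx3-ubyte.gz, etc.).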
class HiddenLayer(object):
    """
    Fully connected hidden layer
    """
    def __init__(self, input, n_in, n_out, rng, W=None, b=None,
                 activation=T.nnet.relu):
        self.input = input

        if W is None:
            W_values = np.asarray(
                rng.uniform(
                    low=-np.sqrt(6. / (n_in + n_out)),
                    high=np.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == T.nnet.sigmoid:
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]
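
# A quick shape check for HiddenLayer. W is drawn from the Glorot/Xavier
# uniform range +-sqrt(6 / (n_in + n_out)), scaled by 4 for sigmoid
# activations, following Glorot & Bengio (2010). Illustrative sketch
# (`x_sym`, `layer`, `f` are placeholder names, not part of the model):
#
#   rng = np.random.RandomState(0)
#   x_sym = T.matrix('x_sym')
#   layer = HiddenLayer(x_sym, n_in=784, n_out=512, rng=rng)
#   f = theano.function([x_sym], layer.output)
#   out = f(np.zeros((2, 784), dtype=theano.config.floatX))
#   # out.shape == (2, 512); ReLU output, so out >= 0 everywhere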
class SoftmaxRegression(object):
    """
    Multi-class Logistic (Softmax) Regression layer
    """
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(
            value=np.zeros((n_in, n_out),
                           dtype=theano.config.floatX),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=np.zeros((n_out,),
                           dtype=theano.config.floatX),
            name='b',
            borrow=True
        )

        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]
        self.input = input
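
# Each row of p_y_given_x is a distribution over the classes (rows sum to 1)
# and y_pred takes the argmax. Illustrative sketch (placeholder names; with
# W and b initialized to zeros the output is uniform):
#
#   x_sym = T.matrix('x_sym')
#   sr = SoftmaxRegression(x_sym, n_in=4, n_out=3)
#   probs = theano.function([x_sym], sr.p_y_given_x)
#   p = probs(np.ones((2, 4), dtype=theano.config.floatX))
#   # p.shape == (2, 3); every entry == 1/3 until W, b are trained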
def mlp_model(input, n_in, n_out, rng, n_hidden=(512, 256)):
    """
    make multi-layer perceptron model
    """
    hidden1 = HiddenLayer(input, n_in, n_hidden[0], rng)
    hidden2 = HiddenLayer(hidden1.output,
                          n_hidden[0], n_hidden[1], rng)
    readout = SoftmaxRegression(hidden2.output, n_hidden[1], n_out)
    l2_loss = ((hidden1.W ** 2).sum()
               + (hidden2.W ** 2).sum()
               + (readout.W ** 2).sum())
    params = hidden1.params + hidden2.params + readout.params

    return readout.p_y_given_x, readout.y_pred, l2_loss, params
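
# The resulting network is 784 -> 512 -> 256 -> 10 (ReLU, ReLU, softmax),
# i.e. 401,920 + 131,328 + 2,570 = 535,818 trainable parameters. Note that
# l2_loss sums only the weight matrices; biases are conventionally excluded
# from L2 regularization.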
def loss(p_y_given_x, y_pred, y_label):
    # negative log likelihood
    nll = -T.mean(T.log(p_y_given_x)[T.arange(y_label.shape[0]), y_label])

    # errors
    if y_label.ndim != y_pred.ndim:
        raise TypeError(
            'y should have the same shape as y_pred',
            ('y', y_label.type, 'y_pred', y_pred.type))
    if y_label.dtype.startswith('int'):
        error_rate = T.mean(T.neq(y_pred, y_label))
    else:
        raise NotImplementedError()

    return nll, error_rate
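
# The NLL line uses advanced indexing: T.log(p_y_given_x) has shape
# (batch, n_classes) and [T.arange(N), y_label] picks each row's
# log-probability of the true class, giving
#   nll = -(1/N) * sum_i log p(y_i | x_i)
# The same indexing in NumPy (illustrative):
#   logp = np.log(p)                       # p: (N, 10) softmax output
#   nll = -logp[np.arange(N), y].mean()    # y: (N,) integer labels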
def theano_ready_dataset(dirn):
    """
    prepare theano-ready Fashion-MNIST dataset
    """
    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    # Load Dataset
    fmnist = input_data.read_data_sets(dirn, one_hot=False)
    train_set_x, train_set_y = shared_dataset((fmnist.train.images,
                                               fmnist.train.labels))
    valid_set_x, valid_set_y = shared_dataset((fmnist.validation.images,
                                               fmnist.validation.labels))
    test_set_x, test_set_y = shared_dataset((fmnist.test.images,
                                             fmnist.test.labels))

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]

    return rval
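
# Keeping the datasets in theano.shared variables lets minibatches be taken
# as on-device slices (via `givens` below) rather than copied through
# function inputs on every call. Labels are first stored as floatX because
# GPU-resident shared variables were float-only on the old Theano backend,
# then symbolically cast back to int32 for indexing and comparison.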
def test_mnist(dirn, batch_size=50,
               learning_rate=0.01,
               L2_reg=0.0001, n_epochs=10):
    """
    Main body of Fashion-MNIST classification
    """
    # Load Datasets
    fileA_path = os.path.join(dirn, 'train-images-idx3-ubyte.gz')
    if os.path.exists(fileA_path):
        datasets = theano_ready_dataset(dirn)
    else:
        raise OSError('Check path to data files.')

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Define Graph
    index = T.lscalar()
    x = T.matrix('x')   # the data is presented as rasterized images [-1, 784]
    y = T.ivector('y')  # the labels are presented as a 1-D vector of int labels
    rng = np.random.RandomState(1234)  # Random state

    # MLP model
    p_y_given_x, y_pred, l2_loss, params = mlp_model(
        input=x,
        n_in=28 * 28,
        n_out=10,
        rng=rng
    )
    # Loss
    loss_nll, errors = loss(p_y_given_x, y_pred, y)
    cost = loss_nll + L2_reg * l2_loss  # L2 regularization term

    # Theano Functions
    gparams = [T.grad(cost, param) for param in params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
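
    # `givens` substitutes a minibatch slice of the shared dataset for the
    # symbolic x and y, so train_model(i) runs one SGD step on minibatch i
    # without feeding arrays in from Python. Conceptually, per call:
    #   lo, hi = i * batch_size, (i + 1) * batch_size
    #   x <- train_set_x[lo:hi];  y <- train_set_y[lo:hi]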
    # Train
    print('Training...')
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        # train step
        for mini_batch_index in range(n_train_batches):
            cost_j = train_model(mini_batch_index)
            if mini_batch_index % 40 == 0:
                print('epoch[{:>5d}] : cost ={:>10.4f}'.format(
                    epoch, float(cost_j)))

        # validation step
        cost_list = []
        err_list = []
        for mini_batch_index in range(n_valid_batches):
            cost_j, err_j = validate_model(mini_batch_index)
            cost_list.append(float(cost_j))
            err_list.append(float(err_j))
        cost_val_mean = np.mean(cost_list)
        accu_val_mean = 1.0 - np.mean(err_list)
        print('validation: cost ={:>10.4f}, accuracy={:>10.4f}'.format(
            cost_val_mean, accu_val_mean))

    return None
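
# To run: place the four Fashion-MNIST .gz files (from the repository
# referenced above) into the directory passed as `dirn`, then:
#   $ python fashion_mnist_theano.py
# Note that the test split is loaded and n_test_batches is computed, but
# this script only reports validation cost/accuracy each epoch.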
if __name__ == '__main__':
    test_mnist(dirn='../FMNISTdata', n_epochs=20)