A five-layer convolutional neural network for classifying MNIST handwritten digits.
# coding: utf-8

# # 5-Layer Convnet for classifying the MNIST dataset

# In[1]:

# Importing the dependencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
get_ipython().magic('matplotlib inline')
# In[2]:

# Loading the dataset
from tensorflow.examples.tutorials.mnist import input_data
dataset = input_data.read_data_sets('MNIST_data/', one_hot=True)

# In[3]:

print('Length of training data:{:>19,}'.format(dataset.train.num_examples))
print('Length of testing data:{:>20,}'.format(dataset.test.num_examples))
print('Length of validation data:{:>17,}'.format(dataset.validation.num_examples))

# In[4]:

# Recover integer class labels from the one-hot encodings
dataset.train.true = np.argmax(dataset.train.labels, axis=1)
dataset.test.true = np.argmax(dataset.test.labels, axis=1)
dataset.validation.true = np.argmax(dataset.validation.labels, axis=1)
# In[5]:

dataset.train.true[:4]

# In[6]:

dataset.test.true[:4]

# In[7]:

dataset.validation.true[:4]
# ### Define hyperparameters

# In[8]:

# Image dimensions
image_size = 28
image_shape = (image_size, image_size)
image_shape_flat = image_size * image_size
num_channel = 1
filter_size = 4

# Hidden layer channels
hidden1_channels = 8
hidden2_channels = 16
hidden3_channels = 32
hidden4_channels = 64
hidden5_channels = 128
fully_connected_1 = 512
fully_connected_2 = 256
num_classes = 10
learning_rate = 1e-3
dropout = 0.8  # keep probability fed to tf.nn.dropout (not a drop rate)
iterations = 0  # global training-step counter
batch_size = 24
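
# Architecture at a glance (summarizing the settings above): five 4x4
# convolutional layers (8 -> 16 -> 32 -> 64 -> 128 feature maps), with
# 2x2 max pooling after the first two, then two fully connected layers
# (512 -> 256 units) and a 10-way readout.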
# ### Helper functions for `weights`, `biases`, `conv2d`, & `max_pool`

# In[9]:

# Weight initialization
def weight(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.05)
    return tf.Variable(initial)

# Bias initialization
def bias(length):
    initial = tf.constant(value=0.05, shape=[length])
    return tf.Variable(initial)

# Convolutional operation
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')

# Max pooling operation
def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
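
# A small sanity check of the spatial dimensions (a sketch; pooled_size is
# a helper introduced here for illustration, not part of the original gist).
# With 'SAME' padding and stride 1, conv2d preserves height and width, while
# each 2x2 max pool halves them, rounding up. Two pools take 28 -> 14 -> 7,
# which is why the flatten step below uses 7*7*hidden5_channels.
def pooled_size(size, num_pools=2):
    for _ in range(num_pools):
        size = (size + 1) // 2  # 'SAME' max pooling with stride 2 rounds up
    return size

assert pooled_size(image_size) == 7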
# In[10]:

# Placeholder variables
X = tf.placeholder(tf.float32, [None, image_shape_flat])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)
y_true = tf.argmax(y, axis=1)
# ## Building the Network

# ### (1st Convolutional Layer + Max pooling) Input Layer `>` Hidden Layer 1

# In[11]:

X_image = tf.reshape(X, shape=[-1, image_size, image_size, num_channel])

# In[12]:

W_hidden1 = weight(shape=[filter_size, filter_size, num_channel, hidden1_channels])
b_hidden1 = bias(length=hidden1_channels)
h_conv1 = tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1)
h_pool1 = max_pool(h_conv1)

# In[13]:

h_pool1

# ### (2nd Convolutional Layer + Max pooling) Hidden Layer 1 `>` Hidden Layer 2

# In[14]:

W_hidden2 = weight(shape=[filter_size, filter_size, hidden1_channels, hidden2_channels])
b_hidden2 = bias(length=hidden2_channels)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_hidden2) + b_hidden2)
h_pool2 = max_pool(h_conv2)

# In[15]:

h_pool2

# ### (3rd Convolutional Layer) Hidden Layer 2 `>` Hidden Layer 3

# In[16]:

W_hidden3 = weight(shape=[filter_size, filter_size, hidden2_channels, hidden3_channels])
b_hidden3 = bias(length=hidden3_channels)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_hidden3) + b_hidden3)

# In[17]:

h_conv3

# ### (4th Convolutional Layer) Hidden Layer 3 `>` Hidden Layer 4

# In[18]:

W_hidden4 = weight(shape=[filter_size, filter_size, hidden3_channels, hidden4_channels])
b_hidden4 = bias(length=hidden4_channels)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_hidden4) + b_hidden4)

# In[19]:

h_conv4

# ### (5th Convolutional Layer) Hidden Layer 4 `>` Hidden Layer 5

# In[20]:

W_hidden5 = weight(shape=[filter_size, filter_size, hidden4_channels, hidden5_channels])
b_hidden5 = bias(length=hidden5_channels)
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_hidden5) + b_hidden5)

# In[21]:

h_conv5

# ### (1st Fully Connected Layer) Hidden Layer 5 `>` Hidden Layer 6

# In[22]:
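
# After the two 2x2 max pools, the 28x28 input has been reduced to 7x7,
# so the flattened feature vector has 7*7*hidden5_channels = 6,272 elements.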
h_conv5_flat = tf.reshape(h_conv5, shape=[-1, 7*7*hidden5_channels])
W_fc1 = weight(shape=[7*7*hidden5_channels, fully_connected_1])
b_fc1 = bias(length=fully_connected_1)
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)

# In[23]:

h_fc1

# ### (2nd Fully Connected Layer + Dropout) Hidden Layer 6 `>` Hidden Layer 7

# In[24]:

W_fc2 = weight(shape=[fully_connected_1, fully_connected_2])
b_fc2 = bias(length=fully_connected_2)
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
h_drop = tf.nn.dropout(h_fc2, keep_prob=keep_prob)

# In[25]:

h_drop

# ### Readout/Output Layer

# In[26]:

W_out = weight(shape=[fully_connected_2, num_classes])
b_out = bias(length=num_classes)
y_pred = tf.matmul(h_drop, W_out) + b_out  # raw logits; softmax is applied in the loss
y_pred_true = tf.argmax(y_pred, axis=1)

# In[27]:

y_pred

# In[28]:

y_pred_true

# ### Cost function and optimizer

# In[29]:

# softmax_cross_entropy_with_logits applies the softmax internally,
# so it must be given unscaled logits.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y, name='xentropy')
cost = tf.reduce_mean(cross_entropy, name='xentropy_mean')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# ### Evaluating accuracy

# In[30]:

correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# ### Running TensorFlow's `Session()`

# In[31]:

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# ### `optimize()` and `print_accuracy()` helper functions

# In[32]:

# Optimize helper
def optimize(num_iter=1):
    global iterations
    for i in tqdm(range(num_iter)):
        X_batch, y_batch = dataset.train.next_batch(batch_size)
        feed_dict_train = {X: X_batch,
                           y: y_batch,
                           keep_prob: dropout}
        sess.run(optimizer, feed_dict=feed_dict_train)
        iterations += 1
    print('Total number of iterations so far: {:,}'.format(iterations))

# Accuracy helper
def print_accuracy():
    X_batch, y_batch = dataset.test.next_batch(batch_size)
    feed_dict_test = {X: X_batch,
                      y: y_batch,
                      keep_prob: 1.0}  # dropout is disabled during evaluation
    acc = sess.run(accuracy, feed_dict=feed_dict_test)
    print('Accuracy after {:,} iterations = {:.2%}'.format(iterations, acc))

# In[46]:

def print_validation_accuracy():
    X_batch, y_batch = dataset.validation.next_batch(batch_size)
    feed_dict_val = {X: X_batch,
                     y: y_batch,
                     keep_prob: 1.0}  # dropout is disabled during evaluation
    acc = sess.run(accuracy, feed_dict=feed_dict_val)
    print('Accuracy on validation set: {:.2%}'.format(acc))
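
# A possible extension (a sketch; print_full_test_accuracy and num_correct_op
# are names introduced here for illustration, not part of the original gist).
# print_accuracy() scores a single random batch, which is noisy; this version
# scores the entire test set in chunks, with dropout disabled.
num_correct_op = tf.reduce_sum(tf.cast(correct, tf.int32))

def print_full_test_accuracy(chunk_size=256):
    images, labels = dataset.test.images, dataset.test.labels
    num_correct = 0
    for start in range(0, len(images), chunk_size):
        feed = {X: images[start:start + chunk_size],
                y: labels[start:start + chunk_size],
                keep_prob: 1.0}
        num_correct += sess.run(num_correct_op, feed_dict=feed)
    print('Accuracy on the full test set: {:.2%}'.format(
        num_correct / float(len(images))))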
if __name__ == '__main__':
    # ### Training the network

    # In[33]:

    print_accuracy()

    # In[34]:

    optimize()

    # In[35]:

    print_accuracy()

    # In[36]:

    optimize(num_iter=9)

    # In[37]:

    optimize(num_iter=90)

    # In[38]:

    print_accuracy()

    # In[39]:

    optimize(num_iter=900)

    # In[40]:

    print_accuracy()

    # In[41]:

    optimize(num_iter=9000)

    # In[42]:

    print_accuracy()

    # In[43]:

    optimize(num_iter=100)

    # In[44]:

    optimize(num_iter=900)

    # In[45]:

    print_accuracy()

    # In[47]:

    print_validation_accuracy()
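
    # Optional follow-up (a sketch, not part of the original gist): persist
    # the trained weights with tf.train.Saver so the model can be restored
    # later via saver.restore(sess, './model.ckpt').
    saver = tf.train.Saver()
    saver.save(sess, './model.ckpt')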