A five-layer convolutional neural network for classifying MNIST handwritten digits.
# coding: utf-8

# # 5-Layer Convnet for classifying the MNIST dataset

# In[1]:

# Importing the dependencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
get_ipython().magic('matplotlib inline')
# In[2]:

# Loading the dataset
from tensorflow.examples.tutorials.mnist import input_data
dataset = input_data.read_data_sets('MNIST_data/', one_hot=True)

# In[3]:

print('Length of training data:{:>19,}'.format(dataset.train.num_examples))
print('Length of testing data:{:>20,}'.format(dataset.test.num_examples))
print('Length of validation data:{:>17,}'.format(dataset.validation.num_examples))

# In[4]:

# Recover integer class labels from the one-hot encodings
dataset.train.true = np.argmax(dataset.train.labels, axis=1)
dataset.test.true = np.argmax(dataset.test.labels, axis=1)
dataset.validation.true = np.argmax(dataset.validation.labels, axis=1)
# In[5]:

dataset.train.true[:4]

# In[6]:

dataset.test.true[:4]

# In[7]:

dataset.validation.true[:4]
# ### Define hyperparameters

# In[8]:

# Image dimensions
image_size = 28
image_shape = (image_size, image_size)
image_shape_flat = image_size * image_size
num_channel = 1
filter_size = 4

# Hidden layer channels
hidden1_channels = 8
hidden2_channels = 16
hidden3_channels = 32
hidden4_channels = 64
hidden5_channels = 128
fully_connected_1 = 512
fully_connected_2 = 256
num_classes = 10
learning_rate = 1e-3
dropout = 0.8  # keep probability fed to tf.nn.dropout (not a drop rate)
iterations = 0  # global training-step counter
batch_size = 24
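
# Architecture at a glance (summarizing the settings above): five 4x4
# convolutional layers (8 -> 16 -> 32 -> 64 -> 128 feature maps), with
# 2x2 max pooling after the first two, then two fully connected layers
# (512 -> 256 units) and a 10-way readout.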
# ### Helper functions for `weights`, `biases`, `conv2d`, & `max_pool`

# In[9]:

# Weight initialization
def weight(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.05)
    return tf.Variable(initial)

# Bias initialization
def bias(length):
    initial = tf.constant(value=0.05, shape=[length])
    return tf.Variable(initial)

# Convolutional operation
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')

# Max pooling operation
def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
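
# A small sanity check of the spatial dimensions (a sketch; pooled_size is
# a helper introduced here for illustration, not part of the original gist).
# With 'SAME' padding and stride 1, conv2d preserves height and width, while
# each 2x2 max pool halves them, rounding up. Two pools take 28 -> 14 -> 7,
# which is why the flatten step below uses 7*7*hidden5_channels.
def pooled_size(size, num_pools=2):
    for _ in range(num_pools):
        size = (size + 1) // 2  # 'SAME' max pooling with stride 2 rounds up
    return size

assert pooled_size(image_size) == 7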
# In[10]:

# Placeholder variables
X = tf.placeholder(tf.float32, [None, image_shape_flat])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)
y_true = tf.argmax(y, axis=1)
# ## Building the Network

# ### (1st Convolutional Layer + Max pooling) Input Layer `>` Hidden Layer 1

# In[11]:

X_image = tf.reshape(X, shape=[-1, image_size, image_size, num_channel])

# In[12]:

W_hidden1 = weight(shape=[filter_size, filter_size, num_channel, hidden1_channels])
b_hidden1 = bias(length=hidden1_channels)
h_conv1 = tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1)
h_pool1 = max_pool(h_conv1)

# In[13]:

h_pool1

# ### (2nd Convolutional Layer + Max pooling) Hidden Layer 1 `>` Hidden Layer 2

# In[14]:

W_hidden2 = weight(shape=[filter_size, filter_size, hidden1_channels, hidden2_channels])
b_hidden2 = bias(length=hidden2_channels)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_hidden2) + b_hidden2)
h_pool2 = max_pool(h_conv2)

# In[15]:

h_pool2

# ### (3rd Convolutional Layer) Hidden Layer 2 `>` Hidden Layer 3

# In[16]:

W_hidden3 = weight(shape=[filter_size, filter_size, hidden2_channels, hidden3_channels])
b_hidden3 = bias(length=hidden3_channels)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_hidden3) + b_hidden3)

# In[17]:

h_conv3

# ### (4th Convolutional Layer) Hidden Layer 3 `>` Hidden Layer 4

# In[18]:

W_hidden4 = weight(shape=[filter_size, filter_size, hidden3_channels, hidden4_channels])
b_hidden4 = bias(length=hidden4_channels)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_hidden4) + b_hidden4)

# In[19]:

h_conv4

# ### (5th Convolutional Layer) Hidden Layer 4 `>` Hidden Layer 5

# In[20]:

W_hidden5 = weight(shape=[filter_size, filter_size, hidden4_channels, hidden5_channels])
b_hidden5 = bias(length=hidden5_channels)
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_hidden5) + b_hidden5)

# In[21]:

h_conv5

# ### (1st Fully Connected Layer) Hidden Layer 5 `>` Hidden Layer 6

# In[22]:
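
# After the two 2x2 max pools, the 28x28 input has been reduced to 7x7,
# so the flattened feature vector has 7*7*hidden5_channels = 6,272 elements.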
h_conv5_flat = tf.reshape(h_conv5, shape=[-1, 7*7*hidden5_channels])
W_fc1 = weight(shape=[7*7*hidden5_channels, fully_connected_1])
b_fc1 = bias(length=fully_connected_1)
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)

# In[23]:

h_fc1

# ### (2nd Fully Connected Layer + Dropout) Hidden Layer 6 `>` Hidden Layer 7

# In[24]:

W_fc2 = weight(shape=[fully_connected_1, fully_connected_2])
b_fc2 = bias(length=fully_connected_2)
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
h_drop = tf.nn.dropout(h_fc2, keep_prob=keep_prob)

# In[25]:

h_drop

# ### Readout/Output Layer

# In[26]:

W_out = weight(shape=[fully_connected_2, num_classes])
b_out = bias(length=num_classes)
y_pred = tf.matmul(h_drop, W_out) + b_out  # raw logits; softmax is applied in the loss
y_pred_true = tf.argmax(y_pred, axis=1)

# In[27]:

y_pred

# In[28]:

y_pred_true

# ### Cost function and optimizer

# In[29]:

# softmax_cross_entropy_with_logits applies the softmax internally,
# so it must be given unscaled logits.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y, name='xentropy')
cost = tf.reduce_mean(cross_entropy, name='xentropy_mean')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# ### Evaluating accuracy

# In[30]:

correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# ### Running TensorFlow's `Session()`

# In[31]:

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# ### `optimize()` and `print_accuracy()` helper functions

# In[32]:

# Optimize helper
def optimize(num_iter=1):
    global iterations
    for i in tqdm(range(num_iter)):
        X_batch, y_batch = dataset.train.next_batch(batch_size)
        feed_dict_train = {X: X_batch,
                           y: y_batch,
                           keep_prob: dropout}
        sess.run(optimizer, feed_dict=feed_dict_train)
        iterations += 1
    print('Total number of iterations so far: {:,}'.format(iterations))

# Accuracy helper
def print_accuracy():
    X_batch, y_batch = dataset.test.next_batch(batch_size)
    feed_dict_test = {X: X_batch,
                      y: y_batch,
                      keep_prob: 1.0}  # dropout is disabled during evaluation
    acc = sess.run(accuracy, feed_dict=feed_dict_test)
    print('Accuracy after {:,} iterations = {:.2%}'.format(iterations, acc))

# In[46]:

def print_validation_accuracy():
    X_batch, y_batch = dataset.validation.next_batch(batch_size)
    feed_dict_val = {X: X_batch,
                     y: y_batch,
                     keep_prob: 1.0}  # dropout is disabled during evaluation
    acc = sess.run(accuracy, feed_dict=feed_dict_val)
    print('Accuracy on validation set: {:.2%}'.format(acc))
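
# A possible extension (a sketch; print_full_test_accuracy and num_correct_op
# are names introduced here for illustration, not part of the original gist).
# print_accuracy() scores a single random batch, which is noisy; this version
# scores the entire test set in chunks, with dropout disabled.
num_correct_op = tf.reduce_sum(tf.cast(correct, tf.int32))

def print_full_test_accuracy(chunk_size=256):
    images, labels = dataset.test.images, dataset.test.labels
    num_correct = 0
    for start in range(0, len(images), chunk_size):
        feed = {X: images[start:start + chunk_size],
                y: labels[start:start + chunk_size],
                keep_prob: 1.0}
        num_correct += sess.run(num_correct_op, feed_dict=feed)
    print('Accuracy on the full test set: {:.2%}'.format(
        num_correct / float(len(images))))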
if __name__ == '__main__':
    # ### Training the network

    # In[33]:

    print_accuracy()

    # In[34]:

    optimize()

    # In[35]:

    print_accuracy()

    # In[36]:

    optimize(num_iter=9)

    # In[37]:

    optimize(num_iter=90)

    # In[38]:

    print_accuracy()

    # In[39]:

    optimize(num_iter=900)

    # In[40]:

    print_accuracy()

    # In[41]:

    optimize(num_iter=9000)

    # In[42]:

    print_accuracy()

    # In[43]:

    optimize(num_iter=100)

    # In[44]:

    optimize(num_iter=900)

    # In[45]:

    print_accuracy()

    # In[47]:

    print_validation_accuracy()
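
    # Optional follow-up (a sketch, not part of the original gist): persist
    # the trained weights with tf.train.Saver so the model can be restored
    # later via saver.restore(sess, './model.ckpt').
    saver = tf.train.Saver()
    saver.save(sess, './model.ckpt')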