@tomokishii
Last active December 14, 2023 03:55
MNIST using Batch Normalization - TensorFlow tutorial
#
# mnist_cnn_bn.py date. 5/21/2016
# date. 6/2/2017 check TF 1.1 compatibility
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from my_nn_lib import Convolution2D, MaxPooling2D
from my_nn_lib import FullConnected, ReadOutLayer
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
chkpt_file = '../MNIST_data/mnist_cnn.ckpt'
def batch_norm(x, n_out, phase_train):
    """
    Batch normalization on convolutional maps.
    Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
    Args:
        x:           Tensor, 4D BHWD input maps
        n_out:       integer, depth of input maps
        phase_train: boolean tf.Variable, true indicates training phase
    Return:
        normed:      batch-normalized maps
    """
    with tf.variable_scope('bn'):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                           name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                            name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed
#
def training(loss, learning_rate):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    # Create a variable to track the global step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op

def evaluation(y_pred, y):
    correct = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return accuracy
def mlogloss(predicted, actual):
    '''
    args.
        predicted : predicted probability
                    (sum of predicted proba should be 1.0)
        actual    : actual value, label
    '''
    def inner_fn(item):
        eps = 1.e-15
        item1 = min(item, (1 - eps))
        item1 = max(item1, eps)    # clip to [eps, 1 - eps] before taking the log
        res = np.log(item1)
        return res

    nrow = actual.shape[0]
    ncol = actual.shape[1]
    mysum = sum([actual[i, j] * inner_fn(predicted[i, j])
                 for i in range(nrow) for j in range(ncol)])
    ans = -1 * mysum / nrow
    return ans
#
# Create the model
def inference(x, y_, keep_prob, phase_train):
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    with tf.variable_scope('conv_1'):
        conv1 = Convolution2D(x, (28, 28), 1, 32, (5, 5), activation='none')
        conv1_bn = batch_norm(conv1.output(), 32, phase_train)
        conv1_out = tf.nn.relu(conv1_bn)
        pool1 = MaxPooling2D(conv1_out)
        pool1_out = pool1.output()

    with tf.variable_scope('conv_2'):
        # feature maps are 14x14 after the first max-pooling
        conv2 = Convolution2D(pool1_out, (14, 14), 32, 64, (5, 5),
                              activation='none')
        conv2_bn = batch_norm(conv2.output(), 64, phase_train)
        conv2_out = tf.nn.relu(conv2_bn)
        pool2 = MaxPooling2D(conv2_out)
        pool2_out = pool2.output()
        pool2_flat = tf.reshape(pool2_out, [-1, 7*7*64])

    with tf.variable_scope('fc1'):
        fc1 = FullConnected(pool2_flat, 7*7*64, 1024)
        fc1_out = fc1.output()
        fc1_dropped = tf.nn.dropout(fc1_out, keep_prob)

    y_pred = ReadOutLayer(fc1_dropped, 1024, 10).output()

    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_pred),
                                                  reduction_indices=[1]))
    loss = cross_entropy
    train_step = training(loss, 1.e-4)
    accuracy = evaluation(y_pred, y_)

    return loss, accuracy, y_pred
#
if __name__ == '__main__':
    TASK = 'train'    # 'train' or 'test'

    # Variables
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)
    phase_train = tf.placeholder(tf.bool, name='phase_train')

    loss, accuracy, y_pred = inference(x, y_,
                                       keep_prob, phase_train)

    # Train
    lr = 0.01
    train_step = tf.train.AdagradOptimizer(lr).minimize(loss)

    vars_to_train = tf.trainable_variables()    # option-1
    vars_for_bn1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,    # TF > 1.0
                                     scope='conv_1/bn')
    vars_for_bn2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,    # TF > 1.0
                                     scope='conv_2/bn')
    vars_to_train = list(set(vars_to_train).union(set(vars_for_bn1)))
    vars_to_train = list(set(vars_to_train).union(set(vars_for_bn2)))

    if TASK == 'test' or os.path.exists(chkpt_file):
        restore_call = True
        vars_all = tf.all_variables()
        vars_to_init = list(set(vars_all) - set(vars_to_train))
        init = tf.variables_initializer(vars_to_init)    # TF > 1.0
    elif TASK == 'train':
        restore_call = False
        init = tf.global_variables_initializer()         # TF > 1.0
    else:
        print('Check task switch.')

    saver = tf.train.Saver(vars_to_train)    # option-1
    # saver = tf.train.Saver()               # option-2

    with tf.Session() as sess:
        # if TASK == 'train':    # add in option-2 case
        sess.run(init)           # option-1
        if restore_call:
            # Restore variables from disk.
            saver.restore(sess, chkpt_file)

        if TASK == 'train':
            print('\n Training...')
            for i in range(5001):
                batch_xs, batch_ys = mnist.train.next_batch(100)
                train_step.run({x: batch_xs, y_: batch_ys, keep_prob: 0.5,
                                phase_train: True})
                if i % 1000 == 0:
                    cv_fd = {x: batch_xs, y_: batch_ys, keep_prob: 1.0,
                             phase_train: False}
                    train_loss = loss.eval(cv_fd)
                    train_accuracy = accuracy.eval(cv_fd)
                    print('  step, loss, accuracy = %6d: %8.4f, %8.4f' % (i,
                          train_loss, train_accuracy))

        # Test trained model
        test_fd = {x: mnist.test.images, y_: mnist.test.labels,
                   keep_prob: 1.0, phase_train: False}
        print(' accuracy = %8.4f' % accuracy.eval(test_fd))

        # Multiclass Log Loss
        pred = y_pred.eval(test_fd)
        act = mnist.test.labels
        print(' multiclass logloss = %8.4f' % mlogloss(pred, act))

        # Save the variables to disk.
        if TASK == 'train':
            save_path = saver.save(sess, chkpt_file)
            print("Model saved in file: %s" % save_path)
#
# my_nn_lib.py
# date. 5/19/2016
#
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import sys
import numpy as np
# import cv2
import tensorflow as tf
# Convolution 2-D Layer
class Convolution2D(object):
    '''
    constructor's args:
        input     : input image (2D matrix)
        input_siz : input image size
        in_ch     : number of incoming image channels
        out_ch    : number of outgoing image channels
        patch_siz : filter (patch) size
    '''
    def __init__(self, input, input_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = input_siz[0]
        self.cols = input_siz[1]
        self.in_ch = in_ch
        self.activation = activation

        wshape = [patch_siz[0], patch_siz[1], in_ch, out_ch]
        w_cv = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                           trainable=True)
        b_cv = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                           trainable=True)
        self.w = w_cv
        self.b = b_cv
        self.params = [self.w, self.b]

    def output(self):
        shape4D = [-1, self.rows, self.cols, self.in_ch]
        x_image = tf.reshape(self.input, shape4D)    # reshape to 4D tensor
        linout = tf.nn.conv2d(x_image, self.w,
                              strides=[1, 1, 1, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            self.output = tf.nn.relu(linout)
        else:
            self.output = linout
        return self.output
# Max Pooling Layer
class MaxPooling2D(object):
    '''
    constructor's args:
        input : input image (2D matrix)
        ksize : pooling patch size
    '''
    def __init__(self, input, ksize=None):
        self.input = input
        if ksize is None:
            ksize = [1, 2, 2, 1]
        self.ksize = ksize

    def output(self):
        self.output = tf.nn.max_pool(self.input, ksize=self.ksize,
                                     strides=[1, 2, 2, 1], padding='SAME')
        return self.output
# Fully-connected Layer
class FullConnected(object):
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_h = tf.Variable(tf.truncated_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_h = tf.Variable(tf.zeros([n_out]), trainable=True)
        self.w = w_h
        self.b = b_h
        self.params = [self.w, self.b]

    def output(self):
        linarg = tf.matmul(self.input, self.w) + self.b
        self.output = tf.nn.relu(linarg)
        return self.output
# Read-out Layer
class ReadOutLayer(object):
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_o = tf.Variable(tf.random_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_o = tf.Variable(tf.zeros([n_out]), trainable=True)
        self.w = w_o
        self.b = b_o
        self.params = [self.w, self.b]

    def output(self):
        linarg = tf.matmul(self.input, self.w) + self.b
        self.output = tf.nn.softmax(linarg)
        return self.output
#
@jihobak commented Feb 13, 2017

@tomokishii

Thanks for sharing; I have a question, though.

'phase_train = tf.placeholder(tf.bool, name='phase_train')' needs to be fed even when initializing it
(similar issue, tensorflow/tensorflow#5618)

Why doesn't this code have to feed 'phase_train'?
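
For reference, a minimal TF 1.x sketch of this point (the variable and placeholder names below are made up for illustration): initializing variables does not evaluate the tf.cond branches, so the placeholder only has to be fed when the cond output itself is run. The variable initializers in this gist do not depend on phase_train, so sess.run(init) needs no feed.

import tensorflow as tf

phase_train = tf.placeholder(tf.bool, name='phase_train')
v = tf.Variable(1.0)
out = tf.cond(phase_train, lambda: v + 1.0, lambda: v + 2.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())           # no feed needed here
    print(sess.run(out, feed_dict={phase_train: True}))   # 2.0
    print(sess.run(out, feed_dict={phase_train: False}))  # 3.0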

@duducheng

Hello @tomokishii,

Thanks for your great demonstration of using BN in TF.

I have a question about L44:

                return tf.identity(batch_mean), tf.identity(batch_var)

I don't think tf.identity is needed here, is it? The ema will just save a copy and not change the original variables.

@tomokishii (Author) commented Jun 2, 2017

Hi @duducheng,

Thank you for your comment.

I quickly reviewed the code you pointed out, and I think you are right; it would be better to save memory space. Actually, because I was not confident about the timing of the variable updates, I adopted the tf.identity() wrapping; it looked safer to protect the variables from unexpected overwriting.

Tomokishii

@YifanZuo commented Jun 6, 2017

ema = tf.train.ExponentialMovingAverage(decay=0.5)
I believe the decay parameter should be set to 0.9 or 0.999 instead.
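
For what it's worth, the update rule behind tf.train.ExponentialMovingAverage is shadow = decay * shadow + (1 - decay) * value, so a rough NumPy sketch (the per-batch statistics below are just made-up noise) shows why a larger decay gives a smoother estimate of the population statistics:

import numpy as np

np.random.seed(0)
batch_means = np.random.normal(loc=0.0, scale=1.0, size=1000)   # noisy per-batch means

def run_ema(values, decay):
    shadow = values[0]
    for v in values[1:]:
        shadow = decay * shadow + (1.0 - decay) * v   # same rule as tf.train.ExponentialMovingAverage
    return shadow

for decay in (0.5, 0.9, 0.999):
    print('decay = %5.3f -> final shadow = %7.4f' % (decay, run_ema(batch_means, decay)))
# decay = 0.5 effectively keeps only the last few batches, so the stored
# mean/variance stay noisy; 0.9 or 0.999 averages over many more batches.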

@ruqayya commented Jun 6, 2017

Hi,

On running this exact same code, I am getting the feed placeholder error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'phase_train' with dtype bool
Since a value is assigned to the 'phase_train' placeholder before executing the op, I am not sure what is wrong. Could you please advise?

@tomokishii (Author) commented Jun 8, 2017

Hi @ruqayya,

On my side, the code runs without errors. How about checking this simpler code?

import tensorflow as tf

bool_ph = tf.placeholder(tf.bool)

def fn1():
    return tf.add(x, 1)
def fn2():
    return tf.add(x, 10)
    
x = tf.Variable(10, dtype=tf.int32)
out = tf.cond(bool_ph, fn1, fn2)

init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)

x_1 = sess.run(out, feed_dict={bool_ph: True})
x_2 = sess.run(out, feed_dict={bool_ph: False})

print('x_1 = ', x_1)    # 11
print('x_2 = ', x_2)    # 20

@WilliamRo commented Oct 17, 2017

Hi @duducheng,
If L44 is modified to:

return batch_mean, batch_var

the update of the moving mean and moving variance will not be triggered, because no op is created inside with tf.control_dependencies([ema_apply_op]):. tf.identity may be a good choice, except that it costs extra memory space.
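
A minimal TF 1.x sketch of that behaviour (the placeholder and names here are made up for illustration): tf.control_dependencies only attaches the dependency to ops created inside the block, so returning the pre-existing tensors would silently skip ema_apply_op, while the tf.identity wrappers force it to run.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None])
batch_mean = tf.reduce_mean(x)
ema = tf.train.ExponentialMovingAverage(decay=0.9)
ema_apply_op = ema.apply([batch_mean])

with tf.control_dependencies([ema_apply_op]):
    mean_no_dep = batch_mean                   # pre-existing tensor: no dependency added
    mean_with_dep = tf.identity(batch_mean)    # new op: runs ema_apply_op first

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(mean_no_dep, feed_dict={x: [1., 2., 3.]})
    print(sess.run(ema.average(batch_mean)))    # shadow unchanged: apply op never ran
    sess.run(mean_with_dep, feed_dict={x: [1., 2., 3.]})
    print(sess.run(ema.average(batch_mean)))    # shadow updated: apply op ran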
