Skip to content

Instantly share code, notes, and snippets.

@tomokishii
Last active February 24, 2022 11:49
Show Gist options
  • Save tomokishii/7ddde510edb1c4273438ba0663b26fc6 to your computer and use it in GitHub Desktop.
Save tomokishii/7ddde510edb1c4273438ba0663b26fc6 to your computer and use it in GitHub Desktop.
TensorFlow MNIST Autoencoders

README.md

These are example TensorFlow autoencoder implementations. They were inspired by a very educational Keras Blog article.

http://blog.keras.io/building-autoencoders-in-keras.html

Building Autoencoders in Keras

"Autoencoding" is a data compression algorithm where the compression and decompression functions are 1) data-specific, 2) lossy, and 3) learned automatically from examples rather than engineered by a human. Additionally, in almost all contexts where the term "autoencoder" is used, the compression and decompression functions are implemented with neural networks.

This time I use "TensorFlow" in order to learn how to use tf.nn.conv2d_transpose(). Note that this TensorFlow API is different from the UpSampling2D() layer that Keras provides.

  1. mnist_ae1.py - very simple model of autoencoder
  2. mnist_ae2.py - convolutional autoencoder
#
# mnist_ae1.py date. 7/4/2016
#
# Autoencoder tutorial code
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import tensorflow as tf
# Import data
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data set from ../MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

# Layer widths: flattened image (reshaped to 28x28 when plotted), hidden code,
# and label vector.
N_PIXELS = 784
N_HIDDEN = 625
N_CLASSES = 10


def _small_normal(shape):
    """Return a zero-mean normal initializer tensor with stddev 0.05."""
    return tf.random_normal(shape, mean=0.0, stddev=0.05)


# Graph inputs; the batch dimension is left open.
x = tf.placeholder("float", [None, N_PIXELS])
y_ = tf.placeholder("float", [None, N_CLASSES])

# Encoder / decoder weights.
# For tied weights, use w_dec = tf.transpose(w_enc) instead of a second variable.
w_enc = tf.Variable(_small_normal([N_PIXELS, N_HIDDEN]))
w_dec = tf.Variable(_small_normal([N_HIDDEN, N_PIXELS]))

# Encoder / decoder biases start at zero.
b_enc = tf.Variable(tf.zeros([N_HIDDEN]))
b_dec = tf.Variable(tf.zeros([N_PIXELS]))
# Create the model
def model(X, w_e, b_e, w_d, b_d):
    """Build a single-hidden-layer autoencoder graph.

    X is multiplied by w_e, offset by b_e and passed through a sigmoid to give
    the code; the code then goes through the same affine + sigmoid step with
    w_d / b_d to give the reconstruction.

    Returns the pair (encoded, decoded).
    """
    hidden_pre = tf.matmul(X, w_e) + b_e
    encoded = tf.sigmoid(hidden_pre)
    output_pre = tf.matmul(encoded, w_d) + b_d
    return encoded, tf.sigmoid(output_pre)
encoded, decoded = model(x, w_enc, b_enc, w_dec, b_dec)

# Cost function: per-pixel binary cross-entropy between input and
# reconstruction.  A float32 sigmoid can saturate to exactly 0.0 or 1.0; tf.log
# then returns -inf and the 0 * -inf products turn the whole loss into NaN, so
# the reconstruction is clipped away from both ends before taking logs.
_EPS = 1e-7
decoded_safe = tf.clip_by_value(decoded, _EPS, 1. - _EPS)
cross_entropy = (-1. * x * tf.log(decoded_safe)
                 - (1. - x) * tf.log(1. - decoded_safe))
loss = tf.reduce_mean(cross_entropy)
train_step = tf.train.AdagradOptimizer(0.1).minimize(loss)

# Train
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    print('Training...')
    for i in range(10001):
        batch_xs, batch_ys = mnist.train.next_batch(128)
        train_fd = {x: batch_xs, y_: batch_ys}
        train_step.run(train_fd)
        if i % 1000 == 0:
            # loss on the batch that was just used for the update
            train_loss = loss.eval(train_fd)
            print(' step, loss = %6d: %6.3f' % (i, train_loss))

    # generate decoded image with test data; fetch reconstruction and loss in
    # one run so the test set goes through the network only once
    test_fd = {x: mnist.test.images, y_: mnist.test.labels}
    decoded_imgs, test_loss = sess.run([decoded, loss], feed_dict=test_fd)
    print('loss (test) = ', test_loss)

x_test = mnist.test.images
n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
plt.gray()
for i in range(n):
    # top row: original digits, bottom row: their reconstructions
    for row, images in enumerate((x_test, decoded_imgs)):
        ax = plt.subplot(2, n, i + 1 + row * n)
        plt.imshow(images[i].reshape(28, 28))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.savefig('mnist_ae1.png')
#
# mnist_ae2.py date. 7/4/2016
#
# Autoencoder tutorial code - trial of convolutional AE
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from my_nn_lib import Convolution2D, MaxPooling2D
from my_nn_lib import FullConnected, ReadOutLayer
# Up-sampling 2-D Layer (deconvolutional Layer)
class Conv2Dtranspose(object):
    '''
    Transposed 2-D convolution layer with stride 2 and 'SAME' padding.

    constructor's args:
        input      : input tensor handed to tf.nn.conv2d_transpose; its first
                     dimension is read as the batch size
                     (assumed 4-D, channels last - confirm against callers)
        output_siz : output image size (rows, cols)
        in_ch      : number of incoming image channel
        out_ch     : number of outgoing image channel
        patch_siz  : filter(patch) size
        activation : 'relu' or 'sigmoid'; any other value keeps the output linear
    '''
    def __init__(self, input, output_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = output_siz[0]
        self.cols = output_siz[1]
        self.out_ch = out_ch
        self.activation = activation

        # note the arguments order: the transposed filter is laid out as
        # [height, width, out_ch, in_ch]
        wshape = [patch_siz[0], patch_siz[1], out_ch, in_ch]
        w_cvt = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                            trainable=True)
        b_cvt = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                            trainable=True)
        # batch size is only known at run time, so take it from the tensor
        self.batsiz = tf.shape(input)[0]
        self.w = w_cvt
        self.b = b_cvt
        self.params = [self.w, self.b]

    def output(self):
        '''
        Build and return the layer's output tensor.

        The result is returned without being stored on the instance: assigning
        it to self.output would replace this method and make a second call
        fail with "object is not callable".
        '''
        shape4D = [self.batsiz, self.rows, self.cols, self.out_ch]
        linout = tf.nn.conv2d_transpose(self.input, self.w, output_shape=shape4D,
                                        strides=[1, 2, 2, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            return tf.nn.relu(linout)
        if self.activation == 'sigmoid':
            return tf.sigmoid(linout)
        return linout
# Create the model
# NOTE(review): nothing in the visible part of this script calls model();
# mk_nn_model() below builds the network that is actually trained.
def model(X, w_e, b_e, w_d, b_d):
    """Build a fully connected autoencoder with one hidden layer.

    Both stages are an affine map (matmul plus bias) followed by a sigmoid:
    X -> encoded using w_e / b_e, then encoded -> decoded using w_d / b_d.

    Returns the pair (encoded, decoded).
    """
    code_pre = tf.matmul(X, w_e) + b_e
    encoded = tf.sigmoid(code_pre)
    recon_pre = tf.matmul(encoded, w_d) + b_d
    return encoded, tf.sigmoid(recon_pre)
def mk_nn_model(x, y_):
    """Build the convolutional autoencoder graph.

    Args:
        x  : batch of flattened images; reshaped here to [-1, 28, 28, 1].
        y_ : label placeholder; accepted for interface symmetry but not used
             by the reconstruction loss.

    Returns:
        (loss, decoded): the mean per-pixel binary cross-entropy between x and
        the reconstruction, and the reconstruction reshaped to [-1, 784].
    """
    # Encoding phase
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    conv1 = Convolution2D(x_image, (28, 28), 1, 16,
                          (3, 3), activation='relu')
    conv1_out = conv1.output()

    pool1 = MaxPooling2D(conv1_out)
    pool1_out = pool1.output()

    conv2 = Convolution2D(pool1_out, (14, 14), 16, 8,
                          (3, 3), activation='relu')
    conv2_out = conv2.output()

    pool2 = MaxPooling2D(conv2_out)
    pool2_out = pool2.output()

    conv3 = Convolution2D(pool2_out, (7, 7), 8, 8, (3, 3), activation='relu')
    conv3_out = conv3.output()

    pool3 = MaxPooling2D(conv3_out)
    pool3_out = pool3.output()
    # at this point the representation is (8, 4, 4) i.e. 128-dimensional

    # Decoding phase
    conv_t1 = Conv2Dtranspose(pool3_out, (7, 7), 8, 8,
                              (3, 3), activation='relu')
    conv_t1_out = conv_t1.output()

    conv_t2 = Conv2Dtranspose(conv_t1_out, (14, 14), 8, 8,
                              (3, 3), activation='relu')
    conv_t2_out = conv_t2.output()

    conv_t3 = Conv2Dtranspose(conv_t2_out, (28, 28), 8, 16,
                              (3, 3), activation='relu')
    conv_t3_out = conv_t3.output()

    conv_last = Convolution2D(conv_t3_out, (28, 28), 16, 1, (3, 3),
                              activation='sigmoid')
    decoded = conv_last.output()
    decoded = tf.reshape(decoded, [-1, 784])

    # A float32 sigmoid can saturate to exactly 0.0 or 1.0; tf.log then gives
    # -inf and 0 * -inf makes the loss NaN.  Clip only the copy used inside
    # the logs so the returned reconstruction itself is unchanged.
    eps = 1e-7
    decoded_safe = tf.clip_by_value(decoded, eps, 1. - eps)
    cross_entropy = (-1. * x * tf.log(decoded_safe)
                     - (1. - x) * tf.log(1. - decoded_safe))
    loss = tf.reduce_mean(cross_entropy)

    return loss, decoded
if __name__ == '__main__':
    mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

    # Variables
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])

    loss, decoded = mk_nn_model(x, y_)
    train_step = tf.train.AdagradOptimizer(0.1).minimize(loss)

    init = tf.initialize_all_variables()

    # Train
    with tf.Session() as sess:
        sess.run(init)
        print('Training...')
        for i in range(10001):
            batch_xs, batch_ys = mnist.train.next_batch(128)
            train_fd = {x: batch_xs, y_: batch_ys}
            train_step.run(train_fd)
            if i % 1000 == 0:
                # loss on the batch that was just used for the update
                train_loss = loss.eval(train_fd)
                print(' step, loss = %6d: %6.3f' % (i, train_loss))

        # generate decoded image with test data; fetch reconstruction and loss
        # together so the test set passes through the network only once
        test_fd = {x: mnist.test.images, y_: mnist.test.labels}
        decoded_imgs, test_loss = sess.run([decoded, loss], feed_dict=test_fd)
        print('loss (test) = ', test_loss)

    x_test = mnist.test.images
    n = 10  # how many digits we will display
    plt.figure(figsize=(20, 4))
    plt.gray()
    for i in range(n):
        # top row: original digits, bottom row: their reconstructions
        for row, images in enumerate((x_test, decoded_imgs)):
            ax = plt.subplot(2, n, i + 1 + row * n)
            plt.imshow(images[i].reshape(28, 28))
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    # the figure is written to disk rather than shown (the script selects the
    # 'Agg' backend at import time)
    plt.savefig('mnist_ae2.png')
#
# my_nn_lib.py
# date. 5/19/2016
#
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import sys
import numpy as np
# import cv2
import tensorflow as tf
# Convolution 2-D Layer
class Convolution2D(object):
    '''
    2-D convolution layer with stride 1 and 'SAME' padding.

    constructor's args:
        input      : input image tensor; reshaped to
                     [-1, rows, cols, in_ch] before the convolution
        input_siz  : input image size (rows, cols)
        in_ch      : number of incoming image channel
        out_ch     : number of outgoing image channel
        patch_siz  : filter(patch) size
        activation : 'relu' or 'sigmoid'; any other value keeps the output linear
    '''
    def __init__(self, input, input_siz, in_ch, out_ch, patch_siz, activation='relu'):
        self.input = input
        self.rows = input_siz[0]
        self.cols = input_siz[1]
        self.in_ch = in_ch
        self.activation = activation

        # filter layout: [height, width, in_ch, out_ch]
        wshape = [patch_siz[0], patch_siz[1], in_ch, out_ch]
        w_cv = tf.Variable(tf.truncated_normal(wshape, stddev=0.1),
                           trainable=True)
        b_cv = tf.Variable(tf.constant(0.1, shape=[out_ch]),
                           trainable=True)
        self.w = w_cv
        self.b = b_cv
        self.params = [self.w, self.b]

    def output(self):
        '''
        Build and return the layer's output tensor.

        The result is returned without being stored on the instance: assigning
        it to self.output would replace this method and make a second call
        fail with "object is not callable".
        '''
        shape4D = [-1, self.rows, self.cols, self.in_ch]
        x_image = tf.reshape(self.input, shape4D)  # reshape to 4D tensor
        linout = tf.nn.conv2d(x_image, self.w,
                              strides=[1, 1, 1, 1], padding='SAME') + self.b
        if self.activation == 'relu':
            return tf.nn.relu(linout)
        if self.activation == 'sigmoid':
            return tf.sigmoid(linout)
        return linout
# Max Pooling Layer
class MaxPooling2D(object):
    '''
    Max-pooling layer with stride 2 and 'SAME' padding.

    constructor's args:
        input : input image tensor handed to tf.nn.max_pool
        ksize : pooling patch size; defaults to [1, 2, 2, 1] when omitted
    '''
    def __init__(self, input, ksize=None):
        self.input = input
        # identity test: "== None" is evaluated element-wise on array-like
        # ksize values and then fails when used as a condition
        if ksize is None:
            ksize = [1, 2, 2, 1]
        self.ksize = ksize

    def output(self):
        '''
        Build and return the pooled tensor.

        The result is returned without being stored on the instance: assigning
        it to self.output would replace this method and make a second call
        fail with "object is not callable".
        '''
        return tf.nn.max_pool(self.input, ksize=self.ksize,
                              strides=[1, 2, 2, 1], padding='SAME')
# Full-connected Layer
class FullConnected(object):
    '''
    Fully connected layer with ReLU activation.

    constructor's args:
        input : input tensor, multiplied by an [n_in, n_out] weight matrix
        n_in  : number of input units
        n_out : number of output units
    '''
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_h = tf.Variable(tf.truncated_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_h = tf.Variable(tf.zeros([n_out]), trainable=True)

        self.w = w_h
        self.b = b_h
        self.params = [self.w, self.b]

    def output(self):
        '''
        Build and return relu(input * w + b).

        The result is returned without being stored on the instance: assigning
        it to self.output would replace this method and make a second call
        fail with "object is not callable".
        '''
        linarg = tf.matmul(self.input, self.w) + self.b
        return tf.nn.relu(linarg)
# Read-out Layer
class ReadOutLayer(object):
    '''
    Fully connected read-out layer with softmax activation.

    constructor's args:
        input : input tensor, multiplied by an [n_in, n_out] weight matrix
        n_in  : number of input units
        n_out : number of output units
    '''
    def __init__(self, input, n_in, n_out):
        self.input = input

        w_o = tf.Variable(tf.random_normal([n_in, n_out],
                          mean=0.0, stddev=0.05), trainable=True)
        b_o = tf.Variable(tf.zeros([n_out]), trainable=True)

        self.w = w_o
        self.b = b_o
        self.params = [self.w, self.b]

    def output(self):
        '''
        Build and return softmax(input * w + b).

        The result is returned without being stored on the instance: assigning
        it to self.output would replace this method and make a second call
        fail with "object is not callable".
        '''
        linarg = tf.matmul(self.input, self.w) + self.b
        return tf.nn.softmax(linarg)
#
@tomokishii
Copy link
Author

Hi Kajiyu,

I thought I implemented the unpooling process by tf.nn.conv2d_transpose() according to the stackoverflow Q&A information.
http://stackoverflow.com/questions/37926562/tensorflow-conv2d-transpose-deconv-number-of-rows-of-out-backprop-doesnt-matc/

In that Q&A, you can find a link to a very instructive slide deck at the bottom of the page.

Tomo

@jhwjhw0123
Copy link

Thanks for this excellent post! However, I think there is a problem with the cross-entropy implementation: since we are using vector donation of original image, the cross-entropy loss should not be like that in the code... If we consider mutual-exclusive labels, performing a loss function in the code will penalize the probability of the correct classification (you can figure it out by a simple two-class example). And I think the same applies to this program. Meanwhile, here the probabilities in one row (an image) need not sum up to 1, thus I can't see any rationality to use cross_entropy = -1. * x * tf.log(decoded) - (1. - x) * tf.log(1. - decoded)
Derek Murray once answered an issue regarding computing the cross-entropy directly (https://github.com/tensorflow/tensorflow/issues/2462), and his suggested code is cross_entropy = -tf.reduce_mean(tf.reduce_sum(x*tf.log(decoded),reduction_indices=[1])). I think this is the right formula for a vector-valued cross-entropy, although this function still has numerical issues.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment