#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Example employing Lasagne for digit generation using the MNIST dataset and
Deep Convolutional Generative Adversarial Networks
(DCGANs, see http://arxiv.org/abs/1511.06434).

It is based on the MNIST example in Lasagne:
http://lasagne.readthedocs.org/en/latest/user/tutorial.html

Note: In contrast to the original paper, this trains the generator and
discriminator at once, not alternatingly. It's easy to change, though.

Jan Schlüter, 2015-12-16
"""

from __future__ import print_function

import sys
import os
import time

import numpy as np
import theano
import theano.tensor as T

import lasagne
# ################## Download and prepare the MNIST dataset ##################
# This is just some way of getting the MNIST dataset from an online location
# and loading it into numpy arrays. It doesn't involve Lasagne at all.

def load_dataset():
    # We first define a download function, supporting both Python 2 and 3.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve

    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
        print("Downloading %s" % filename)
        urlretrieve(source + filename, filename)

    # We then define functions for loading MNIST images and labels.
    # For convenience, they also download the requested files if needed.
    import gzip

    def load_mnist_images(filename):
        if not os.path.exists(filename):
            download(filename)
        # Read the inputs in Yann LeCun's binary format.
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        # The inputs are vectors now, we reshape them to monochrome 2D images,
        # following the shape convention: (examples, channels, rows, columns)
        data = data.reshape(-1, 1, 28, 28)
        # The inputs come as bytes, we convert them to float32 in range [0,1].
        # (Actually to range [0, 255/256], for compatibility to the version
        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
        return data / np.float32(256)

    def load_mnist_labels(filename):
        if not os.path.exists(filename):
            download(filename)
        # Read the labels in Yann LeCun's binary format.
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        # The labels are vectors of integers now, that's exactly what we want.
        return data

    # We can now download and read the training and test set images and labels.
    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')

    # We reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    # We just return all the arrays in order, as expected in main().
    # (It doesn't matter how we do this as long as we can read them again.)
    return X_train, y_train, X_val, y_val, X_test, y_test
# ##################### Build the neural network model #######################
# We create two models: The generator and the discriminator network. The
# generator needs a transposed convolution layer defined first.

class Deconv2DLayer(lasagne.layers.Layer):

    def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0,
                 nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
        super(Deconv2DLayer, self).__init__(incoming, **kwargs)
        self.num_filters = num_filters
        self.filter_size = lasagne.utils.as_tuple(filter_size, 2, int)
        self.stride = lasagne.utils.as_tuple(stride, 2, int)
        self.pad = lasagne.utils.as_tuple(pad, 2, int)
        self.W = self.add_param(lasagne.init.Orthogonal(),
                                (self.input_shape[1], num_filters) + self.filter_size,
                                name='W')
        self.b = self.add_param(lasagne.init.Constant(0),
                                (num_filters,),
                                name='b')
        if nonlinearity is None:
            nonlinearity = lasagne.nonlinearities.identity
        self.nonlinearity = nonlinearity

    def get_output_shape_for(self, input_shape):
        shape = tuple(i*s - 2*p + f - 1
                      for i, s, p, f in zip(input_shape[2:],
                                            self.stride,
                                            self.pad,
                                            self.filter_size))
        return (input_shape[0], self.num_filters) + shape
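    # For example, with stride 2, pad 2 and a 5x5 filter, a 7x7 input maps to
    # 7*2 - 2*2 + 5 - 1 = 14 per spatial dimension, so two such layers
    # upsample the generator's 7x7 feature maps to MNIST's 28x28 resolution.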
    def get_output_for(self, input, **kwargs):
        op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(
            imshp=self.output_shape,
            kshp=(self.input_shape[1], self.num_filters) + self.filter_size,
            subsample=self.stride, border_mode=self.pad)
        conved = op(self.W, input, self.output_shape[2:])
        if self.b is not None:
            conved += self.b.dimshuffle('x', 0, 'x', 'x')
        return self.nonlinearity(conved)
def build_generator(input_var=None):
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer, batch_norm
    from lasagne.nonlinearities import sigmoid
    # input: 100dim
    layer = InputLayer(shape=(None, 100), input_var=input_var)
    # fully-connected layer
    layer = batch_norm(DenseLayer(layer, 1024))
    # project and reshape
    layer = batch_norm(DenseLayer(layer, 128*7*7))
    layer = ReshapeLayer(layer, ([0], 128, 7, 7))
    # two fractional-stride convolutions
    layer = batch_norm(Deconv2DLayer(layer, 64, 5, stride=2, pad=2))
    layer = Deconv2DLayer(layer, 1, 5, stride=2, pad=2,
                          nonlinearity=sigmoid)
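    # shape flow: (None, 100) -> 1024 -> 128*7*7 -> (128, 7, 7)
    #             -> (64, 14, 14) -> (1, 28, 28)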
    print("Generator output:", layer.output_shape)
    return layer
def build_discriminator(input_var=None):
    from lasagne.layers import (InputLayer, Conv2DLayer,
                                DenseLayer, batch_norm)
    try:
        # use the cuDNN implementation if available
        from lasagne.layers.dnn import Conv2DDNNLayer as Conv2DLayer  # override
    except ImportError:
        pass  # fall back to the plain Conv2DLayer otherwise
    from lasagne.nonlinearities import LeakyRectify, sigmoid
    lrelu = LeakyRectify(0.2)
    # input: (None, 1, 28, 28)
    layer = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    # two convolutions
    layer = batch_norm(Conv2DLayer(layer, 64, 5, stride=2, pad=2,
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 128, 5, stride=2, pad=2,
                                   nonlinearity=lrelu))
    # fully-connected layer
    layer = batch_norm(DenseLayer(layer, 1024, nonlinearity=lrelu))
    # output layer
    layer = DenseLayer(layer, 1, nonlinearity=sigmoid)
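    # shape flow: (None, 1, 28, 28) -> (64, 14, 14) -> (128, 7, 7) -> 1024 -> 1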
    print("Discriminator output:", layer.output_shape)
    return layer
# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
# ############################## Main program ################################
# Everything else will be handled in our main program now. We could pull out
# more functions to better separate the code, but it wouldn't make it any
# easier to read.

def main(num_epochs=200, initial_eta=2e-4):
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')
    # target_var = T.ivector('targets')

    # Create neural network model
    print("Building model and compiling functions...")
    generator = build_generator(noise_var)
    discriminator = build_discriminator(input_var)

    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator,
                                         lasagne.layers.get_output(generator))

    # Create loss expressions
    generator_loss = lasagne.objectives.binary_crossentropy(fake_out, 1).mean()
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
            + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
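    # (binary_crossentropy(fake_out, 1) gives the "non-saturating" generator
    # loss -log(D(G(z))) recommended by Goodfellow et al., rather than the
    # original minimax formulation log(1 - D(G(z))).)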
    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    updates = lasagne.updates.adam(
            generator_loss, generator_params, learning_rate=eta, beta1=0.5)
    updates.update(lasagne.updates.adam(
            discriminator_loss, discriminator_params, learning_rate=eta, beta1=0.5))
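    # Merging both update dictionaries is what makes this train the generator
    # and discriminator simultaneously: each is updated with respect to the
    # other's current (not yet updated) parameters.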
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the discriminator's accuracy on
    # the real and fake data, respectively (note: not the loss values):
    train_fn = theano.function([noise_var, input_var],
                               [(real_out > .5).mean(),
                                (fake_out < .5).mean()],
                               updates=updates)
    # Compile another function generating some data
    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 128, shuffle=True):
            inputs, targets = batch
            noise = lasagne.utils.floatX(np.random.rand(len(inputs), 100))
            train_err += np.array(train_fn(noise, inputs))
            train_batches += 1
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  discriminator accuracy (real/fake):\t{}".format(
            train_err / train_batches))
        # And finally, we plot some generated data
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(42, 100)))
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            pass
        else:
            plt.imsave('mnist_samples.png',
                       (samples.reshape(6, 7, 28, 28)
                               .transpose(0, 2, 1, 3)
                               .reshape(6*28, 7*28)),
                       cmap='gray')
        # After half the epochs, we start decaying the learning rate towards zero
        if epoch >= num_epochs // 2:
            progress = float(epoch) / num_epochs
            eta.set_value(lasagne.utils.floatX(initial_eta*2*(1 - progress)))
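            # (linear decay: eta equals initial_eta at the halfway point and
            # approaches zero as epoch approaches num_epochs)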
    # Optionally, you could now dump the network weights to a file like this:
    np.savez('mnist_gen.npz', *lasagne.layers.get_all_param_values(generator))
    np.savez('mnist_disc.npz', *lasagne.layers.get_all_param_values(discriminator))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a DCGAN on MNIST using Lasagne.")
        print("Usage: %s [EPOCHS]" % sys.argv[0])
        print()
        print("EPOCHS: number of training epochs to perform (default: 200)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['num_epochs'] = int(sys.argv[1])
        main(**kwargs)
Do you get the same training dynamics?
With regular GANs, the loss will generally not decrease to zero over time. It's a delicate balancing act between training the generator to become better and training the discriminator not to get too good. Have a look at WGANs (for the WGAN example to produce nice samples, you will have to increase the clip parameter) and LSGANs, including the respective papers and possibly third-party explanations.
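For reference, here is a minimal sketch of the weight clipping that the mentioned clip parameter controls, using the variable names from the script above. This only shows the clipping step (a full WGAN also replaces the losses with the Wasserstein critic objective), and the value of clip is just the WGAN paper's default, which as noted may need to be increased:

# assumes `updates` and `discriminator_params` as defined in the script above
clip = 0.01  # the clip parameter; increase it if samples stay noisy
for param in discriminator_params:
    # clamp the critic's parameters to [-clip, clip] after each update
    updates[param] = T.clip(updates[param], -clip, clip)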
Sorry if these are very naive questions. I observed that the performance drops severely when using more transposed-convolution and convolution operations in the generator and discriminator, respectively (for CIFAR-100 as well). I inserted the conv and transposed-conv layers into the network without reducing the size of the image, and the generated images are very noisy and indistinguishable. What could be the reason? Also, I observed that for a 32 x 32 image, 5 conv + 5 transposed-conv layers are used, and for 64 x 64 images (faces dataset), 3 conv + 3 transposed-conv layers. How should one estimate the number of layers to use in the network?
Hi,
The example is very informative. Can you please explain the note below?

"Note: In contrast to the original paper, this trains the generator and discriminator at once, not alternatingly. It's easy to change, though."

I assume one has to call gen_fn() in the training loop as well in order to train the generator when training as in the paper. Please help me understand how one can train both at once.
Many thanks
Suppose theta_G(t) and theta_D(t) denote the generator's and the discriminator's parameters at time t, respectively. Simultaneous updating (as it is in f0k's code) means: G updates itself based on theta_D(t), and D updates itself based on theta_G(t). Alternating would be: update G based on theta_D(t), thereby obtaining theta_G(t+1); then D updates itself based on theta_G(t+1). At least, this is how I think Theano updates work.
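To make that concrete, here is a rough, untested sketch of how one could change the script to alternate, reusing the variable names defined above: compile two training functions with separate update dictionaries instead of merging them, then call them one after the other. (Note that gen_fn() is not needed for training; it only generates samples for plotting, while the fake images used for training are produced inside the training function itself.)

# separate update dictionaries instead of one merged `updates` dict
gen_updates = lasagne.updates.adam(
        generator_loss, generator_params, learning_rate=eta, beta1=0.5)
disc_updates = lasagne.updates.adam(
        discriminator_loss, discriminator_params, learning_rate=eta, beta1=0.5)
train_gen_fn = theano.function([noise_var], generator_loss,
                               updates=gen_updates)
train_disc_fn = theano.function([noise_var, input_var], discriminator_loss,
                                updates=disc_updates)

# in the training loop: update G first, then D sees G's new parameters
for batch in iterate_minibatches(X_train, y_train, 128, shuffle=True):
    inputs, targets = batch
    noise = lasagne.utils.floatX(np.random.rand(len(inputs), 100))
    train_gen_fn(noise)
    train_disc_fn(noise, inputs)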
I already have my own data in MNIST format. How do I pass it to your code?
I think that's expected. Yes, the generator is producing better images, but the discriminator is also getting better at spotting real/fake images.