@FlorianMuellerklein
Last active June 6, 2016 23:21
Potential Example for Lasagne/Recipes
#!/usr/bin/env python
"""
Lasagne example of ResNet-110 from 'Identity Mappings in Deep Residual Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
"""
from __future__ import print_function
import sys
import os
import time
import string
import random
import pickle
import numpy as np
import theano
import theano.tensor as T
import lasagne
# for the larger networks (n>=9), we need to adjust Python's recursion limit
sys.setrecursionlimit(10000)
# ##################### Load data from CIFAR-10 dataset #######################
# this code assumes the cifar dataset from 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
# has been extracted in the current working directory

def unpickle(file):
    # each CIFAR-10 batch file is a cPickled dict with 'data' and 'labels' entries
    import cPickle
    fo = open(file, 'rb')
    data = cPickle.load(fo)
    fo.close()
    return data
def load_data():
    xs = []
    ys = []
    for j in range(5):
        d = unpickle('cifar-10-batches-py/data_batch_' + str(j + 1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)

    d = unpickle('cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])

    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    # subtract per-pixel mean
    pixel_mean = np.mean(x[0:50000], axis=0)
    #pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb"))
    x -= pixel_mean

    # create mirrored images
    X_train = x[0:50000, :, :, :]
    Y_train = y[0:50000]
    X_train_flip = X_train[:, :, :, ::-1]
    Y_train_flip = Y_train
    X_train = np.concatenate((X_train, X_train_flip), axis=0)
    Y_train = np.concatenate((Y_train, Y_train_flip), axis=0)

    X_test = x[50000:, :, :, :]
    Y_test = y[50000:]

    return dict(
        X_train=lasagne.utils.floatX(X_train),
        Y_train=Y_train.astype('int32'),
        X_test=lasagne.utils.floatX(X_test),
        Y_test=Y_test.astype('int32'),
    )
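# Shapes returned by load_data (the horizontal flips double the training set):
#   X_train: (100000, 3, 32, 32) float32, Y_train: (100000,) int32
#   X_test:  (10000, 3, 32, 32)  float32, Y_test:  (10000,)  int32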
# ##################### Build the neural network model #######################
from lasagne.nonlinearities import rectify, softmax, sigmoid
from lasagne.layers import InputLayer, MaxPool2DLayer, DenseLayer, DropoutLayer, helper, batch_norm, BatchNormLayer
from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.layers import Pool2DLayer, ElemwiseSumLayer, NonlinearityLayer, PadLayer, GlobalPoolLayer, ExpressionLayer
from lasagne.init import Orthogonal, HeNormal, GlorotNormal
PIXELS = 32
imageSize = PIXELS * PIXELS
num_features = imageSize * 3
he_norm = HeNormal(gain='relu')
def ResNet_FullPreActivation(input_var=None, n=18):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)

    Formula to figure out depth: 6n + 2
    '''

    # create a residual learning building block with two stacked 3x3 conv layers as in the paper
    def residual_block(l, increase_dim=False, first=False):
        input_num_filters = l.output_shape[1]

        if increase_dim:
            first_stride = (2, 2)
            out_num_filters = input_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = input_num_filters

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(ConvLayer(bn_pre_relu, num_filters=out_num_filters, filter_size=(3,3), stride=first_stride, nonlinearity=rectify, pad='same', W=he_norm))

        # contains the last weight portion, step 6
        conv_2 = ConvLayer(conv_1, num_filters=out_num_filters, filter_size=(3,3), stride=(1,1), nonlinearity=None, pad='same', W=he_norm)

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in the paper
            projection = ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        else:
            block = ElemwiseSumLayer([conv_2, l])

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, PIXELS, PIXELS), input_var=input_var)

    # first layer, output is 16 x 32 x 32
    l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3,3), stride=(1,1), nonlinearity=rectify, pad='same', W=he_norm))

    # first stack of residual blocks, output is 16 x 32 x 32
    l = residual_block(l, first=True)
    for _ in range(1, n):
        l = residual_block(l)

    # second stack of residual blocks, output is 32 x 16 x 16
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    # third stack of residual blocks, output is 64 x 8 x 8
    l = residual_block(l, increase_dim=True)
    for _ in range(1, n):
        l = residual_block(l)

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=10, W=HeNormal(), nonlinearity=softmax)

    return network
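# For example, n=18 gives the 110-layer network of the paper (6*18 + 2 = 110).
# A quick sanity check of the builder (variable names here are just illustrative):
#   net = ResNet_FullPreActivation(T.tensor4('x'), n=18)
#   print(lasagne.layers.count_params(net, trainable=True))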
# ############################# Batch iterator ###############################
def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if augment:
            # as in the paper:
            # pad feature arrays with 4 pixels on each side
            # and do random cropping of 32x32
            padded = np.pad(inputs[excerpt], ((0,0), (0,0), (4,4), (4,4)), mode='constant')
            random_cropped = np.zeros(inputs[excerpt].shape, dtype=np.float32)
            crops = np.random.random_integers(0, high=8, size=(batchsize, 2))
            for r in range(batchsize):
                random_cropped[r, :, :, :] = padded[r, :, crops[r,0]:(crops[r,0]+32), crops[r,1]:(crops[r,1]+32)]
            inp_exc = random_cropped
        else:
            inp_exc = inputs[excerpt]
        yield inp_exc, targets[excerpt]
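# Typical use, mirroring the training loop inside main() below:
#   for inputs, targets in iterate_minibatches(X_train, Y_train, 128, shuffle=True, augment=True):
#       loss = train_fn(inputs, targets)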
# ############################## Main program ################################
def main(n=5, num_epochs=82, model=None):
    # Check if cifar data exists
    if not os.path.exists("./cifar-10-batches-py"):
        print("CIFAR-10 dataset can not be found. Please download the dataset from 'https://www.cs.toronto.edu/~kriz/cifar.html'.")
        return

    # Load the dataset
    print("Loading data...")
    data = load_data()
    X_train = data['X_train']
    Y_train = data['Y_train']
    X_test = data['X_test']
    Y_test = data['Y_test']

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model
    print("Building model and compiling functions...")
    network = ResNet_FullPreActivation(input_var, n=n)
    print("number of parameters in model: %d" % lasagne.layers.count_params(network, trainable=True))

    if model is None:
        # Create a loss expression for training, i.e., a scalar objective we want
        # to minimize (for our multi-class problem, it is the cross-entropy loss):
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        loss = loss.mean()

        # add weight decay
        all_layers = lasagne.layers.get_all_layers(network)
        l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 0.0001
        loss = loss + l2_penalty

        # Create update expressions for training:
        # Stochastic Gradient Descent (SGD) with momentum
        params = lasagne.layers.get_all_params(network, trainable=True)
        lr = 0.1
        sh_lr = theano.shared(lasagne.utils.floatX(lr))
        updates = lasagne.updates.momentum(
            loss, params, learning_rate=sh_lr, momentum=0.9)

        # Compile a function performing a training step on a mini-batch (by giving
        # the updates dictionary) and returning the corresponding training loss:
        train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    if model is None:
        # launch the training loop
        print("Starting training...")
        # We iterate over epochs:
        for epoch in range(num_epochs):
            # shuffle training data
            train_indices = np.arange(100000)
            np.random.shuffle(train_indices)
            X_train = X_train[train_indices, :, :, :]
            Y_train = Y_train[train_indices]

            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, Y_train, 128, shuffle=True, augment=True):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_test, Y_test, 500, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))

            # adjust learning rate as in the paper:
            # with 100k (flipped) training images and batch size 128 there are roughly 781
            # iterations per epoch, so 32k and 48k iterations are roughly 41 and 61 epochs
            if (epoch + 1) == 41 or (epoch + 1) == 61:
                new_lr = sh_lr.get_value() * 0.1
                print("New LR: " + str(new_lr))
                sh_lr.set_value(lasagne.utils.floatX(new_lr))

        # dump the network weights to a file:
        np.savez('cifar10_deep_residual_model.npz', *lasagne.layers.get_all_param_values(network))
    else:
        # load network weights from model file
        with np.load(model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    # Calculate validation error of model:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, Y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1

    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a Deep Residual Learning network on cifar-10 using Lasagne.")
        print("Network architecture and training parameters are as in section 4.2 in 'Deep Residual Learning for Image Recognition'.")
        print("Usage: %s [N [MODEL]]" % sys.argv[0])
        print()
        print("N: Number of stacked residual building blocks per feature map (default: 5)")
        print("MODEL: saved model file to load (for validation) (default: None)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['n'] = int(sys.argv[1])
        if len(sys.argv) > 2:
            kwargs['model'] = sys.argv[2]
        main(**kwargs)