An MxN layer convolutional network with the following architecture: [conv-relu-pool] x N - [affine] x M - [softmax]
import numpy as np

from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.layer_utils import *


# a Convolutional Network by armhold
class GeorgeNet(object):
    """
    An MxN layer convolutional network with the following architecture:

    [conv-relu-pool] x N - [affine] x M - [softmax]

    The network operates on minibatches of data that have shape (N, C, H, W)
    consisting of N images, each with height H and width W and with C input
    channels.
    """

    def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=3,
                 num_convnets=2, num_affine=2, hidden_dim=100, num_classes=10,
                 use_batchnorm=False,
                 weight_scale=1e-3, reg=0.0, dtype=np.float32):
        """
        Initialize a new network.

        Inputs:
        - input_dim: Tuple (C, H, W) giving size of input data
        - num_filters: Number of filters to use in each convolutional layer
        - filter_size: Size of filters to use in the convolutional layers
        - num_convnets: Number of [conv-relu-pool] blocks (the N in MxN)
        - num_affine: Number of affine layers (the M in MxN)
        - hidden_dim: Number of units to use in each fully-connected hidden layer
        - num_classes: Number of scores to produce from the final affine layer
        - use_batchnorm: Whether or not the network should use batch normalization
        - weight_scale: Scalar giving standard deviation for random initialization
          of weights.
        - reg: Scalar giving L2 regularization strength
        - dtype: numpy datatype to use for computation.
        """
        self.params = {}
        self.use_batchnorm = use_batchnorm
        self.reg = reg
        self.dtype = dtype

        C, H, W = input_dim

        # these values *were* hard-coded in loss()
        conv_stride = 1
        conv_pad = (filter_size - 1) / 2
        self.conv_param = {'stride': conv_stride, 'pad': conv_pad}
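        # with stride 1, pad = (filter_size - 1) / 2 preserves spatial size for odd
        # filter sizes: e.g. filter_size=3 -> pad=1, so a 32x32 input stays 32x32
        # after the conv, and only the 2x2 / stride-2 max pool below halves H and W.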
        # pass pool_param to the forward pass for the max-pooling layer
        pool_height = 2
        pool_width = 2
        pool_stride = 2
        self.pool_param = {'pool_height': pool_height, 'pool_width': pool_width, 'stride': pool_stride}

        self.num_convnets = num_convnets
        self.num_affine = num_affine

        incoming_h, incoming_w = conv_outputs(H, W, filter_size, conv_pad, conv_stride)

        # create weights & biases for conv layers
        #
        layer = 0
        for i in range(self.num_convnets):
            layer += 1
            weightK = 'W' + str(layer)
            biasK = 'b' + str(layer)

            depth = C if i == 0 else num_filters
            self.params[weightK] = weight_scale * np.random.randn(num_filters, depth, filter_size, filter_size)
            self.params[biasK] = np.zeros(num_filters)

            incoming_h, incoming_w = pool_outputs(incoming_h, incoming_w, pool_height, pool_width, pool_stride)

            if self.use_batchnorm:
                self.params['gamma' + str(layer)] = np.ones(num_filters)
                self.params['beta' + str(layer)] = np.zeros(num_filters)
                # print "layer: %d, weights shape: %s, gamma shape: %s" % (layer, self.params[weightK].shape, self.params['gamma' + str(layer)].shape)

        # create weights & biases for affine layers
        #
        for i in range(self.num_affine):
            layer += 1
            weightK = 'W' + str(layer)
            biasK = 'b' + str(layer)

            prev_layer_dim = num_filters * incoming_h * incoming_w if i == 0 else hidden_dim
            assert incoming_h > 0, 'incoming_h must be > 0... network too deep?'
            assert incoming_w > 0, 'incoming_w must be > 0... network too deep?'

            # if this is the last layer, get ready to feed softmax, else just another hidden layer
            output_dim = num_classes if i == self.num_affine - 1 else hidden_dim

            self.params[weightK] = weight_scale * np.random.randn(prev_layer_dim, output_dim)
            self.params[biasK] = np.zeros(output_dim)

            if self.use_batchnorm and i < self.num_affine - 1:
                self.params['gamma' + str(layer)] = np.ones(output_dim)
                self.params['beta' + str(layer)] = np.zeros(output_dim)
                # print "layer: %d, weights shape: %s, gamma shape: %s" % (layer, self.params[weightK].shape, self.params['gamma' + str(layer)].shape)

        # With batch normalization we need to keep track of running means and
        # variances, so we need to pass a special bn_param object to each batch
        # normalization layer. You should pass self.bn_params[0] to the forward pass
        # of the first batch normalization layer, self.bn_params[1] to the forward
        # pass of the second batch normalization layer, etc.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for i in xrange(self.num_convnets + self.num_affine - 1)]

        for k, v in self.params.iteritems():
            self.params[k] = v.astype(dtype)
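        # e.g. with the default configuration (num_convnets=2, num_affine=2,
        # num_filters=32, 32x32 input) this creates W1: (32, 3, 3, 3),
        # W2: (32, 32, 3, 3), W3: (2048, 100), W4: (100, 10), plus matching biases
        # (and gamma/beta parameters when use_batchnorm=True).
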
    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the MxN layer convolutional network.
        """
        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        mode = 'test' if y is None else 'train'
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode  # fixed by armhold

        scores = None
        out = None
        caches = []

        # forward pass: conv-relu-pool x N
        #
        layer = 0
        input = X

        # print "forward pass: conv layers..."
        for i in range(self.num_convnets):
            layer += 1
            weights = self.params['W' + str(layer)]
            bias = self.params['b' + str(layer)]
            cache = None

            if self.use_batchnorm:
                bnp = self.bn_params[i]
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                out, cache = conv_batchnorm_relu_pool_forward(input, weights, bias, gamma, beta, self.conv_param, self.pool_param, bnp)
            else:
                out, cache = conv_relu_pool_forward(input, weights, bias, self.conv_param, self.pool_param)

            caches.append(cache)
            input = out

        # forward pass: affine x M
        #
        # print "forward pass: affine layers..."
        for i in range(self.num_affine):
            layer += 1
            weights = self.params['W' + str(layer)]
            bias = self.params['b' + str(layer)]
            cache = None

            if self.use_batchnorm and i < self.num_affine - 1:
                bnp = self.bn_params[i + self.num_convnets]  # add offset from num_convnets
                gamma = self.params['gamma' + str(layer)]
                beta = self.params['beta' + str(layer)]
                out, cache = affine_batchnorm_forward(input, weights, bias, gamma, beta, bnp)
            else:
                out, cache = affine_forward(input, weights, bias)

            caches.append(cache)
            input = out

        # print "forward pass: DONE with affine layers."
        scores = out

        if y is None:
            return scores

        data_loss, dscores = softmax_loss(scores, y)

        # compute regularization loss
        reg_loss = 0
        layer = 0
        for i in range(self.num_convnets + self.num_affine):
            layer += 1
            weights = self.params['W' + str(layer)]
            reg_loss += 0.5 * self.reg * np.sum(weights * weights)

        loss = data_loss + reg_loss

        grads = {}
        dout = dscores

        # backward pass: affine layers, in reverse order
        for i in reversed(xrange(self.num_affine)):
            if self.use_batchnorm and i < self.num_affine - 1:
                dx, dw, db, dgamma, dbeta = affine_batchnorm_backward(dout, caches.pop())
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta
            else:
                dx, dw, db = affine_backward(dout, caches.pop())

            weights = self.params['W' + str(layer)]
            grads['W' + str(layer)] = dw + self.reg * weights
            grads['b' + str(layer)] = db

            dout = dx
            layer -= 1

        # backward pass: conv-relu-pool layers, in reverse order
        for i in reversed(xrange(self.num_convnets)):
            if self.use_batchnorm:
                dx, dw, db, dgamma, dbeta = conv_batchnorm_relu_pool_backward(dout, caches.pop())
                grads['gamma' + str(layer)] = dgamma
                grads['beta' + str(layer)] = dbeta
            else:
                dx, dw, db = conv_relu_pool_backward(dout, caches.pop())

            weights = self.params['W' + str(layer)]
            grads['W' + str(layer)] = dw + self.reg * weights
            grads['b' + str(layer)] = db

            dout = dx
            layer -= 1

        return loss, grads

# conv volume size formula: W2 = (W1 - F + 2P) / S + 1
# from http://cs231n.github.io/convolutional-networks/
def conv_outputs(H, W, filter_size, pad, stride):
    out_h = (H - filter_size + 2 * pad) / stride + 1
    out_w = (W - filter_size + 2 * pad) / stride + 1
    return (out_h, out_w)


# pooling volume formula: W2 = (W1 - F) / S + 1
def pool_outputs(H, W, pool_height, pool_width, stride):
    out_h = (H - pool_height) / stride + 1
    out_w = (W - pool_width) / stride + 1
    return (out_h, out_w)
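
# worked example: a 32x32 input with filter_size=3, pad=1, stride=1 gives
#   conv_outputs(32, 32, 3, 1, 1) == (32, 32)
# and the 2x2 / stride-2 max pool then halves each spatial dimension:
#   pool_outputs(32, 32, 2, 2, 2) == (16, 16)
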
def conv_batchnorm_relu_pool_forward(x, w, b, gamma, beta, conv_param, pool_param, bn_param):
    """
    Convenience layer that performs a convolution, spatial batchnorm, a ReLU, and a pool.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - gamma, beta, bn_param: Scale, shift, and parameters for the spatial batchnorm layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Object to give to the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    # print "conv_out.shape: %s, gamma.shape: %s, beta.shape: %s" % (conv_out.shape, gamma.shape, beta.shape)
    batch_out, batch_cache = spatial_batchnorm_forward(conv_out, gamma, beta, bn_param)
    relu_out, relu_cache = relu_forward(batch_out)
    pool_out, pool_cache = max_pool_forward_fast(relu_out, pool_param)

    out = pool_out
    cache = (conv_cache, batch_cache, relu_cache, pool_cache)
    return out, cache

def conv_batchnorm_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-batchnorm-relu-pool convenience layer
    """
    conv_cache, batch_cache, relu_cache, pool_cache = cache

    dpool = max_pool_backward_fast(dout, pool_cache)
    drelu = relu_backward(dpool, relu_cache)
    dbatch, dgamma, dbeta = spatial_batchnorm_backward(drelu, batch_cache)
    dx, dw, db = conv_backward_fast(dbatch, conv_cache)

    return dx, dw, db, dgamma, dbeta


def affine_batchnorm_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer that performs an affine transform, followed by batch normalization
    """
    af_out, af_cache = affine_forward(x, w, b)
    # print "af_out.shape: %s, gamma.shape: %s, beta.shape: %s" % (af_out.shape, gamma.shape, beta.shape)
    bn_out, bn_cache = batchnorm_forward(af_out, gamma, beta, bn_param)

    cache = (af_cache, bn_cache)
    return bn_out, cache


def affine_batchnorm_backward(dout, cache):
    """
    Backward pass for the affine-batchnorm convenience layer
    """
    af_cache, bn_cache = cache

    dbatch, dgamma, dbeta = batchnorm_backward(dout, bn_cache)
    dx, dw, db = affine_backward(dbatch, af_cache)

    return dx, dw, db, dgamma, dbeta
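

# A minimal smoke-test sketch, assuming the cs231n package imported above
# (layers, fast_layers, layer_utils) is on the path and its Cython fast layers
# are compiled. It builds a small GeorgeNet on random data and checks that the
# scores, loss, and gradient keys come out as expected; the hyperparameters
# below are arbitrary.
if __name__ == '__main__':
    np.random.seed(0)
    N, C, H, W, num_classes = 4, 3, 32, 32, 10
    X = np.random.randn(N, C, H, W)
    y = np.random.randint(num_classes, size=N)

    model = GeorgeNet(input_dim=(C, H, W), num_filters=8, hidden_dim=50,
                      num_convnets=2, num_affine=2, use_batchnorm=True, reg=1e-3)

    scores = model.loss(X)          # test-time forward pass only
    loss, grads = model.loss(X, y)  # training-time loss + gradients

    print 'scores shape:', scores.shape   # expect (4, 10)
    print 'loss:', loss
    print 'gradient keys:', sorted(grads.keys())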