Skip to content

Instantly share code, notes, and snippets.

@mihaidusmanu
Last active December 1, 2018 17:07
Show Gist options
  • Save mihaidusmanu/fdfc0bd53e3b627ee10c8a2a53d94032 to your computer and use it in GitHub Desktop.
Save mihaidusmanu/fdfc0bd53e3b627ee10c8a2a53d94032 to your computer and use it in GitHub Desktop.
Slight modifications to Keras' Conv2DTranspose layer to make it compatible with Zeiler and Fergus' paper.
import keras.backend as K
from keras.engine import InputSpec
from keras.layers import Conv2D
from keras.utils import conv_utils
class Deconv2D(Conv2D):
"""Deconvolution layer.
The need for transposed convolutions generally arises
from the desire to use a transformation going in the opposite direction
of a normal convolution, i.e., from something that has the shape of the
output of some convolution to something that has the shape of its input
while maintaining a connectivity pattern that is compatible with
said convolution.
When using this layer as the first layer in a model,
provide the keyword argument `input_shape`
(tuple of integers, does not include the batch axis),
e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures
in `data_format="channels_last"`.
# Arguments
filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the
height and width of the 2D convolution window.
Can be a single integer to specify the same value for
all spatial dimensions.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution
along the height and width.
Can be a single integer to specify the same value for
all spatial dimensions.
Specifying any stride value != 1 is incompatible with specifying
any `dilation_rate` value != 1.
padding: one of `"valid"` or `"same"` (case-insensitive).
output_padding: An integer or tuple/list of 2 integers,
specifying the amount of padding along the height and width
of the output tensor.
Can be a single integer to specify the same value for all
spatial dimensions.
The amount of output padding along a given dimension must be
lower than the stride along that same dimension.
If set to `None` (default), the output shape is inferred.
data_format: A string,
one of `"channels_last"` or `"channels_first"`.
The ordering of the dimensions in the inputs.
`"channels_last"` corresponds to inputs with shape
`(batch, height, width, channels)` while `"channels_first"`
corresponds to inputs with shape
`(batch, channels, height, width)`.
It defaults to the `image_data_format` value found in your
Keras config file at `~/.keras/keras.json`.
If you never set it, then it will be "channels_last".
dilation_rate: an integer or tuple/list of 2 integers, specifying
the dilation rate to use for dilated convolution.
Can be a single integer to specify the same value for
all spatial dimensions.
Currently, specifying any `dilation_rate` value != 1 is
incompatible with specifying any stride value != 1.
activation: Activation function to use
(see [activations](../activations.md)).
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to the kernel matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
4D tensor with shape:
`(batch, channels, rows, cols)`
if `data_format` is `"channels_first"`
or 4D tensor with shape:
`(batch, rows, cols, channels)`
if `data_format` is `"channels_last"`.
# Output shape
4D tensor with shape:
`(batch, filters, new_rows, new_cols)`
if `data_format` is `"channels_first"`
or 4D tensor with shape:
`(batch, new_rows, new_cols, filters)`
if `data_format` is `"channels_last"`.
`rows` and `cols` values might have changed due to padding.
If `output_padding` is specified:
```
new_rows = ((rows - 1) * strides[0] + kernel_size[0]
- 2 * padding[0] + output_padding[0])
new_cols = ((cols - 1) * strides[1] + kernel_size[1]
- 2 * padding[1] + output_padding[1])
```
# References
- [A guide to convolution arithmetic for deep learning](
https://arxiv.org/abs/1603.07285v1)
- [Deconvolutional Networks](
http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
"""
#@interfaces.legacy_deconv2d_support
def __init__(self, filters,
kernel_size,
strides=(1, 1),
padding='valid',
output_padding=None,
data_format=None,
dilation_rate=(1, 1),
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
super(Deconv2D, self).__init__(
filters,
kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
**kwargs)
self.output_padding = output_padding
if self.output_padding is not None:
self.output_padding = conv_utils.normalize_tuple(
self.output_padding, 2, 'output_padding')
for stride, out_pad in zip(self.strides, self.output_padding):
if out_pad >= stride:
raise ValueError('Stride ' + str(self.strides) + ' must be '
'greater than output padding ' +
str(self.output_padding))
def build(self, input_shape):
if len(input_shape) != 4:
raise ValueError('Inputs should have rank ' +
str(4) +
'; Received input shape:', str(input_shape))
if self.data_format == 'channels_first':
channel_axis = 1
else:
channel_axis = -1
if input_shape[channel_axis] is None:
raise ValueError('The channel dimension of the inputs '
'should be defined. Found `None`.')
input_dim = input_shape[channel_axis]
kernel_shape = self.kernel_size + (self.filters, input_dim)
self.kernel = self.add_weight(shape=kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(input_dim,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
# Set input spec.
self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
self.built = True
def call(self, inputs):
input_shape = K.shape(inputs)
batch_size = input_shape[0]
if self.data_format == 'channels_first':
h_axis, w_axis = 2, 3
else:
h_axis, w_axis = 1, 2
height, width = input_shape[h_axis], input_shape[w_axis]
kernel_h, kernel_w = self.kernel_size
stride_h, stride_w = self.strides
if self.output_padding is None:
out_pad_h = out_pad_w = None
else:
out_pad_h, out_pad_w = self.output_padding
# Infer the dynamic output shape:
out_height = conv_utils.deconv_length(height,
stride_h, kernel_h,
self.padding,
out_pad_h,
self.dilation_rate[0])
out_width = conv_utils.deconv_length(width,
stride_w, kernel_w,
self.padding,
out_pad_w,
self.dilation_rate[1])
if self.data_format == 'channels_first':
output_shape = (batch_size, self.filters, out_height, out_width)
else:
output_shape = (batch_size, out_height, out_width, self.filters)
outputs = inputs
if self.use_bias:
outputs = K.bias_add(
outputs,
-self.bias,
data_format=self.data_format)
outputs = K.conv2d_transpose(
outputs,
self.kernel,
output_shape,
self.strides,
padding=self.padding,
data_format=self.data_format,
dilation_rate=self.dilation_rate)
if self.activation is not None:
return self.activation(outputs)
return outputs
def compute_output_shape(self, input_shape):
output_shape = list(input_shape)
if self.data_format == 'channels_first':
c_axis, h_axis, w_axis = 1, 2, 3
else:
c_axis, h_axis, w_axis = 3, 1, 2
kernel_h, kernel_w = self.kernel_size
stride_h, stride_w = self.strides
if self.output_padding is None:
out_pad_h = out_pad_w = None
else:
out_pad_h, out_pad_w = self.output_padding
output_shape[c_axis] = self.filters
output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis],
stride_h,
kernel_h,
self.padding,
out_pad_h,
self.dilation_rate[0])
output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis],
stride_w,
kernel_w,
self.padding,
out_pad_w,
self.dilation_rate[1])
return tuple(output_shape)
def get_config(self):
config = super(Deconv2D, self).get_config()
config['output_padding'] = self.output_padding
return config
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment