mobilenetv3
"""MobileNet v3 models for Keras.
# Reference
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
"""
from keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D
from keras.layers import Activation, BatchNormalization, Add, Lambda, Reshape, Layer, InputSpec, Multiply
from keras.utils import conv_utils
from keras.backend.common import normalize_data_format
import keras
import tensorflow as tf
from keras import backend as K
class ResizeImages(Layer):
    """Resize images to a specified size.

    See https://stackoverflow.com/questions/41903928/add-a-resizing-layer-to-a-keras-sequential-model

    # Arguments
        output_dim: Size of output layer width and height.
        output_scale: Scale relative to the input size.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".

    # Input shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, rows, cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, rows, cols)`

    # Output shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, resized_rows, resized_cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, resized_rows, resized_cols)`
    """
    def __init__(self, output_dim=(1, 1), output_scale=None, data_format=None, **kwargs):
        super(ResizeImages, self).__init__(**kwargs)
        self.naive_output_dim = conv_utils.normalize_tuple(output_dim,
                                                           2, 'output_dim')
        self.naive_output_scale = output_scale
        self.data_format = normalize_data_format(data_format)
        self.input_spec = InputSpec(ndim=4)
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        if self.naive_output_scale is not None:
            if self.data_format == 'channels_first':
                self.output_dim = (self.naive_output_scale * input_shape[2],
                                   self.naive_output_scale * input_shape[3])
            elif self.data_format == 'channels_last':
                self.output_dim = (self.naive_output_scale * input_shape[1],
                                   self.naive_output_scale * input_shape[2])
        else:
            self.output_dim = self.naive_output_dim
    def compute_output_shape(self, input_shape):
        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], self.output_dim[0], self.output_dim[1])
        elif self.data_format == 'channels_last':
            return (input_shape[0], self.output_dim[0], self.output_dim[1], input_shape[3])
    def _resize_fun(self, inputs, data_format):
        try:
            assert keras.backend.backend() == 'tensorflow'
            assert self.data_format == 'channels_last'
        except AssertionError:
            print("Only the TensorFlow backend, with 'channels_last' ordering, "
                  "is supported by the resize layer")
        output = tf.image.resize_images(inputs, self.output_dim)
        return output

    def call(self, inputs):
        output = self._resize_fun(inputs=inputs, data_format=self.data_format)
        return output
    def get_config(self):
        config = {'output_dim': self.output_dim,
                  'data_format': self.data_format}
        base_config = super(ResizeImages, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
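
# A minimal usage sketch (assuming the gist's TensorFlow 1.x / standalone
# Keras stack and `channels_last` data): ResizeImages can either fix the
# output size via `output_dim` or scale the input spatially via
# `output_scale`; the shapes follow from `compute_output_shape` above.
def _example_resize_images():
    from keras.models import Model
    from keras.layers import Input
    inp = Input(shape=(28, 28, 3))
    fixed = ResizeImages(output_dim=(56, 56))(inp)   # -> (None, 56, 56, 3)
    scaled = ResizeImages(output_scale=2)(inp)       # -> (None, 56, 56, 3)
    return Model(inp, [fixed, scaled])
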
def _identity_layer(**kw):
    return Lambda(lambda x: x, **kw)

class MobileNetBase:
    def __init__(self, shape, n_class):
        self.shape = shape
        self.n_class = n_class

    def _relu6(self, x):
        """ReLU capped at 6, i.e. min(max(x, 0), 6).
        """
        return K.relu(x, max_value=6.0)

    def _hard_swish(self, x):
        """Hard swish: x * ReLU6(x + 3) / 6, a piecewise-linear
        approximation of swish.
        """
        return x * K.relu(x + 3.0, max_value=6.0) / 6.0
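
    # Sanity check of the hard-swish endpoints (values worked by hand, shown
    # as comments so the class body is left unchanged):
    #   x  = [-4.0, -3.0, 0.0, 1.0,   3.0, 4.0]
    #   hs = x * clip(x + 3, 0, 6) / 6
    #      = [ 0.0,  0.0, 0.0, 0.667, 3.0, 4.0]
    # i.e. the output is 0 for x <= -3, equals x for x >= 3, and follows
    # x * (x + 3) / 6 in between.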
    def _return_activation(self, x, nl):
        """Activation choice.
        This function selects the nonlinearity: hard swish ('HS') or ReLU6 ('RE').
        # Arguments
            x: Tensor, input tensor of conv layer.
            nl: String, nonlinearity activation type.
        # Returns
            Output tensor.
        """
        if nl == 'HS':
            x = Activation(self._hard_swish)(x)
        if nl == 'RE':
            x = Activation(self._relu6)(x)
        return x
    def _conv_block(self, inputs, filters, kernel, strides, nl, dilation_rate):
        """Convolution Block
        This function defines a 2D convolution operation with BN and activation.
        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            strides: An integer or tuple/list of 2 integers,
                specifying the strides of the convolution along the width and height.
                Can be a single integer to specify the same value for
                all spatial dimensions.
            nl: String, nonlinearity activation type.
            dilation_rate: An integer or tuple/list of 2 integers, specifying
                the dilation rate of the convolution.
        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = Conv2D(filters, kernel, padding='same', strides=strides,
                   dilation_rate=dilation_rate)(inputs)
        x = BatchNormalization(axis=channel_axis)(x)

        return self._return_activation(x, nl)
    def _squeeze(self, inputs):
        """Squeeze and Excitation.
        This function defines the squeeze-and-excitation branch: global
        average pooling followed by two fully connected layers, reshaped
        to broadcast over the spatial dimensions (assumes `channels_last`).
        # Arguments
            inputs: Tensor, input tensor of conv layer.
        # Returns
            Tensor of shape `(batch, 1, 1, channels)` with per-channel gates.
        """
        input_channels = int(inputs.shape[-1])

        x = GlobalAveragePooling2D()(inputs)
        x = Dense(input_channels, activation='relu')(x)
        x = Dense(input_channels, activation='hard_sigmoid')(x)
        x = Reshape((1, 1, input_channels))(x)

        return x
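
    # Note: in the MobileNetV3 paper the first fully connected layer of the
    # squeeze-and-excite block is reduced to a fraction of the channel count
    # before expanding back; the implementation above keeps both Dense layers
    # at the full `input_channels` width.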
    def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl, dilation_rate=1):
        """Bottleneck
        This function defines a basic bottleneck structure.
        Remark: fig. 4 of the paper.
        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            e: Integer, number of expanded channels (the expansion size is
                given directly, not as a multiple of the input size).
            s: An integer or tuple/list of 2 integers, specifying the strides
                of the convolution along the width and height. Can be a single
                integer to specify the same value for all spatial dimensions.
            squeeze: Boolean, whether to use squeeze-and-excitation.
            nl: String, nonlinearity activation type.
            dilation_rate: An integer or tuple/list of 2 integers, specifying
                the dilation rate of the convolutions.
        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
        input_shape = K.int_shape(inputs)

        tchannel = e
        # Residual connection only when stride is 1 and the channel count is
        # unchanged (assumes `channels_last`).
        r = s == 1 and input_shape[3] == filters

        x = self._conv_block(inputs, tchannel, (1, 1),
                             (1, 1), nl, dilation_rate=dilation_rate)

        x = DepthwiseConv2D(kernel, strides=(s, s), dilation_rate=dilation_rate,
                            depth_multiplier=1, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)

        if squeeze:
            # A Lambda such as `Lambda(lambda x: x * self._squeeze(x))` would
            # not be serializable, hence the explicit Multiply layer.
            x_squeezed = self._squeeze(x)
            x = Multiply()([x, x_squeezed])

        x = self._return_activation(x, nl)

        x = Conv2D(filters, (1, 1), strides=(1, 1),
                   dilation_rate=dilation_rate, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)

        if r:
            x = Add()([x, inputs])

        return x
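
    # Shape trace of one bottleneck (hypothetical values): with e=72 on a
    # 24-channel input of size 28x28 and a 3x3 depthwise kernel at s=2,
    #   (None, 28, 28, 24) -> 1x1 expand    -> (None, 28, 28, 72)
    #                      -> 3x3 depthwise -> (None, 14, 14, 72)
    #                      -> 1x1 project   -> (None, 14, 14, 24)
    # With s=2 the residual branch is skipped, since `r` requires s == 1.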
    def _identity(self, x, **kw):
        return _identity_layer(**kw)(x)

    def build(self):
        pass

"""MobileNet v3 small models for Keras.
# Reference
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
"""
from keras.models import Model
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape, BatchNormalization, Activation, AveragePooling2D, Multiply, Add, Lambda
from keras.utils.vis_utils import plot_model
from keras import backend as K
import tensorflow as tf
from .mobilenet_base import MobileNetBase, ResizeImages
class MobileNetV3_Small(MobileNetBase):
    def __init__(self, shape, n_class):
        """Init.
        # Arguments
            shape: Tuple/list of 3 integers, shape of the input tensor.
            n_class: Integer, number of classes.
        """
        super().__init__(shape, n_class)
    def build_backbone(self, inputs, last_stride=2):
        assert last_stride in (1, 2)
        # With last_stride=1 the final stage keeps its resolution and uses
        # dilated convolutions instead (for dense prediction).
        last_dilation_rate = 2 if (last_stride == 1) else 1

        x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2),
                             nl='HS', dilation_rate=1)

        x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
        x_os_8 = x  # output stride 8 feature map, used by the segmentation head
        x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 96, (5, 5), e=288,
                             s=last_stride, squeeze=True, nl='HS', dilation_rate=last_dilation_rate)
        x = self._bottleneck(x, 96, (5, 5), e=576,
                             s=1, squeeze=True, nl='HS', dilation_rate=last_dilation_rate)
        x = self._bottleneck(x, 96, (5, 5), e=576,
                             s=1, squeeze=True, nl='HS', dilation_rate=last_dilation_rate)

        x = self._conv_block(x, 576, (1, 1),
                             strides=(1, 1), nl='HS', dilation_rate=last_dilation_rate)
        x = Lambda(lambda x: x, name='end_of_backbone')(x)

        return x, x_os_8
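
    # Resolution sketch (assuming a 224x224 input): the stem conv and the
    # first two strided bottlenecks bring the input to 28x28, so `x_os_8` has
    # output stride 8. With last_stride=2 the backbone output is 7x7 (output
    # stride 32); with last_stride=1 it stays at 14x14 (output stride 16),
    # compensated by dilation rate 2.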
    def build_classifier_top(self, inputs, n_classifier_class, activation='softmax', last_layer_name=None):
        x = GlobalAveragePooling2D()(inputs)
        x = Reshape((1, 1, 576))(x)

        x = Conv2D(1280, (1, 1), padding='same')(x)
        x = self._return_activation(x, 'HS')

        x = Conv2D(n_classifier_class, (1, 1), padding='same')(x)
        # Apply the activation before the final reshape, so that it is
        # actually part of the output graph.
        if activation:
            x = Activation(activation)(x)
        output = Reshape((n_classifier_class,))(x)

        if last_layer_name:
            output = self._identity(output, name=last_layer_name)

        return output
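
    # Usage sketch (hypothetical input size), equivalent to what
    # `build(mode='classifier')` below assembles:
    #   net = MobileNetV3_Small(shape=(224, 224, 3), n_class=1000)
    #   inputs = Input(shape=(224, 224, 3))
    #   middle, _ = net.build_backbone(inputs, last_stride=2)
    #   model = Model(inputs, net.build_classifier_top(middle, 1000))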
    def build_segmentation_head(self, middle, middle_earlier, n_seg_class, upsample_output=True, last_layer_name=None):
        # 1x1 conv branch
        x_up = Conv2D(128, (1, 1), padding='same',
                      use_bias=False, name='seghead_1x1_conv')(middle)
        x_up = BatchNormalization(name='seghead_1x1_BN', epsilon=1e-5)(x_up)
        x_up = Activation('relu', name='seghead_1x1_activation')(x_up)

        # avg pool branch
        # An AveragePooling2D with such a large pool size, e.g.
        # AveragePooling2D((49, 49), strides=(16, 20)), would in effect
        # produce a 1x1 output anyway, so global pooling is used instead.
        x_mid = GlobalAveragePooling2D()(middle)
        x_mid = Reshape((1, 1, K.int_shape(x_mid)[-1]))(x_mid)
        x_mid = Conv2D(128, (1, 1), padding='same')(x_mid)
        x_mid = Activation('sigmoid')(x_mid)
        x_mid = ResizeImages(output_dim=K.int_shape(middle)[1:3])(x_mid)

        # skip connection
        x_lo = Conv2D(n_seg_class, (1, 1), padding='same')(middle_earlier)

        # merge up and mid
        x_up_mid_merged = Multiply()([x_up, x_mid])
        x_up_mid_merged = ResizeImages(output_scale=2)(x_up_mid_merged)
        x_up_mid_merged = Conv2D(n_seg_class, (1, 1),
                                 padding='same')(x_up_mid_merged)

        # merge up_and_mid and lo
        x_final = Add()([x_up_mid_merged, x_lo])
        x_final = Activation('sigmoid', name='final_activation')(x_final)

        if upsample_output:
            x_final = ResizeImages(output_scale=8)(x_final)

        if last_layer_name:
            x_final = self._identity(x_final, name=last_layer_name)

        return x_final
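
    # Note: this head appears to follow the Lite R-ASPP segmentation head of
    # the MobileNetV3 paper: a 1x1-conv branch gated by a global-pooling
    # sigmoid branch, merged with a low-level skip connection and upsampled
    # back to the input resolution.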
    def build(self, plot=False, inputs=None, mode='classifier'):
        """Build MobileNetV3 Small.
        # Arguments
            plot: Boolean, whether to plot the model.
            inputs: Optional input tensor; a new `Input` is created from
                `self.shape` if not given.
            mode: String, either 'classifier' or 'segmentation'.
        # Returns
            model: Model, the Keras model.
        """
        if inputs is None:
            inputs = Input(shape=self.shape)

        if mode == 'classifier':
            middle, middle_earlier = self.build_backbone(inputs, last_stride=2)
            output = self.build_classifier_top(middle, self.n_class)
        elif mode == 'segmentation':
            middle, middle_earlier = self.build_backbone(inputs, last_stride=1)
            output = self.build_segmentation_head(middle,
                                                  middle_earlier, self.n_class)
        else:
            raise ValueError('invalid mode: %s' % mode)

        model = Model(inputs, output)

        if plot:
            plot_model(model, to_file='images/MobileNetv3_small.png',
                       show_shapes=True)

        return model
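
# End-to-end usage sketch (input shape and class counts are hypothetical):
def _example_build_models():
    clf_model = MobileNetV3_Small(shape=(224, 224, 3), n_class=1000).build()
    seg_model = MobileNetV3_Small(shape=(224, 224, 3),
                                  n_class=21).build(mode='segmentation')
    return clf_model, seg_model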