Created
May 23, 2019 07:54
-
-
Save fzyzcjy/25a60fa3b53f63a1b9c87f84d3e25908 to your computer and use it in GitHub Desktop.
mobilenetv3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""MobileNet v3 models for Keras. | |
# Reference | |
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) | |
""" | |
from keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D | |
from keras.layers import Activation, BatchNormalization, Add, Lambda, Reshape, Layer, InputSpec, Multiply | |
from keras.utils import conv_utils | |
from keras.backend.common import normalize_data_format | |
import keras | |
import tensorflow as tf | |
from keras import backend as K | |
class ResizeImages(Layer):
    """Resize images to a fixed size or by a scale relative to the input.

    Adapted from
    https://stackoverflow.com/questions/41903928/add-a-resizing-layer-to-a-keras-sequential-model

    # Arguments
        output_dim: Size of output layer width and height. Used only when
            `output_scale` is None.
        output_scale: Scale compared with the input. When given, the output
            size is computed in `build()` from the input's spatial
            dimensions and `output_dim` is ignored.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".

    # Input shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, rows, cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, rows, cols)`

    # Output shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, resized_rows, resized_cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, resized_rows, resized_cols)`
    """

    def __init__(self, output_dim=(1, 1), output_scale=None, data_format=None, **kwargs):
        super(ResizeImages, self).__init__(**kwargs)
        # Keep the constructor arguments as given ("naive"); the effective
        # output size is only resolved in build(), once the input shape is known.
        self.naive_output_dim = conv_utils.normalize_tuple(output_dim,
                                                           2, 'output_dim')
        self.naive_output_scale = output_scale
        self.data_format = normalize_data_format(data_format)
        self.input_spec = InputSpec(ndim=4)

    def build(self, input_shape):
        """Resolve `self.output_dim` from the input shape and constructor args."""
        self.input_spec = [InputSpec(shape=input_shape)]
        if self.naive_output_scale is not None:
            if self.data_format == 'channels_first':
                rows, cols = input_shape[2], input_shape[3]
            else:
                rows, cols = input_shape[1], input_shape[2]
            # int() keeps the size integral when a non-integer scale is used;
            # it is a no-op for integer scales.
            self.output_dim = (int(self.naive_output_scale * rows),
                               int(self.naive_output_scale * cols))
        else:
            self.output_dim = self.naive_output_dim
        super(ResizeImages, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        """Return the input shape with its spatial dims replaced by `output_dim`."""
        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], self.output_dim[0], self.output_dim[1])
        elif self.data_format == 'channels_last':
            return (input_shape[0], self.output_dim[0], self.output_dim[1], input_shape[3])

    def _resize_fun(self, inputs, data_format):
        """Resize `inputs` to `self.output_dim` with `tf.image.resize_images`.

        # Raises
            RuntimeError: if the active Keras backend is not TensorFlow.
        """
        if K.backend() != 'tensorflow':
            # Fail loudly instead of printing a warning and then handing a
            # non-TensorFlow tensor to a TensorFlow op.
            raise RuntimeError(
                'Only tensorflow backend is supported for the resize layer')
        if data_format == 'channels_first':
            # The resize call below is applied to axes 1 and 2, so move the
            # channels to the last axis, resize, then restore the layout.
            # This keeps the result consistent with compute_output_shape().
            nhwc = K.permute_dimensions(inputs, (0, 2, 3, 1))
            resized = tf.image.resize_images(nhwc, self.output_dim)
            return K.permute_dimensions(resized, (0, 3, 1, 2))
        return tf.image.resize_images(inputs, self.output_dim)

    def call(self, inputs):
        output = self._resize_fun(inputs=inputs, data_format=self.data_format)
        return output

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        The unresolved constructor values are exported (not the size computed
        in `build()`), so this works before the layer is built and a layer
        created with `output_scale` is restored as a scaling layer.
        """
        config = {'output_dim': self.naive_output_dim,
                  'output_scale': self.naive_output_scale,
                  'data_format': self.data_format}
        base_config = super(ResizeImages, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def _identity_layer(**kw):
    """Build a `Lambda` layer that returns its input unchanged.

    All keyword arguments (for example `name`) are forwarded to `Lambda`.
    """
    return Lambda(lambda tensor: tensor, **kw)
class MobileNetBase:
    """Shared building blocks for the MobileNetV3 model builders."""

    def __init__(self, shape, n_class):
        """Store the input shape and the number of output classes."""
        self.shape = shape
        self.n_class = n_class

    def _relu6(self, x):
        """Relu 6
        """
        return K.relu(x, max_value=6.0)

    def _hard_swish(self, x):
        """Hard swish
        """
        return x * K.relu(x + 3.0, max_value=6.0) / 6.0

    def _return_activation(self, x, nl):
        """Activation selector

        Applies the nonlinearity named by `nl` to `x`.

        # Arguments
            x: Tensor, input tensor.
            nl: String, nonlinearity activation type: 'HS' for hard swish,
                'RE' for relu6. Any other value leaves `x` unchanged.

        # Returns
            Output tensor.
        """
        if nl == 'HS':
            x = Activation(self._hard_swish)(x)
        elif nl == 'RE':
            x = Activation(self._relu6)(x)
        return x

    def _conv_block(self, inputs, filters, kernel, strides, nl, dilation_rate):
        """Convolution Block

        This function defines a 2D convolution operation with BN and activation.

        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            strides: An integer or tuple/list of 2 integers,
                specifying the strides of the convolution along the width and height.
                Can be a single integer to specify the same value for
                all spatial dimensions.
            nl: String, nonlinearity activation type.
            dilation_rate: Dilation rate passed to the convolution.

        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = Conv2D(filters, kernel, padding='same', strides=strides,
                   dilation_rate=dilation_rate)(inputs)
        x = BatchNormalization(axis=channel_axis)(x)

        return self._return_activation(x, nl)

    def _squeeze(self, inputs):
        """Squeeze and Excitation.

        This function defines a squeeze structure: global average pooling
        followed by two dense layers, reshaped so the result can be
        multiplied with `inputs` channel by channel.

        # Arguments
            inputs: Tensor, input tensor of conv layer.

        # Returns
            Tensor with one value per channel and spatial size 1x1.
        """
        channels_first = K.image_data_format() == 'channels_first'
        # Read the channel count from the axis that matches the active data
        # format instead of always assuming the last axis.
        input_channels = K.int_shape(inputs)[1 if channels_first else -1]

        x = GlobalAveragePooling2D()(inputs)
        x = Dense(input_channels, activation='relu')(x)
        x = Dense(input_channels, activation='hard_sigmoid')(x)
        if channels_first:
            target_shape = (input_channels, 1, 1)
        else:
            target_shape = (1, 1, input_channels)
        x = Reshape(target_shape)(x)

        return x

    def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl, dilation_rate=1):
        """Bottleneck

        This function defines a basic bottleneck structure.
        Remark: fig. 4 of paper

        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            e: Integer, number of channels produced by the 1x1 expansion
                convolution (an absolute channel count, not a multiplier).
            s: Integer stride of the depthwise convolution, applied to both
                spatial dimensions.
            squeeze: Boolean, Whether to use the squeeze.
            nl: String, nonlinearity activation type.
            dilation_rate: Dilation rate passed to every convolution in the
                block. Defaults to 1.

        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
        input_channels = K.int_shape(inputs)[channel_axis]

        # A residual connection is only possible when the block changes
        # neither the spatial resolution nor the channel count.
        use_residual = s == 1 and input_channels == filters

        x = self._conv_block(inputs, e, (1, 1),
                             (1, 1), nl, dilation_rate=dilation_rate)

        x = DepthwiseConv2D(kernel, strides=(s, s), dilation_rate=dilation_rate,
                            depth_multiplier=1, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)

        if squeeze:
            # Built from layers (not a Lambda around self._squeeze) so the
            # resulting model stays serializable.
            x_squeezed = self._squeeze(x)
            x = Multiply()([x, x_squeezed])

        x = self._return_activation(x, nl)

        x = Conv2D(filters, (1, 1), strides=(1, 1),
                   dilation_rate=dilation_rate, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)

        if use_residual:
            x = Add()([x, inputs])

        return x

    def _identity(self, x, **kw):
        """Pass `x` through an identity layer; `kw` (e.g. `name`) goes to the layer."""
        return _identity_layer(**kw)(x)

    def build(self):
        """Placeholder; does nothing in the base class."""
        pass
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""MobileNet v3 small models for Keras. | |
# Reference | |
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) | |
""" | |
from keras.models import Model | |
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape, BatchNormalization, Activation, AveragePooling2D, Multiply, Add, Lambda | |
from keras.utils.vis_utils import plot_model | |
from keras import backend as K | |
import tensorflow as tf | |
from .mobilenet_base import MobileNetBase, ResizeImages | |
class MobileNetV3_Small(MobileNetBase):
    """Builder for MobileNetV3 Small classification and segmentation models."""

    def __init__(self, shape, n_class):
        """Init.

        # Arguments
            shape: An integer or tuple/list of 3 integers, shape
                of input tensor.
            n_class: Integer, number of classes.
        """
        super().__init__(shape, n_class)

    def build_backbone(self, inputs, last_stride=2):
        """Build the feature extractor.

        # Arguments
            inputs: Tensor, input image tensor.
            last_stride: 1 or 2, stride of the last strided bottleneck. With
                1, that bottleneck and the layers after it use a dilation
                rate of 2 instead of 1.

        # Returns
            Tuple `(x, x_os_8)`: the final backbone tensor and the
            intermediate tensor captured after the third bottleneck.
        """
        assert last_stride in (1, 2), 'last_stride must be 1 or 2'
        last_dilation_rate = 2 if (last_stride == 1) else 1

        x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2),
                             nl='HS', dilation_rate=1)

        x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
        # Kept as the skip-connection source for the segmentation head.
        x_os_8 = x
        x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 96, (5, 5), e=288,
                             s=last_stride, squeeze=True, nl='HS',
                             dilation_rate=last_dilation_rate)
        x = self._bottleneck(x, 96, (5, 5), e=576,
                             s=1, squeeze=True, nl='HS',
                             dilation_rate=last_dilation_rate)
        x = self._bottleneck(x, 96, (5, 5), e=576,
                             s=1, squeeze=True, nl='HS',
                             dilation_rate=last_dilation_rate)

        x = self._conv_block(x, 576, (1, 1),
                             strides=(1, 1), nl='HS',
                             dilation_rate=last_dilation_rate)

        # Named pass-through layer marking where the backbone ends.
        x = Lambda(lambda x: x, name='end_of_backbone')(x)

        return x, x_os_8

    def build_classifier_top(self, inputs, n_classifier_class, activation='softmax', last_layer_name=None):
        """Build the classification head on top of the backbone output.

        # Arguments
            inputs: Tensor, output of the backbone.
            n_classifier_class: Integer, number of output classes.
            activation: Activation applied to the class scores; a falsy
                value returns the raw scores.
            last_layer_name: Optional name given to a final identity layer.

        # Returns
            Tensor of shape `(batch, n_classifier_class)`.
        """
        x = GlobalAveragePooling2D()(inputs)
        # Restore 1x1 spatial dims using the actual pooled channel count
        # rather than a hard-coded 576.
        x = Reshape((1, 1, K.int_shape(x)[-1]))(x)

        x = Conv2D(1280, (1, 1), padding='same')(x)
        x = self._return_activation(x, 'HS')
        x = Conv2D(n_classifier_class, (1, 1), padding='same')(x)

        output = Reshape((n_classifier_class,))(x)

        if activation:
            # Apply the activation to the tensor that is returned; applying
            # it to `x` here would leave the output as raw scores.
            output = Activation(activation)(output)

        if last_layer_name:
            output = self._identity(output, name=last_layer_name)

        return output

    def build_segmentation_head(self, middle, middle_earlier, n_seg_class, upsample_output=True, last_layer_name=None):
        """Build the segmentation head.

        # Arguments
            middle: Tensor, final backbone output.
            middle_earlier: Tensor, earlier backbone feature map used as the
                skip connection; it must match the size of `middle`
                upsampled by 2.
            n_seg_class: Integer, number of segmentation classes.
            upsample_output: Boolean, whether to upsample the result by 8.
            last_layer_name: Optional name given to a final identity layer.

        # Returns
            Output tensor with `n_seg_class` channels.
        """
        # 1x1 conv
        x_up = Conv2D(128, (1, 1), padding='same',
                      use_bias=False, name='seghead_1x1_conv')(middle)
        x_up = BatchNormalization(name='seghead_1x1_BN', epsilon=1e-5)(x_up)
        x_up = Activation('relu', name='seghead_1x1_activation')(x_up)

        # avg pool
        # TODO: AvgPool2D with such a large kernel in effect yields a 1x1
        # value, so global average pooling is used instead of
        # AveragePooling2D((49, 49), strides=(16, 20)).
        x_mid = GlobalAveragePooling2D()(middle)
        x_mid = Reshape((1, 1, K.int_shape(x_mid)[-1]))(x_mid)
        x_mid = Conv2D(128, (1, 1), padding='same')(x_mid)
        x_mid = Activation('sigmoid')(x_mid)
        # Resize the 1x1 gate back to the spatial size of `middle`.
        x_mid = ResizeImages(output_dim=K.int_shape(middle)[1:3])(x_mid)

        # skip conn
        x_lo = Conv2D(n_seg_class, (1, 1), padding='same')(middle_earlier)

        # merge up and mid
        x_up_mid_merged = Multiply()([x_up, x_mid])
        x_up_mid_merged = ResizeImages(output_scale=2)(x_up_mid_merged)
        x_up_mid_merged = Conv2D(n_seg_class, (1, 1),
                                 padding='same')(x_up_mid_merged)

        # merge up_and_mid and lo
        x_final = Add()([x_up_mid_merged, x_lo])
        x_final = Activation('sigmoid', name='final_activation')(x_final)

        if upsample_output:
            x_final = ResizeImages(output_scale=8)(x_final)

        if last_layer_name:
            x_final = self._identity(x_final, name=last_layer_name)

        return x_final

    def build(self, plot=False, inputs=None, mode='classifier'):
        """build MobileNetV3 Small.

        # Arguments
            plot: Boolean, whether to plot model.
            inputs: Optional input tensor; a new `Input` of shape
                `self.shape` is created when omitted.
            mode: 'classifier' or 'segmentation'.

        # Returns
            model: Model, model.

        # Raises
            ValueError: if `mode` is not one of the supported values.
        """
        if inputs is None:
            inputs = Input(shape=self.shape)

        if mode == 'classifier':
            middle, middle_earlier = self.build_backbone(inputs, last_stride=2)
            output = self.build_classifier_top(middle, self.n_class)
        elif mode == 'segmentation':
            # Stride 1 (with dilation) keeps a higher-resolution feature map
            # for the segmentation head.
            middle, middle_earlier = self.build_backbone(inputs, last_stride=1)
            output = self.build_segmentation_head(middle,
                                                  middle_earlier, self.n_class)
        else:
            raise ValueError('invalid mode: %r' % (mode,))

        model = Model(inputs, output)

        if plot:
            plot_model(model, to_file='images/MobileNetv3_small.png',
                       show_shapes=True)

        return model
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment