Created
May 31, 2020 03:24
-
-
Save pishangujeniya/6ac306af1e1ea5aed10616b89e0d9c80 to your computer and use it in GitHub Desktop.
ROI Pooling Layer for Keras Tensorflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This code is copied and modified from the below repository | |
https://github.com/kbardool/keras-frcnn/blob/master/keras_frcnn/RoiPoolingConv.py | |
original license | |
kbardool/keras-frcnn is licensed under the | |
Apache License 2.0 | |
A permissive license whose main conditions require preservation of copyright and license notices. Contributors provide an express grant of patent rights. Licensed works, modifications, and larger works may be distributed under different terms and without source code. | |
""" | |
from keras.engine.topology import Layer | |
import keras.backend as K | |
import tensorflow as tf | |
class RoiPoolingConv2DTF(Layer): | |
"""ROI pooling layer for 2D inputs. | |
See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, | |
K. He, X. Zhang, S. Ren, J. Sun | |
# Arguments | |
pool_size: int | |
Size of pooling region to use. pool_size = 7 will result in a 7x7 region. | |
num_rois: number of regions of interest to be used | |
# Input shape | |
list of two 4D tensors [X_img,X_roi] with shape: | |
X_img: | |
`(1, channels, rows, cols)` if dim_ordering='th' | |
or 4D tensor with shape: | |
`(1, rows, cols, channels)` if dim_ordering='tf'. | |
X_roi: | |
`(1,num_rois,4)` list of rois, with ordering (x,y,w,h) | |
# Output shape | |
3D tensor with shape: | |
`(1, num_rois, channels, pool_size, pool_size)` | |
""" | |
def __init__(self, pool_size, num_rois, **kwargs): | |
self.pool_size = pool_size | |
self.num_rois = num_rois | |
self.nb_channels = None | |
super(RoiPoolingConv2DTF, self).__init__(**kwargs) | |
def build(self, input_shape): | |
self.nb_channels = input_shape[0][3] | |
def compute_output_shape(self, input_shape): | |
return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels | |
def call(self, x, mask=None): | |
assert (len(x) == 2) | |
img = x[0] | |
rois = x[1] | |
input_shape = K.shape(img) | |
outputs = [] | |
for roi_idx in range(self.num_rois): | |
x = rois[0, roi_idx, 0] | |
y = rois[0, roi_idx, 1] | |
w = rois[0, roi_idx, 2] | |
h = rois[0, roi_idx, 3] | |
row_length = w / float(self.pool_size) | |
col_length = h / float(self.pool_size) | |
num_pool_regions = self.pool_size | |
x = K.cast(x, 'int32') | |
y = K.cast(y, 'int32') | |
w = K.cast(w, 'int32') | |
h = K.cast(h, 'int32') | |
rs = tf.image.resize(img[:, y:y + h, x:x + w, :], (self.pool_size, self.pool_size)) | |
outputs.append(rs) | |
final_output = K.concatenate(outputs, axis=0) | |
final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)) | |
final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4)) | |
return final_output |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment