Skip to content

Instantly share code, notes, and snippets.

@pishangujeniya
Created May 31, 2020 03:24
Show Gist options
  • Save pishangujeniya/6ac306af1e1ea5aed10616b89e0d9c80 to your computer and use it in GitHub Desktop.
Save pishangujeniya/6ac306af1e1ea5aed10616b89e0d9c80 to your computer and use it in GitHub Desktop.
ROI Pooling Layer for Keras Tensorflow
"""
This code is copied and modified from the repository below:
https://github.com/kbardool/keras-frcnn/blob/master/keras_frcnn/RoiPoolingConv.py

Original license:
kbardool/keras-frcnn is licensed under the Apache License 2.0.
A permissive license whose main conditions require preservation of copyright and license notices. Contributors provide an express grant of patent rights. Licensed works, modifications, and larger works may be distributed under different terms and without source code.
"""
from keras.engine.topology import Layer
import keras.backend as K
import tensorflow as tf
class RoiPoolingConv2DTF(Layer):
    """ROI pooling layer for 2D inputs (channels-last ordering only).

    Every region of interest is cropped out of the feature map and resized
    to ``pool_size x pool_size`` with ``tf.image.resize`` (using that
    function's default interpolation method), so all regions end up with
    the same spatial size regardless of their original extent.

    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition, K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a
            7x7 region.
        num_rois: number of regions of interest to be used

    # Input shape
        list of two tensors [X_img, X_roi] with shape:
        X_img:
            4D tensor `(1, rows, cols, channels)`. The channel count is
            read from axis 3 and the crop is taken over axes 1 and 2, so
            channels-first inputs are not supported.
        X_roi:
            3D tensor `(1, num_rois, 4)` list of rois, with ordering
            (x, y, w, h). Only the first batch entry is read.

    # Output shape
        5D tensor with shape:
        `(1, num_rois, pool_size, pool_size, channels)`
    """

    def __init__(self, pool_size, num_rois, **kwargs):
        self.pool_size = pool_size
        self.num_rois = num_rois
        # Filled in by build() once the feature-map shape is known.
        self.nb_channels = None
        super(RoiPoolingConv2DTF, self).__init__(**kwargs)

    def build(self, input_shape):
        """Record the channel count of the feature map (axis 3 of X_img)."""
        self.nb_channels = input_shape[0][3]
        super(RoiPoolingConv2DTF, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        """Return `(None, num_rois, pool_size, pool_size, channels)`."""
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        """Crop each ROI from the feature map and resize it to the pool size.

        # Arguments
            x: list `[X_img, X_roi]` as described in the class docstring.
            mask: unused.

        # Returns
            Tensor reshaped to
            `(1, num_rois, pool_size, pool_size, channels)`.
        """
        assert len(x) == 2, 'expected a list of two tensors: [X_img, X_roi]'

        img = x[0]
        rois = x[1]
        target_size = (self.pool_size, self.pool_size)

        outputs = []
        for roi_idx in range(self.num_rois):
            # ROI values are cast to int32 so they can be used as slice
            # bounds on the feature map.
            roi_x = K.cast(rois[0, roi_idx, 0], 'int32')
            roi_y = K.cast(rois[0, roi_idx, 1], 'int32')
            roi_w = K.cast(rois[0, roi_idx, 2], 'int32')
            roi_h = K.cast(rois[0, roi_idx, 3], 'int32')

            # Rows are indexed by y/h and columns by x/w.
            crop = img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :]
            outputs.append(tf.image.resize(crop, target_size))

        # Each resized crop keeps the leading batch axis of X_img, so the
        # crops are stacked along axis 0 and then folded into a num_rois axis.
        final_output = K.concatenate(outputs, axis=0)
        return K.reshape(
            final_output,
            (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels),
        )

    def get_config(self):
        """Return the layer config including the constructor arguments.

        Without `pool_size` and `num_rois` in the config the layer cannot be
        re-instantiated from a serialized model.
        """
        config = super(RoiPoolingConv2DTF, self).get_config()
        config.update({'pool_size': self.pool_size, 'num_rois': self.num_rois})
        return config
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment