@TarrySingh
Created July 8, 2017 16:41
Object Detection
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.1 (/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Object-Detector-App.iml" filepath="$PROJECT_DIR$/.idea/Object-Detector-App.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.5.3 (~/anaconda/envs/object-detection/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<created>1499365852136</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1499365852136</updated>
</task>
<servers />
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
</project>
name: object-detection
channels:
- menpo
- defaults
dependencies:
- freetype=2.5.5=2
- jbig=2.1=0
- jlaura::opencv3=3.0.0=py35_0
- jpeg=9b=0
- libpng=1.6.27=0
- libtiff=4.0.6=3
- menpo::tbb=4.3_20141023=0
- mkl=2017.0.1=0
- numpy=1.13.0=py35_0
- olefile=0.44=py35_0
- openssl=1.0.2l=0
- pillow=4.1.1=py35_0
- pip=9.0.1=py35_1
- python=3.5.3=1
- readline=6.2=2
- setuptools=27.2.0=py35_0
- sqlite=3.13.0=0
- tk=8.5.18=0
- wheel=0.29.0=py35_0
- xz=5.2.2=1
- zlib=1.2.8=3
- pip:
- backports.weakref==1.0rc1
- bleach==1.5.0
- html5lib==0.9999999
- markdown==2.2.0
- protobuf==3.3.0
- six==1.10.0
- tensorflow==1.2.0
- werkzeug==0.12.2
prefix: /Users/datitran/anaconda/envs/object-detection
MIT License
Copyright (c) 2017 Dat Tran
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (~/anaconda/bin/python)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/object_detection.iml" filepath="$PROJECT_DIR$/.idea/object_detection.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="afa8758a-48f3-4f65-8fa7-5a1beb5bcd6b" name="Default" comment="" />
<ignored path="object_detection.iws" />
<ignored path=".idea/workspace.xml" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CreatePatchCommitExecutor">
<option name="PATCH_PATH" value="" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FavoritesManager">
<favorites_list name="object_detection" />
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="object_detection.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/object_detection.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
<caret line="13" column="0" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="32" />
<folding />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/object_detection.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="y" value="23" />
<option name="width" value="1280" />
<option name="height" value="1351" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="object_detection" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="afa8758a-48f3-4f65-8fa7-5a1beb5bcd6b" name="Default" comment="" />
<created>1497876882068</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1497876882068</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="0" y="23" width="1280" height="1351" extended-state="0" />
<editor active="false" />
<layout>
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.2494043" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="Vcs.Log.UiProperties">
<option name="RECENTLY_FILTERED_USER_GROUPS">
<collection />
</option>
<option name="RECENTLY_FILTERED_BRANCH_GROUPS">
<collection />
</option>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/object_detection.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
<caret line="13" column="0" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="32" />
<folding />
</state>
</provider>
</entry>
</component>
</project>
# Tensorflow Object Detection API: Anchor Generator implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "grid_anchor_generator",
srcs = [
"grid_anchor_generator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_test(
name = "grid_anchor_generator_test",
srcs = [
"grid_anchor_generator_test.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
],
)
py_library(
name = "multiple_grid_anchor_generator",
srcs = [
"multiple_grid_anchor_generator.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
py_test(
name = "multiple_grid_anchor_generator_test",
srcs = [
"multiple_grid_anchor_generator_test.py",
],
deps = [
":multiple_grid_anchor_generator",
"//third_party/py/numpy",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generates a grid of anchors at given scales and aspect ratios."""
def __init__(self,
scales=(0.5, 1.0, 2.0),
aspect_ratios=(0.5, 1.0, 2.0),
base_anchor_size=None,
anchor_stride=None,
anchor_offset=None):
"""Constructs a GridAnchorGenerator.
Args:
scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
base_anchor_size: base anchor size as height, width
(length-2 float32 list, default=[256, 256])
anchor_stride: difference in centers between base anchors for adjacent
grid positions (length-2 float32 list, default=[16, 16])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need additional calculation if other
padding is used (length-2 float32 tensor, default=[0, 0])
"""
# Handle argument defaults
if base_anchor_size is None:
base_anchor_size = [256, 256]
base_anchor_size = tf.constant(base_anchor_size, tf.float32)
if anchor_stride is None:
anchor_stride = [16, 16]
anchor_stride = tf.constant(anchor_stride, dtype=tf.float32)
if anchor_offset is None:
anchor_offset = [0, 0]
anchor_offset = tf.constant(anchor_offset, dtype=tf.float32)
self._scales = scales
self._aspect_ratios = aspect_ratios
self._base_anchor_size = base_anchor_size
self._anchor_stride = anchor_stride
self._anchor_offset = anchor_offset
def name_scope(self):
return 'GridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
return [len(self._scales) * len(self._aspect_ratios)]
def _generate(self, feature_map_shape_list):
"""Generates a collection of bounding boxes to be used as anchors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0)]. For example, setting
feature_map_shape_list=[(8, 8)] asks for anchors that correspond
to an 8x8 layer. For this anchor generator, only lists of length 1 are
allowed.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == 1):
raise ValueError('feature_map_shape_list must be a list of length 1.')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
grid_height, grid_width = feature_map_shape_list[0]
scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
self._aspect_ratios)
scales_grid = tf.reshape(scales_grid, [-1])
aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
return tile_anchors(grid_height,
grid_width,
scales_grid,
aspect_ratios_grid,
self._base_anchor_size,
self._anchor_stride,
self._anchor_offset)
def tile_anchors(grid_height,
grid_width,
scales,
aspect_ratios,
base_anchor_size,
anchor_stride,
anchor_offset):
"""Create a tiled set of anchors strided along a grid in image space.
This op creates a set of anchor boxes by placing a "basis" collection of
boxes with user-specified scales and aspect ratios centered at evenly
distributed points along a grid. The basis collection is specified via the
scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
.1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
placing it over its respective center.
Grid points are specified via grid_height, grid_width parameters as well as
the anchor_stride and anchor_offset parameters.
Args:
grid_height: size of the grid in the y direction (int or int scalar tensor)
grid_width: size of the grid in the x direction (int or int scalar tensor)
scales: a 1-d (float) tensor representing the scale of each box in the
basis set.
aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
box in the basis set. The length of the scales and aspect_ratios tensors
must be equal.
base_anchor_size: base anchor size as [height, width]
(float tensor of shape [2])
anchor_stride: difference in centers between base anchors for adjacent grid
positions (float tensor of shape [2])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need some additional calculation if other
padding is used (float tensor of shape [2])
Returns:
a BoxList holding a collection of N anchor boxes
"""
ratio_sqrts = tf.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_anchor_size[0]
widths = scales * ratio_sqrts * base_anchor_size[1]
# Get a grid of box centers
y_centers = tf.to_float(tf.range(grid_height))
y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
x_centers = tf.to_float(tf.range(grid_width))
x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
bbox_centers = tf.reshape(bbox_centers, [-1, 2])
bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
"""Converts bbox center-size representation to corners representation.
Args:
centers: a tensor with shape [N, 2] representing bounding box centers
sizes: a tensor with shape [N, 2] representing bounding boxes
Returns:
corners: tensor with shape [N, 4] representing bounding boxes in corners
representation
"""
return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
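The height/width arithmetic in tile_anchors above is easy to check by hand. The short NumPy sketch below is not part of the gist; it reproduces the corner computation for a single anchor on a 1x1 grid, using the same scale (0.5), aspect ratio (0.25), default base_anchor_size ([256, 256]) and anchor_offset ([7, -3]) that the unit test further down exercises. The helper name is purely illustrative.

# Standalone NumPy sketch (illustrative, not from the gist) mirroring the
# height/width/corner arithmetic of tile_anchors for one grid point.
import numpy as np

def toy_anchor_corners(scale, aspect_ratio,
                       base_anchor_size=(256.0, 256.0),
                       anchor_offset=(7.0, -3.0)):
    """Returns [ymin, xmin, ymax, xmax] for a single anchor on a 1x1 grid."""
    ratio_sqrt = np.sqrt(aspect_ratio)
    height = scale / ratio_sqrt * base_anchor_size[0]
    width = scale * ratio_sqrt * base_anchor_size[1]
    y_center, x_center = anchor_offset  # the only grid point sits at the offset
    return [y_center - 0.5 * height, x_center - 0.5 * width,
            y_center + 0.5 * height, x_center + 0.5 * width]

print(toy_anchor_corners(0.5, 0.25))  # [-121.0, -35.0, 135.0, 29.0]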
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
class GridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
scales = [0.5, 1.0, 2.0]
aspect_ratios = [0.25, 1.0, 4.0]
anchor_offset = [7, -3]
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales, aspect_ratios,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
To construct anchors at multiple grid resolutions, one must provide a
list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
size, a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[256, 256]).
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self,
feature_map_shape_list,
im_height=1,
im_width=1,
anchor_strides=None,
anchor_offsets=None):
"""Generates a collection of bounding boxes to be used as anchors.
The number of anchors generated for a single grid with shape MxM where we
place k boxes over each grid center is k*M^2 and thus the total number of
anchors is the sum over all grids. In our box_specs_list example
(see the constructor docstring), we would place two boxes over each grid
point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
output anchors follows the order of how the grid sizes and box_specs are
specified (with box_spec index varying the fastest, followed by width
index, then height index, then grid index).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
im_width: the width of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
anchor_strides: list of pairs of strides (in y and x directions
respectively). For example, setting
anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
corresponding to the first layer to be strided by .25 and those in the
second layer to be strided by .5 in both y and x directions. By
default, if anchor_strides=None, then they are set to be the reciprocal
of the corresponding grid sizes. The pairs can also be specified as
dynamic tf.int or tf.float numbers, e.g. for variable shape input
images.
anchor_offsets: list of pairs of offsets (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
(0, 0)-th anchor of the first layer to lie at (.125, .125) in image
space and likewise that we want the (0, 0)-th anchor of the second
layer to lie at (.25, .25) in image space. By default, if
anchor_offsets=None, then they are set to be half of the corresponding
anchor stride. The pairs can also be specified as dynamic tf.int or
tf.float numbers, e.g. for variable shape input images.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == len(self._box_specs)):
raise ValueError('feature_map_shape_list must be a list with the same '
'length as self._box_specs')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
if not anchor_strides:
anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
tf.to_float(im_width) / tf.to_float(pair[1]))
for pair in feature_map_shape_list]
if not anchor_offsets:
anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
for stride in anchor_strides]
for arg, arg_name in zip([anchor_strides, anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg]):
raise ValueError('%s must be a list of pairs.' % arg_name)
anchor_grid_list = []
min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
base_anchor_size = min_im_shape * self._base_anchor_size
for grid_size, scales, aspect_ratios, stride, offset in zip(
feature_map_shape_list, self._scales, self._aspect_ratios,
anchor_strides, anchor_offsets):
anchor_grid_list.append(
grid_anchor_generator.tile_anchors(
grid_height=grid_size[0],
grid_width=grid_size[1],
scales=scales,
aspect_ratios=aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=stride,
anchor_offset=offset))
concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
num_anchors = concatenated_anchors.num_boxes_static()
if num_anchors is None:
num_anchors = concatenated_anchors.num_boxes()
if self._clip_window is not None:
clip_window = tf.multiply(
tf.to_float([im_height, im_width, im_height, im_width]),
self._clip_window)
concatenated_anchors = box_list_ops.clip_to_window(
concatenated_anchors, clip_window, filter_nonoverlapping=False)
# TODO: make reshape an option for the clip_to_window op
concatenated_anchors.set(
tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))
stddevs_tensor = 0.01 * tf.ones(
[num_anchors, 4], dtype=tf.float32, name='stddevs')
concatenated_anchors.add_field('stddev', stddevs_tensor)
return concatenated_anchors
def create_ssd_anchors(num_layers=6,
min_scale=0.2,
max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
base_anchor_size=None,
reduce_boxes_in_lowest_layer=True):
"""Creates MultipleGridAnchorGenerator for SSD anchors.
This function instantiates a MultipleGridAnchorGenerator that reproduces
``default box`` construction proposed by Liu et al in the SSD paper.
See Section 2.2 for details. Grid sizes are assumed to be passed in
at generation time from finest resolution to coarsest resolution --- this is
used to (linearly) interpolate scales of anchor boxes corresponding to the
intermediate grid sizes.
Anchors that are returned by calling the `generate` method on the returned
MultipleGridAnchorGenerator object are always in normalized coordinates
and clipped to the unit square (i.e. all coordinates lie in [0, 1]x[0, 1]).
Args:
num_layers: integer number of grid layers to create anchors for (actual
grid sizes passed in at generation time)
min_scale: scale of anchors corresponding to finest resolution (float)
max_scale: scale of anchors corresponding to coarsest resolution (float)
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
base_anchor_size: base anchor size as [height, width].
reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
boxes per location is used in the lowest layer.
Returns:
a MultipleGridAnchorGenerator
"""
if base_anchor_size is None:
base_anchor_size = [1.0, 1.0]
base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
box_specs_list = []
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
for layer, scale, scale_next in zip(
range(num_layers), scales[:-1], scales[1:]):
layer_box_specs = []
if layer == 0 and reduce_boxes_in_lowest_layer:
layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
else:
for aspect_ratio in aspect_ratios:
layer_box_specs.append((scale, aspect_ratio))
if aspect_ratio == 1.0:
layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
box_specs_list.append(layer_box_specs)
return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
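The scale schedule built by create_ssd_anchors above is a plain linear interpolation between min_scale and max_scale. The sketch below is not part of the gist; it reproduces that loop with the default arguments so the resulting scales and the number of box specs per layer can be inspected directly. Variable names are illustrative.

# Illustrative NumPy sketch (not from the gist) of the scale interpolation and
# box-spec construction performed by create_ssd_anchors, using its defaults.
import numpy as np

num_layers, min_scale, max_scale = 6, 0.2, 0.95
aspect_ratios = (1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3)

scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
          for i in range(num_layers)] + [1.0]
print(scales)  # approx. [0.2, 0.35, 0.5, 0.65, 0.8, 0.95, 1.0]

box_specs_list = []
for layer, (scale, scale_next) in enumerate(zip(scales[:-1], scales[1:])):
    if layer == 0:
        # reduce_boxes_in_lowest_layer=True: only 3 boxes in the first layer.
        layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
    else:
        layer_box_specs = []
        for aspect_ratio in aspect_ratios:
            layer_box_specs.append((scale, aspect_ratio))
            if aspect_ratio == 1.0:
                # extra box with an intermediate scale, as in the SSD paper.
                layer_box_specs.append((np.sqrt(scale * scale_next), 1.0))
    box_specs_list.append(layer_box_specs)

print([len(specs) for specs in box_specs_list])  # [3, 6, 6, 6, 6, 6]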
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor_grid(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
(.5, 1.0), (1.0, 1.0), (2.0, 1.0),
(.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)],
anchor_strides=[(16, 16)],
anchor_offsets=[(7, -3)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = tf.constant([10, 10], dtype=tf.float32)
box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)],
anchor_strides=[(19, 19)],
anchor_offsets=[(0, 0)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_non_square(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_unnormalized(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., 0., 320., 320.], [0., 320., 320., 640.]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(
feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
2, dtype=tf.int32))],
im_height=320,
im_width=640)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_multiple_grids(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[-.25, -.25, .75, .75],
[.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
[-.25, .25, .75, 1.25],
[.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
[.25, -.25, 1.25, .75],
[.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
[.25, .25, 1.25, 1.25],
[.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
# only test first entry of larger set of anchors
exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
[.125-1.0, .125-1.0, .125+1.0, .125+1.0],
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (56, 4))
big_grid_corners = anchor_corners_out[0:3, :]
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
self.assertAllClose(big_grid_corners, exp_big_grid_corners)
def test_construct_multiple_grids_with_clipping(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[0, 0, .75, .75],
[0, 0, .25+.5*h, .25+.5*w],
[0, .25, .75, 1],
[0, .75-.5*w, .25+.5*h, 1],
[.25, 0, 1, .75],
[.75-.5*h, 0, 1, .25+.5*w],
[.25, .25, 1, 1],
[.75-.5*h, .75-.5*w, 1, 1]]
clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size, clip_window=clip_window)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
def test_invalid_box_specs(self):
# not all box specs are pairs
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5, .3)]]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
# box_specs_list is not a list of lists
box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
def test_invalid_generate_arguments(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
# incompatible lengths with box_specs_list
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.5, .5)],
anchor_offsets=[(.25, .25)])
# not pairs
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25, .1), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125), (.25)])
class CreateSSDAnchorsTest(tf.test.TestCase):
def test_create_ssd_anchors_returns_correct_shape(self):
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=True)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (7308, 4))
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=False)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (11640, 4))
if __name__ == '__main__':
tf.test.main()
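The shapes asserted in CreateSSDAnchorsTest above follow directly from the counting rule in the MultipleGridAnchorGenerator._generate docstring: k boxes per grid point on an MxM grid contribute k*M^2 anchors, summed over all grids. A quick check of that arithmetic, not part of the gist:

# Arithmetic check (illustrative) of the anchor counts asserted above: each
# layer contributes k * height * width anchors, where k is the number of box
# specs for that layer.
feature_map_shape_list = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]

# reduce_boxes_in_lowest_layer=True: 3 boxes on the first layer, 6 on the rest
# (5 aspect ratios plus one extra interpolated-scale box for aspect ratio 1.0).
boxes_per_location = [3, 6, 6, 6, 6, 6]
print(sum(k * h * w for k, (h, w) in zip(boxes_per_location,
                                         feature_map_shape_list)))  # 7308

# reduce_boxes_in_lowest_layer=False: 6 boxes on every layer.
boxes_per_location = [6] * 6
print(sum(k * h * w for k, (h, w) in zip(boxes_per_location,
                                         feature_map_shape_list)))  # 11640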
import os
import cv2
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from utils import FPS, WebcamVideoStream
from multiprocessing import Process, Queue, Pool
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
CWD_PATH = os.getcwd()
# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def detect_objects(image_np, sess, detection_graph):
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents the level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
return image_np
def worker(input_q, output_q):
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
fps = FPS().start()
while True:
fps.update()
frame = input_q.get()
output_q.put(detect_objects(frame, sess, detection_graph))
fps.stop()
sess.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-src', '--source', dest='video_source', type=int,
default=0, help='Device index of the camera.')
parser.add_argument('-wd', '--width', dest='width', type=int,
default=480, help='Width of the frames in the video stream.')
parser.add_argument('-ht', '--height', dest='height', type=int,
default=360, help='Height of the frames in the video stream.')
parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
default=2, help='Number of workers.')
parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
default=5, help='Size of the queue.')
args = parser.parse_args()
logger = multiprocessing.log_to_stderr()
logger.setLevel(multiprocessing.SUBDEBUG)
input_q = Queue(maxsize=args.queue_size)
output_q = Queue(maxsize=args.queue_size)
process = Process(target=worker, args=((input_q, output_q)))
process.daemon = True
pool = Pool(args.num_workers, worker, (input_q, output_q))
video_capture = WebcamVideoStream(src=args.video_source,
width=args.width,
height=args.height).start()
fps = FPS().start()
while True: # fps._numFrames < 120
frame = video_capture.read()
input_q.put(frame)
t = time.time()
cv2.imshow('Video', output_q.get())
fps.update()
print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
fps.stop()
print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
video_capture.stop()
cv2.destroyAllWindows()
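The app above imports FPS and WebcamVideoStream from a local utils module that is not included in this gist. Judging only from the calls made in the app (start/update/stop/elapsed/fps and start/read/stop), a minimal sketch of what such helpers could look like follows; everything beyond those method names is an assumption, not the author's actual implementation.

# Hypothetical minimal versions of the FPS and WebcamVideoStream helpers
# imported above ("from utils import FPS, WebcamVideoStream"). The gist does
# not include utils.py; this sketch only implements the methods the app calls
# and follows the common imutils-style pattern. Details are assumptions.
import datetime
from threading import Thread

import cv2


class FPS:
    def __init__(self):
        self._start = None
        self._end = None
        self._num_frames = 0

    def start(self):
        self._start = datetime.datetime.now()
        return self

    def stop(self):
        self._end = datetime.datetime.now()

    def update(self):
        self._num_frames += 1

    def elapsed(self):
        # seconds between start() and stop()
        return (self._end - self._start).total_seconds()

    def fps(self):
        return self._num_frames / self.elapsed()


class WebcamVideoStream:
    def __init__(self, src=0, width=480, height=360):
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        self.grabbed, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        # read frames in a daemon thread so the main loop never blocks on I/O
        Thread(target=self._update, daemon=True).start()
        return self

    def _update(self):
        while not self.stopped:
            self.grabbed, self.frame = self.stream.read()

    def read(self):
        return self.frame

    def stop(self):
        self.stopped = True
        self.stream.release()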
#!/bin/bash
# Determine the directory containing this script
if [[ -n $BASH_VERSION ]]; then
_SCRIPT_LOCATION=${BASH_SOURCE[0]}
_SHELL="bash"
elif [[ -n $ZSH_VERSION ]]; then
_SCRIPT_LOCATION=${funcstack[1]}
_SHELL="zsh"
else
echo "Only bash and zsh are supported"
return 1
fi
_CONDA_DIR=$(dirname "$_SCRIPT_LOCATION")
if [ $# -gt 1 ]; then
(>&2 echo "Error: did not expect more than one argument.")
(>&2 echo " (Got $@)")
return 1
fi
case "$(uname -s)" in
CYGWIN*|MINGW*|MSYS*)
EXT=".exe"
export MSYS2_ENV_CONV_EXCL=CONDA_PATH
# ignore any windows backup paths from bat-based activation
if [ "${CONDA_PATH_BACKUP:0:1}" != "/" ]; then
unset CONDA_PATH_BACKUP
fi
export _CONDA_PYTHON="$_CONDA_DIR/../python"
;;
*)
EXT=""
export _CONDA_PYTHON="$_CONDA_DIR/python"
;;
esac
# Ensure that this script is sourced, not executed
# Also note that errors are ignored as `activate foo` doesn't generate a bad
# value for $0 which would cause errors.
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "activate" ]]; then
(>&2 echo "Error: activate must be sourced. Run 'source activate envname'
instead of 'activate envname'.
")
"$_CONDA_DIR/conda" ..activate $_SHELL$EXT -h
exit 1
fi
if [ "$#" -eq "0" ]; then
args=('root')
else
args=$@
fi
"$_CONDA_DIR/conda" ..checkenv $_SHELL$EXT "$args"
if (( $? != 0 )); then
return 1
fi
# Ensure we deactivate any scripts from the old env
_CONDA_HOLD=true
source "$_CONDA_DIR/deactivate"
unset _CONDA_HOLD
_NEW_PART=$("$_CONDA_DIR/conda" ..activate $_SHELL$EXT "$args")
if (( $? == 0 )); then
export CONDA_PATH_BACKUP="$PATH"
# export this to restore it upon deactivation
export CONDA_PS1_BACKUP="$PS1"
# look if the deactivate script left a placeholder for us
if [[ $PATH == *"CONDA_PATH_PLACEHOLDER"* ]]; then
# If it did, replace it with our _NEW_PART
export PATH="$($_CONDA_PYTHON -c "import re; print(re.sub(r'CONDA_PATH_PLACEHOLDER', r'$_NEW_PART', '$PATH', 1))")"
else
export PATH="$_NEW_PART:$PATH"
fi
# CONDA_DEFAULT_ENV is the shortest representation of how conda recognizes your env.
# It can be an env name, or a full path.
# Last date of change: 2016-06-21
# If the string contains / it's a path
if [[ "$@" == */* ]]; then
export CONDA_DEFAULT_ENV=$(get_abs_filename "$args")
else
export CONDA_DEFAULT_ENV="$args"
fi
# CONDA_PREFIX is always the full path to the activated environment. It is not set
# when no environment is active.
# Legacy support: CONDA_DEFAULT_ENV is either env name or full path if given as path.
# CONDA_PREFIX is always the full path, for consistency.
# Last date of change: 2016-06-21
firstpath=${_NEW_PART%%:*}
export CONDA_PREFIX="$(echo ${firstpath} | sed "s|/bin$||")" &>/dev/null
# if CONDA_DEFAULT_ENV not in PS1, prepend it with parentheses
if [ $("$_CONDA_DIR/conda" ..changeps1) = "1" ]; then
if ! $(grep -q CONDA_DEFAULT_ENV <<<$PS1); then
if ! $(grep -q "POWERLINE" <<<$PS1); then
export PS1="(${CONDA_DEFAULT_ENV}) $PS1"
fi
fi
fi
# Load any of the scripts found $PREFIX/etc/conda/activate.d AFTER activation
_CONDA_D="${CONDA_PREFIX}/etc/conda/activate.d"
if [[ -d "$_CONDA_D" ]]; then
eval $(find "$_CONDA_D" -iname "*.sh" -exec echo source \'{}\'';' \;)
fi
unset _CONDA_PYTHON
else
unset _CONDA_PYTHON
return $?
fi
unset CONDA_PATH
if [[ -n $BASH_VERSION ]]; then
hash -r
elif [[ -n $ZSH_VERSION ]]; then
rehash
else
echo "Only bash and zsh are supported"
return 1
fi
#!/Users/tarrysingh/anaconda/bin/python
if __name__ == '__main__':
import sys
import conda.cli
sys.exit(conda.cli.main())
#!/bin/bash
# Determine the directory containing this script
if [[ -n $BASH_VERSION ]]; then
_SCRIPT_LOCATION=${BASH_SOURCE[0]}
_SHELL="bash"
elif [[ -n $ZSH_VERSION ]]; then
_SCRIPT_LOCATION=${funcstack[1]}
_SHELL="zsh"
else
echo "Only bash and zsh are supported"
return 1
fi
_CONDA_DIR=$(dirname "$_SCRIPT_LOCATION")
case "$(uname -s)" in
CYGWIN*|MINGW*|MSYS*)
EXT=".exe"
export MSYS2_ENV_CONV_EXCL=CONDA_PATH
;;
*)
EXT=""
;;
esac
# shift over all args. We don't accept any, so it's OK that we ignore them all here.
while [[ $# > 0 ]]
do
key="$1"
case $key in
-h|--help)
"$_CONDA_DIR/conda" ..deactivate $_SHELL$EXT -h
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
exit 0
else
return 0
fi
;;
esac
shift # past argument or value
done
# Ensure that this script is sourced, not executed
# Note that if the script was executed, we're running inside bash!
# Also note that errors are ignored as `activate foo` doesn't generate a bad
# value for $0 which would cause errors.
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
(>&2 echo "Error: deactivate must be sourced. Run 'source deactivate'
instead of 'deactivate'.
")
"$_CONDA_DIR/conda" ..deactivate $_SHELL$EXT -h
exit 1
fi
if [[ -z "$CONDA_PATH_BACKUP" ]]; then
if [[ -n $BASH_VERSION ]] && [[ "$(basename "$0" 2> /dev/null)" == "deactivate" ]]; then
exit 0
else
return 0
fi
fi
if (( $? == 0 )); then
# Inverse of activation: run deactivate scripts prior to deactivating env
_CONDA_D="${CONDA_PREFIX}/etc/conda/deactivate.d"
if [[ -d $_CONDA_D ]]; then
eval $(find "$_CONDA_D" -iname "*.sh" -exec echo source \'{}\'';' \;)
fi
# # get the activation path that would have been provided for this prefix
# _LAST_ACTIVATE_PATH=$("$_CONDA_DIR/conda" ..activate $_SHELL$EXT "$CONDA_PREFIX")
#
# # in activate, we replace a placeholder so that conda keeps its place in the PATH order
# # The activate script sets _CONDA_HOLD here to activate that behavior.
# # Otherwise, PATH is simply removed.
# if [ -n "$_CONDA_HOLD" ]; then
# export PATH="$($_CONDA_PYTHON2 -c "import re; print(re.sub(r'$_LAST_ACTIVATE_PATH(:?)', r'CONDA_PATH_PLACEHOLDER\1', '$PATH', 1))")"
# else
# export PATH="$($_CONDA_PYTHON2 -c "import re; print(re.sub(r'$_LAST_ACTIVATE_PATH(:?)', r'', '$PATH', 1))")"
# fi
#
# unset _LAST_ACTIVATE_PATH
export PATH=$("$_CONDA_DIR/conda" ..deactivate.path $_SHELL$EXT "$CONDA_PREFIX")
unset CONDA_DEFAULT_ENV
unset CONDA_PREFIX
unset CONDA_PATH_BACKUP
export PS1="$CONDA_PS1_BACKUP"
unset CONDA_PS1_BACKUP
unset _CONDA_PYTHON2
else
unset _CONDA_PYTHON2
return $?
fi
if [[ -n $BASH_VERSION ]]; then
hash -r
elif [[ -n $ZSH_VERSION ]]; then
rehash
fi
# Tensorflow Object Detection API: Box Coder implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "faster_rcnn_box_coder",
srcs = [
"faster_rcnn_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "faster_rcnn_box_coder_test",
srcs = [
"faster_rcnn_box_coder_test.py",
],
deps = [
":faster_rcnn_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "keypoint_box_coder",
srcs = [
"keypoint_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "keypoint_box_coder_test",
srcs = [
"keypoint_box_coder_test.py",
],
deps = [
":keypoint_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_library(
name = "mean_stddev_box_coder",
srcs = [
"mean_stddev_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "mean_stddev_box_coder_test",
srcs = [
"mean_stddev_box_coder_test.py",
],
deps = [
":mean_stddev_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "square_box_coder",
srcs = [
"square_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "square_box_coder_test",
srcs = [
"square_box_coder_test.py",
],
deps = [
":square_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Faster RCNN box coder."""
def __init__(self, scale_factors=None):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
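# Illustrative sketch (not from the object_detection sources): a one-box round
# trip through the [ty, tx, th, tw] schema documented above, assuming the
# TF 1.x session API used throughout this gist; box/anchor values are arbitrary.
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list

boxes = box_list.BoxList(tf.constant([[10.0, 10.0, 20.0, 15.0]]))
anchors = box_list.BoxList(tf.constant([[15.0, 12.0, 30.0, 18.0]]))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
    scale_factors=[10.0, 10.0, 5.0, 5.0])
rel_codes = coder.encode(boxes, anchors)          # shape [1, 4]: ty, tx, th, tw
decoded = coder.decode(rel_codes, anchors).get()  # recovers [ymin, xmin, ymax, xmax]
with tf.Session() as sess:
    print(sess.run([rel_codes, decoded]))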
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
  def test_very_small_width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
  tkx0 = (kx0 - xa) / wa
  tky1 = (ky1 - ya) / ha
  tkx1 = (kx1 - xa) / wa
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
"""Keypoint box coder."""
def __init__(self, num_keypoints, scale_factors=None):
"""Constructor for KeypointBoxCoder.
Args:
num_keypoints: Number of keypoints to encode/decode.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
In addition to scaling ty and tx, the first 2 scalars are used to scale
the y and x coordinates of the keypoints as well. If set to None, does
not perform scaling.
"""
self._num_keypoints = num_keypoints
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
self._keypoint_scale_factors = None
if scale_factors is not None:
self._keypoint_scale_factors = tf.expand_dims(tf.tile(
[tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
[num_keypoints]), 1)
@property
def code_size(self):
return 4 + self._num_keypoints * 2
def _encode(self, boxes, anchors):
"""Encode a box and keypoint collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
tensors with the shape [N, 4], and keypoints are tensors with the shape
[N, num_keypoints, 2].
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
represent the y and x coordinates of the first keypoint, tky1 and tkx1
represent the y and x coordinates of the second keypoint, and so on.
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
keypoints = boxes.get_field(fields.BoxListFields.keypoints)
keypoints = tf.transpose(tf.reshape(keypoints,
[-1, self._num_keypoints * 2]))
num_boxes = boxes.num_boxes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
tboxes = tf.stack([ty, tx, th, tw])
return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes and keypoints.
Args:
rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
anchor-encoded boxes and keypoints
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes and keypoints.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
num_codes = tf.shape(rel_codes)[0]
result = tf.unstack(tf.transpose(rel_codes))
ty, tx, th, tw = result[:4]
tkeypoints = result[4:]
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
decoded_boxes_keypoints = box_list.BoxList(
tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
keypoints = tf.reshape(tf.transpose(keypoints),
[-1, self._num_keypoints, 2])
decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
return decoded_boxes_keypoints
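# Illustrative sketch (not from the object_detection sources): encoding one box
# with two keypoints against one anchor, so code_size is 4 + 2 * 2 = 8. Assumes
# the TF 1.x session API used throughout this gist; values are arbitrary.
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields

boxes = box_list.BoxList(tf.constant([[10., 10., 20., 15.]]))
boxes.add_field(fields.BoxListFields.keypoints,
                tf.constant([[[15., 12.], [10., 15.]]]))  # [N, num_keypoints, 2]
anchors = box_list.BoxList(tf.constant([[15., 12., 30., 18.]]))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints=2)
rel_codes = coder.encode(boxes, anchors)   # [ty, tx, th, tw, tky0, tkx0, tky1, tkx1]
decoded = coder.decode(rel_codes, anchors)
with tf.Session() as sess:
    print(sess.run([rel_codes, decoded.get(),
                    decoded.get_field(fields.BoxListFields.keypoints)]))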
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mean stddev box coder.
This box coder uses the following coding schema to encode boxes:
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
"""
from object_detection.core import box_coder
from object_detection.core import box_list
class MeanStddevBoxCoder(box_coder.BoxCoder):
"""Mean stddev box coder."""
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of N anchors. We assume that anchors has an associated
stddev field.
Returns:
a tensor representing N anchor-encoded boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
box_corners = boxes.get()
means = anchors.get()
stddev = anchors.get_field('stddev')
return (box_corners - means) / stddev
def _decode(self, rel_codes, anchors):
"""Decode.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors. We assume that anchors has an associated
stddev field.
Returns:
boxes: BoxList holding N bounding boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
means = anchors.get()
stddevs = anchors.get_field('stddev')
box_corners = rel_codes * stddevs + means
return box_list.BoxList(box_corners)
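# Illustrative sketch (not from the object_detection sources): with a 'stddev'
# field of 0.1 on the anchors, a corner offset of -0.5 encodes to -5.0, and
# decoding inverts it. Assumes the TF 1.x session API used throughout this gist.
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list

boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
anchors = box_list.BoxList(tf.constant([[0.5, 0.5, 1.0, 1.0]]))
anchors.add_field('stddev', tf.constant([[0.1, 0.1, 0.1, 0.1]]))
coder = mean_stddev_box_coder.MeanStddevBoxCoder()
rel_codes = coder.encode(boxes, anchors)    # [[-5., -5., -5., -5.]]
decoded = coder.decode(rel_codes, anchors)  # recovers the original corners
with tf.Session() as sess:
    print(sess.run([rel_codes, decoded.get()]))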
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
class MeanStddevBoxCoderTest(tf.test.TestCase):
def testGetCorrectRelativeCodesAfterEncoding(self):
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
boxes = box_list.BoxList(tf.constant(box_corners))
expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
prior_stddevs = tf.constant(2 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
coder = mean_stddev_box_coder.MeanStddevBoxCoder()
rel_codes = coder.encode(boxes, priors)
with self.test_session() as sess:
rel_codes_out = sess.run(rel_codes)
self.assertAllClose(rel_codes_out, expected_rel_codes)
def testGetCorrectBoxesAfterDecoding(self):
rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
prior_stddevs = tf.constant(2 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
coder = mean_stddev_box_coder.MeanStddevBoxCoder()
decoded_boxes = coder.decode(rel_codes, priors)
decoded_box_corners = decoded_boxes.get()
with self.test_session() as sess:
decoded_out = sess.run(decoded_box_corners)
self.assertAllClose(decoded_out, expected_box_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Square box coder.
Square box coder follows the coding schema described below:
l = sqrt(h * w)
la = sqrt(ha * wa)
ty = (y - ya) / la
tx = (x - xa) / la
tl = log(l / la)
where x, y, w, h denote the box's center coordinates, width, and height,
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty and tl denote the anchor-encoded
center coordinates and length, respectively. Because the encoded box is a square, only
one length is encoded.
This has been shown to provide performance improvements over the Faster RCNN box
coder when the objects being detected tend to be square (e.g. faces) and when
the input images are not distorted via resizing.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class SquareBoxCoder(box_coder.BoxCoder):
"""Encodes a 3-scalar representation of a square box."""
def __init__(self, scale_factors=None):
"""Constructor for SquareBoxCoder.
Args:
scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
        If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0].
Raises:
ValueError: If scale_factors is not length 3 or contains values less than
or equal to 0.
"""
if scale_factors:
if len(scale_factors) != 3:
raise ValueError('The argument scale_factors must be a list of length '
'3.')
if any(scalar <= 0 for scalar in scale_factors):
raise ValueError('The values in scale_factors must all be greater '
'than 0.')
self._scale_factors = scale_factors
@property
def code_size(self):
return 3
def _encode(self, boxes, anchors):
"""Encodes a box collection with respect to an anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, tl].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
la = tf.sqrt(ha * wa)
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
l = tf.sqrt(h * w)
# Avoid NaN in division and log below.
la += EPSILON
l += EPSILON
tx = (xcenter - xcenter_a) / la
ty = (ycenter - ycenter_a) / la
tl = tf.log(l / la)
# Scales location targets for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
tl *= self._scale_factors[2]
return tf.transpose(tf.stack([ty, tx, tl]))
def _decode(self, rel_codes, anchors):
"""Decodes relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
la = tf.sqrt(ha * wa)
ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
tl /= self._scale_factors[2]
l = tf.exp(tl) * la
ycenter = ty * la + ycenter_a
xcenter = tx * la + xcenter_a
ymin = ycenter - l / 2.
xmin = xcenter - l / 2.
ymax = ycenter + l / 2.
xmax = xcenter + l / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
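# Illustrative sketch (not from the object_detection sources): the 3-element
# code [ty, tx, tl] means decoding always yields a square box, so a non-square
# input only round-trips its center and sqrt(h * w). Assumes the TF 1.x session
# API used throughout this gist; values are arbitrary.
import tensorflow as tf
from object_detection.box_coders import square_box_coder
from object_detection.core import box_list

boxes = box_list.BoxList(tf.constant([[10.0, 10.0, 20.0, 15.0]]))
anchors = box_list.BoxList(tf.constant([[15.0, 12.0, 30.0, 18.0]]))
coder = square_box_coder.SquareBoxCoder(scale_factors=[10.0, 10.0, 5.0])
rel_codes = coder.encode(boxes, anchors)    # shape [1, 3]
decoded = coder.decode(rel_codes, anchors)  # square [ymin, xmin, ymax, xmax]
with tf.Session() as sess:
    print(sess.run([rel_codes, decoded.get()]))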
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.square_box_coder."""
import tensorflow as tf
from object_detection.box_coders import square_box_coder
from object_detection.core import box_list
class SquareBoxCoderTest(tf.test.TestCase):
def test_correct_relative_codes_with_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = None
expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
[-0.068041, -0.272166, -0.89588]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_relative_codes_with_non_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4]
expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
[-0.136083, -0.816497, -3.583519]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_relative_codes_with_small_width(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
scale_factors = None
expected_rel_codes = [[-1.317616, 0., -20.670586]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_boxes_with_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465],
[-0.083333, -0.222222, -0.693147]]
scale_factors = None
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_correct_boxes_with_non_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
scale_factors = [2, 3, 4]
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: main runnables.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])  # Apache 2.0
py_binary(
name = "train",
srcs = [
"train.py",
],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "trainer",
srcs = ["trainer.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/builders:optimizer_builder",
"//tensorflow_models/object_detection/builders:preprocessor_builder",
"//tensorflow_models/object_detection/core:batcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:variables_helper",
"//tensorflow_models/slim:model_deploy",
],
)
py_test(
name = "trainer_test",
srcs = ["trainer_test.py"],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/core:losses",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "eval_util",
srcs = [
"eval_util.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:object_detection_evaluation",
"//tensorflow_models/object_detection/utils:visualization_utils",
],
)
py_library(
name = "evaluator",
srcs = ["evaluator.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection:eval_util",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_list_ops",
"//tensorflow_models/object_detection/core:prefetcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
],
)
py_binary(
name = "eval",
srcs = [
"eval.py",
],
deps = [
":evaluator",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_library(
name = "exporter",
srcs = [
"exporter.py",
],
deps = [
"//tensorflow",
"//tensorflow/python/tools:freeze_graph_lib",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
],
)
py_test(
name = "exporter_test",
srcs = [
"exporter_test.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "export_inference_graph",
srcs = [
"export_inference_graph.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "create_pascal_tf_record",
srcs = [
"create_pascal_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_test(
name = "create_pascal_tf_record_test",
srcs = [
"create_pascal_tf_record_test.py",
],
deps = [
":create_pascal_tf_record",
"//tensorflow",
],
)
py_binary(
name = "create_pet_tf_record",
srcs = [
"create_pet_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build an object detection anchor generator from config."""
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.anchor_generators import multiple_grid_anchor_generator
from object_detection.protos import anchor_generator_pb2
def build(anchor_generator_config):
"""Builds an anchor generator based on the config.
Args:
anchor_generator_config: An anchor_generator.proto object containing the
config for the desired anchor generator.
Returns:
Anchor generator based on the config.
Raises:
ValueError: On empty anchor generator proto.
"""
if not isinstance(anchor_generator_config,
anchor_generator_pb2.AnchorGenerator):
raise ValueError('anchor_generator_config not of type '
'anchor_generator_pb2.AnchorGenerator')
if anchor_generator_config.WhichOneof(
'anchor_generator_oneof') == 'grid_anchor_generator':
grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator
return grid_anchor_generator.GridAnchorGenerator(
scales=[float(scale) for scale in grid_anchor_generator_config.scales],
aspect_ratios=[float(aspect_ratio)
for aspect_ratio
in grid_anchor_generator_config.aspect_ratios],
base_anchor_size=[grid_anchor_generator_config.height,
grid_anchor_generator_config.width],
anchor_stride=[grid_anchor_generator_config.height_stride,
grid_anchor_generator_config.width_stride],
anchor_offset=[grid_anchor_generator_config.height_offset,
grid_anchor_generator_config.width_offset])
elif anchor_generator_config.WhichOneof(
'anchor_generator_oneof') == 'ssd_anchor_generator':
ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
return multiple_grid_anchor_generator.create_ssd_anchors(
num_layers=ssd_anchor_generator_config.num_layers,
min_scale=ssd_anchor_generator_config.min_scale,
max_scale=ssd_anchor_generator_config.max_scale,
aspect_ratios=ssd_anchor_generator_config.aspect_ratios,
reduce_boxes_in_lowest_layer=(ssd_anchor_generator_config
.reduce_boxes_in_lowest_layer))
else:
raise ValueError('Empty anchor generator.')
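# Illustrative sketch (not from the object_detection sources): building a
# GridAnchorGenerator from a text-format config. Fields left unset (height,
# width, strides, offsets) fall back to the proto defaults exercised by the
# tests below (256x256 base anchor, 16-pixel stride).
from google.protobuf import text_format
from object_detection.builders import anchor_generator_builder
from object_detection.protos import anchor_generator_pb2

anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge("""
  grid_anchor_generator {
    scales: [0.5, 1.0, 2.0]
    aspect_ratios: [0.5, 1.0, 2.0]
  }
""", anchor_generator_proto)
anchor_generator = anchor_generator_builder.build(anchor_generator_proto)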
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generator_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.anchor_generators import multiple_grid_anchor_generator
from object_detection.builders import anchor_generator_builder
from object_detection.protos import anchor_generator_pb2
class AnchorGeneratorBuilderTest(tf.test.TestCase):
def assert_almost_list_equal(self, expected_list, actual_list, delta=None):
self.assertEqual(len(expected_list), len(actual_list))
for expected_item, actual_item in zip(expected_list, actual_list):
self.assertAlmostEqual(expected_item, actual_item, delta=delta)
def test_build_grid_anchor_generator_with_defaults(self):
anchor_generator_text_proto = """
grid_anchor_generator {
}
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
anchor_generator_object = anchor_generator_builder.build(
anchor_generator_proto)
self.assertTrue(isinstance(anchor_generator_object,
grid_anchor_generator.GridAnchorGenerator))
self.assertListEqual(anchor_generator_object._scales, [])
self.assertListEqual(anchor_generator_object._aspect_ratios, [])
with self.test_session() as sess:
base_anchor_size, anchor_offset, anchor_stride = sess.run(
[anchor_generator_object._base_anchor_size,
anchor_generator_object._anchor_offset,
anchor_generator_object._anchor_stride])
self.assertAllEqual(anchor_offset, [0, 0])
self.assertAllEqual(anchor_stride, [16, 16])
self.assertAllEqual(base_anchor_size, [256, 256])
def test_build_grid_anchor_generator_with_non_default_parameters(self):
anchor_generator_text_proto = """
grid_anchor_generator {
height: 128
width: 512
height_stride: 10
width_stride: 20
height_offset: 30
width_offset: 40
scales: [0.4, 2.2]
aspect_ratios: [0.3, 4.5]
}
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
anchor_generator_object = anchor_generator_builder.build(
anchor_generator_proto)
self.assertTrue(isinstance(anchor_generator_object,
grid_anchor_generator.GridAnchorGenerator))
self.assert_almost_list_equal(anchor_generator_object._scales,
[0.4, 2.2])
self.assert_almost_list_equal(anchor_generator_object._aspect_ratios,
[0.3, 4.5])
with self.test_session() as sess:
base_anchor_size, anchor_offset, anchor_stride = sess.run(
[anchor_generator_object._base_anchor_size,
anchor_generator_object._anchor_offset,
anchor_generator_object._anchor_stride])
self.assertAllEqual(anchor_offset, [30, 40])
self.assertAllEqual(anchor_stride, [10, 20])
self.assertAllEqual(base_anchor_size, [128, 512])
def test_build_ssd_anchor_generator_with_defaults(self):
anchor_generator_text_proto = """
ssd_anchor_generator {
aspect_ratios: [1.0]
}
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
anchor_generator_object = anchor_generator_builder.build(
anchor_generator_proto)
self.assertTrue(isinstance(anchor_generator_object,
multiple_grid_anchor_generator.
MultipleGridAnchorGenerator))
for actual_scales, expected_scales in zip(
list(anchor_generator_object._scales),
[(0.1, 0.2, 0.2),
(0.35, 0.418),
(0.499, 0.570),
(0.649, 0.721),
(0.799, 0.871),
(0.949, 0.974)]):
self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
for actual_aspect_ratio, expected_aspect_ratio in zip(
list(anchor_generator_object._aspect_ratios),
[(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]):
self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
with self.test_session() as sess:
base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
self.assertAllClose(base_anchor_size, [1.0, 1.0])
  def test_build_ssd_anchor_generator_without_reduced_boxes(self):
anchor_generator_text_proto = """
ssd_anchor_generator {
aspect_ratios: [1.0]
reduce_boxes_in_lowest_layer: false
}
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
anchor_generator_object = anchor_generator_builder.build(
anchor_generator_proto)
self.assertTrue(isinstance(anchor_generator_object,
multiple_grid_anchor_generator.
MultipleGridAnchorGenerator))
for actual_scales, expected_scales in zip(
list(anchor_generator_object._scales),
[(0.2, 0.264),
(0.35, 0.418),
(0.499, 0.570),
(0.649, 0.721),
(0.799, 0.871),
(0.949, 0.974)]):
self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
for actual_aspect_ratio, expected_aspect_ratio in zip(
list(anchor_generator_object._aspect_ratios),
6 * [(1.0, 1.0)]):
self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
with self.test_session() as sess:
base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
self.assertAllClose(base_anchor_size, [1.0, 1.0])
def test_build_ssd_anchor_generator_with_non_default_parameters(self):
anchor_generator_text_proto = """
ssd_anchor_generator {
num_layers: 2
min_scale: 0.3
max_scale: 0.8
aspect_ratios: [2.0]
}
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
anchor_generator_object = anchor_generator_builder.build(
anchor_generator_proto)
self.assertTrue(isinstance(anchor_generator_object,
multiple_grid_anchor_generator.
MultipleGridAnchorGenerator))
for actual_scales, expected_scales in zip(
list(anchor_generator_object._scales),
[(0.1, 0.3, 0.3), (0.8,)]):
self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
for actual_aspect_ratio, expected_aspect_ratio in zip(
list(anchor_generator_object._aspect_ratios),
[(1.0, 2.0, 0.5), (2.0,)]):
self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
with self.test_session() as sess:
base_anchor_size = sess.run(anchor_generator_object._base_anchor_size)
self.assertAllClose(base_anchor_size, [1.0, 1.0])
  def test_raise_value_error_on_empty_anchor_generator(self):
anchor_generator_text_proto = """
"""
anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
with self.assertRaises(ValueError):
anchor_generator_builder.build(anchor_generator_proto)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build an object detection box coder from configuration."""
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.box_coders import square_box_coder
from object_detection.protos import box_coder_pb2
def build(box_coder_config):
"""Builds a box coder object based on the box coder config.
Args:
box_coder_config: A box_coder.proto object containing the config for the
desired box coder.
Returns:
BoxCoder based on the config.
Raises:
ValueError: On empty box coder proto.
"""
if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.')
if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder':
return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[
box_coder_config.faster_rcnn_box_coder.y_scale,
box_coder_config.faster_rcnn_box_coder.x_scale,
box_coder_config.faster_rcnn_box_coder.height_scale,
box_coder_config.faster_rcnn_box_coder.width_scale
])
if (box_coder_config.WhichOneof('box_coder_oneof') ==
'mean_stddev_box_coder'):
return mean_stddev_box_coder.MeanStddevBoxCoder()
if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder':
return square_box_coder.SquareBoxCoder(scale_factors=[
box_coder_config.square_box_coder.y_scale,
box_coder_config.square_box_coder.x_scale,
box_coder_config.square_box_coder.length_scale
])
raise ValueError('Empty box coder.')
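# Illustrative sketch (not from the object_detection sources): building a
# FasterRcnnBoxCoder from a text-format config, using the scale factors the
# coder's own docstring recommends.
from google.protobuf import text_format
from object_detection.builders import box_coder_builder
from object_detection.protos import box_coder_pb2

box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge("""
  faster_rcnn_box_coder {
    y_scale: 10.0
    x_scale: 10.0
    height_scale: 5.0
    width_scale: 5.0
  }
""", box_coder_proto)
box_coder = box_coder_builder.build(box_coder_proto)  # FasterRcnnBoxCoder instance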
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_coder_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.box_coders import square_box_coder
from object_detection.builders import box_coder_builder
from object_detection.protos import box_coder_pb2
class BoxCoderBuilderTest(tf.test.TestCase):
def test_build_faster_rcnn_box_coder_with_defaults(self):
box_coder_text_proto = """
faster_rcnn_box_coder {
}
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
box_coder_object = box_coder_builder.build(box_coder_proto)
self.assertTrue(isinstance(box_coder_object,
faster_rcnn_box_coder.FasterRcnnBoxCoder))
self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
def test_build_faster_rcnn_box_coder_with_non_default_parameters(self):
box_coder_text_proto = """
faster_rcnn_box_coder {
y_scale: 6.0
x_scale: 3.0
height_scale: 7.0
width_scale: 8.0
}
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
box_coder_object = box_coder_builder.build(box_coder_proto)
self.assertTrue(isinstance(box_coder_object,
faster_rcnn_box_coder.FasterRcnnBoxCoder))
self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
def test_build_mean_stddev_box_coder(self):
box_coder_text_proto = """
mean_stddev_box_coder {
}
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
box_coder_object = box_coder_builder.build(box_coder_proto)
self.assertTrue(
isinstance(box_coder_object,
mean_stddev_box_coder.MeanStddevBoxCoder))
def test_build_square_box_coder_with_defaults(self):
box_coder_text_proto = """
square_box_coder {
}
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
box_coder_object = box_coder_builder.build(box_coder_proto)
self.assertTrue(
isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0])
def test_build_square_box_coder_with_non_default_parameters(self):
box_coder_text_proto = """
square_box_coder {
y_scale: 6.0
x_scale: 3.0
length_scale: 7.0
}
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
box_coder_object = box_coder_builder.build(box_coder_proto)
self.assertTrue(
isinstance(box_coder_object, square_box_coder.SquareBoxCoder))
self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0])
def test_raise_error_on_empty_box_coder(self):
box_coder_text_proto = """
"""
box_coder_proto = box_coder_pb2.BoxCoder()
text_format.Merge(box_coder_text_proto, box_coder_proto)
with self.assertRaises(ValueError):
box_coder_builder.build(box_coder_proto)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Function to build box predictor from configuration."""
from object_detection.core import box_predictor
from object_detection.protos import box_predictor_pb2
def build(argscope_fn, box_predictor_config, is_training, num_classes):
"""Builds box predictor based on the configuration.
Builds box predictor based on the configuration. See box_predictor.proto for
configurable options. Also, see box_predictor.py for more details.
Args:
argscope_fn: A function that takes the following inputs:
* hyperparams_pb2.Hyperparams proto
* a boolean indicating if the model is in training mode.
and returns a tf slim argscope for Conv and FC hyperparameters.
box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
configuration.
    is_training: Whether the model is in training mode.
num_classes: Number of classes to predict.
Returns:
box_predictor: box_predictor.BoxPredictor object.
Raises:
ValueError: On unknown box predictor.
"""
if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
raise ValueError('box_predictor_config not of type '
'box_predictor_pb2.BoxPredictor.')
box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
if box_predictor_oneof == 'convolutional_box_predictor':
conv_box_predictor = box_predictor_config.convolutional_box_predictor
conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
min_depth=conv_box_predictor.min_depth,
max_depth=conv_box_predictor.max_depth,
num_layers_before_predictor=(conv_box_predictor.
num_layers_before_predictor),
use_dropout=conv_box_predictor.use_dropout,
dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
kernel_size=conv_box_predictor.kernel_size,
box_code_size=conv_box_predictor.box_code_size,
apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores)
return box_predictor_object
if box_predictor_oneof == 'mask_rcnn_box_predictor':
mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
is_training)
conv_hyperparams = None
if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
is_training=is_training,
num_classes=num_classes,
fc_hyperparams=fc_hyperparams,
use_dropout=mask_rcnn_box_predictor.use_dropout,
dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
box_code_size=mask_rcnn_box_predictor.box_code_size,
conv_hyperparams=conv_hyperparams,
predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
mask_prediction_conv_depth=(mask_rcnn_box_predictor.
mask_prediction_conv_depth),
predict_keypoints=mask_rcnn_box_predictor.predict_keypoints)
return box_predictor_object
if box_predictor_oneof == 'rfcn_box_predictor':
rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.RfcnBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
crop_size=[rfcn_box_predictor.crop_height,
rfcn_box_predictor.crop_width],
num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
rfcn_box_predictor.num_spatial_bins_width],
depth=rfcn_box_predictor.depth,
box_code_size=rfcn_box_predictor.box_code_size)
return box_predictor_object
raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))
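# Illustrative sketch (not from the object_detection sources): a natural choice
# for argscope_fn is hyperparams_builder.build (imported by the test below),
# whereas the tests substitute a mock in its place. The config and num_classes
# here are arbitrary placeholder values.
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2

box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge("""
  convolutional_box_predictor {
    conv_hyperparams {
      regularizer { l1_regularizer { } }
      initializer { truncated_normal_initializer { } }
    }
  }
""", box_predictor_proto)
predictor = box_predictor_builder.build(
    argscope_fn=hyperparams_builder.build,
    box_predictor_config=box_predictor_proto,
    is_training=True,
    num_classes=90)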
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_predictor_builder."""
import mock
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
weight: 0.0003
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.3
}
}
activation: RELU_6
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
return (conv_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_conv_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
weight))
self.assertAlmostEqual((hyperparams_proto.initializer.
truncated_normal_initializer.stddev),
(conv_hyperparams_actual.initializer.
truncated_normal_initializer.stddev))
self.assertAlmostEqual((hyperparams_proto.initializer.
truncated_normal_initializer.mean),
(conv_hyperparams_actual.initializer.
truncated_normal_initializer.mean))
self.assertEqual(hyperparams_proto.activation,
conv_hyperparams_actual.activation)
self.assertFalse(is_training)
def test_construct_non_default_conv_box_predictor(self):
box_predictor_text_proto = """
convolutional_box_predictor {
min_depth: 2
max_depth: 16
num_layers_before_predictor: 2
use_dropout: false
dropout_keep_probability: 0.4
kernel_size: 3
box_code_size: 3
apply_sigmoid_to_scores: true
}
"""
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
return (conv_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_conv_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
self.assertEqual(box_predictor._min_depth, 2)
self.assertEqual(box_predictor._max_depth, 16)
self.assertEqual(box_predictor._num_layers_before_predictor, 2)
self.assertFalse(box_predictor._use_dropout)
self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.4)
self.assertTrue(box_predictor._apply_sigmoid_to_scores)
self.assertEqual(box_predictor.num_classes, 10)
self.assertFalse(box_predictor._is_training)
def test_construct_default_conv_box_predictor(self):
box_predictor_text_proto = """
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}"""
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=hyperparams_builder.build,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertEqual(box_predictor._min_depth, 0)
self.assertEqual(box_predictor._max_depth, 0)
self.assertEqual(box_predictor._num_layers_before_predictor, 0)
self.assertTrue(box_predictor._use_dropout)
self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
self.assertFalse(box_predictor._apply_sigmoid_to_scores)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_builder_calls_fc_argscope_fn(self):
fc_hyperparams_text_proto = """
regularizer {
l1_regularizer {
weight: 0.0003
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.3
}
}
activation: RELU_6
op: FC
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
hyperparams_proto)
mock_argscope_fn = mock.Mock(return_value='arg_scope')
box_predictor = box_predictor_builder.build(
argscope_fn=mock_argscope_fn,
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
mock_argscope_fn.assert_called_with(hyperparams_proto, False)
self.assertEqual(box_predictor._fc_hyperparams, 'arg_scope')
def test_non_default_mask_rcnn_box_predictor(self):
fc_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
op: FC
"""
box_predictor_text_proto = """
mask_rcnn_box_predictor {
use_dropout: true
dropout_keep_probability: 0.8
box_code_size: 3
}
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
def mock_fc_argscope_builder(fc_hyperparams_arg, is_training):
return (fc_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_fc_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertTrue(box_predictor._use_dropout)
self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._box_code_size, 3)
def test_build_default_mask_rcnn_box_predictor(self):
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
hyperparams_pb2.Hyperparams.FC)
box_predictor = box_predictor_builder.build(
argscope_fn=mock.Mock(return_value='arg_scope'),
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertFalse(box_predictor._use_dropout)
self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._box_code_size, 4)
self.assertFalse(box_predictor._predict_instance_masks)
self.assertFalse(box_predictor._predict_keypoints)
def test_build_box_predictor_with_mask_branch(self):
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
hyperparams_pb2.Hyperparams.FC)
box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
hyperparams_pb2.Hyperparams.CONV)
box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
mock_argscope_fn = mock.Mock(return_value='arg_scope')
box_predictor = box_predictor_builder.build(
argscope_fn=mock_argscope_fn,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
mock_argscope_fn.assert_has_calls(
[mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
True),
mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
True)], any_order=True)
self.assertFalse(box_predictor._use_dropout)
self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._box_code_size, 4)
self.assertTrue(box_predictor._predict_instance_masks)
self.assertEqual(box_predictor._mask_prediction_conv_depth, 512)
self.assertFalse(box_predictor._predict_keypoints)
class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
weight: 0.0003
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.3
}
}
activation: RELU_6
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
return (conv_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_conv_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
weight))
self.assertAlmostEqual((hyperparams_proto.initializer.
truncated_normal_initializer.stddev),
(conv_hyperparams_actual.initializer.
truncated_normal_initializer.stddev))
self.assertAlmostEqual((hyperparams_proto.initializer.
truncated_normal_initializer.mean),
(conv_hyperparams_actual.initializer.
truncated_normal_initializer.mean))
self.assertEqual(hyperparams_proto.activation,
conv_hyperparams_actual.activation)
self.assertFalse(is_training)
def test_non_default_rfcn_box_predictor(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
"""
box_predictor_text_proto = """
rfcn_box_predictor {
num_spatial_bins_height: 4
num_spatial_bins_width: 4
depth: 4
box_code_size: 3
crop_height: 16
crop_width: 16
}
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
return (conv_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_conv_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._box_code_size, 3)
self.assertEqual(box_predictor._num_spatial_bins, [4, 4])
self.assertEqual(box_predictor._crop_size, [16, 16])
def test_default_rfcn_box_predictor(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
"""
hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
return (conv_hyperparams_arg, is_training)
box_predictor_proto = box_predictor_pb2.BoxPredictor()
box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
hyperparams_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=mock_conv_argscope_builder,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._box_code_size, 4)
self.assertEqual(box_predictor._num_spatial_bins, [3, 3])
self.assertEqual(box_predictor._crop_size, [12, 12])
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: component builders.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "model_builder",
srcs = ["model_builder.py"],
deps = [
":anchor_generator_builder",
":box_coder_builder",
":box_predictor_builder",
":hyperparams_builder",
":image_resizer_builder",
":losses_builder",
":matcher_builder",
":post_processing_builder",
":region_similarity_calculator_builder",
"//tensorflow_models/object_detection/core:box_predictor",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:rfcn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/models:faster_rcnn_inception_resnet_v2_feature_extractor",
"//tensorflow_models/object_detection/models:faster_rcnn_resnet_v1_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
"//tensorflow_models/object_detection/protos:model_py_pb2",
],
)
py_test(
name = "model_builder_test",
srcs = ["model_builder_test.py"],
deps = [
":model_builder",
"//tensorflow",
"//tensorflow_models/object_detection/meta_architectures:faster_rcnn_meta_arch",
"//tensorflow_models/object_detection/meta_architectures:ssd_meta_arch",
"//tensorflow_models/object_detection/models:ssd_inception_v2_feature_extractor",
"//tensorflow_models/object_detection/models:ssd_mobilenet_v1_feature_extractor",
"//tensorflow_models/object_detection/protos:model_py_pb2",
],
)
py_library(
name = "matcher_builder",
srcs = ["matcher_builder.py"],
deps = [
"//tensorflow_models/object_detection/matchers:argmax_matcher",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
"//tensorflow_models/object_detection/protos:matcher_py_pb2",
],
)
py_test(
name = "matcher_builder_test",
srcs = ["matcher_builder_test.py"],
deps = [
":matcher_builder",
"//tensorflow_models/object_detection/matchers:argmax_matcher",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
"//tensorflow_models/object_detection/protos:matcher_py_pb2",
],
)
py_library(
name = "box_coder_builder",
srcs = ["box_coder_builder.py"],
deps = [
"//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/box_coders:square_box_coder",
"//tensorflow_models/object_detection/protos:box_coder_py_pb2",
],
)
py_test(
name = "box_coder_builder_test",
srcs = ["box_coder_builder_test.py"],
deps = [
":box_coder_builder",
"//tensorflow",
"//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/box_coders:square_box_coder",
"//tensorflow_models/object_detection/protos:box_coder_py_pb2",
],
)
py_library(
name = "anchor_generator_builder",
srcs = ["anchor_generator_builder.py"],
deps = [
"//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
"//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator",
"//tensorflow_models/object_detection/protos:anchor_generator_py_pb2",
],
)
py_test(
name = "anchor_generator_builder_test",
srcs = ["anchor_generator_builder_test.py"],
deps = [
":anchor_generator_builder",
"//tensorflow",
"//tensorflow_models/object_detection/anchor_generators:grid_anchor_generator",
"//tensorflow_models/object_detection/anchor_generators:multiple_grid_anchor_generator",
"//tensorflow_models/object_detection/protos:anchor_generator_py_pb2",
],
)
py_library(
name = "input_reader_builder",
srcs = ["input_reader_builder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
],
)
py_test(
name = "input_reader_builder_test",
srcs = [
"input_reader_builder_test.py",
],
deps = [
":input_reader_builder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
],
)
py_library(
name = "losses_builder",
srcs = ["losses_builder.py"],
deps = [
"//tensorflow_models/object_detection/core:losses",
"//tensorflow_models/object_detection/protos:losses_py_pb2",
],
)
py_test(
name = "losses_builder_test",
srcs = ["losses_builder_test.py"],
deps = [
":losses_builder",
"//tensorflow_models/object_detection/core:losses",
"//tensorflow_models/object_detection/protos:losses_py_pb2",
],
)
py_library(
name = "optimizer_builder",
srcs = ["optimizer_builder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:learning_schedules",
],
)
py_test(
name = "optimizer_builder_test",
srcs = ["optimizer_builder_test.py"],
deps = [
":optimizer_builder",
"//tensorflow",
"//tensorflow_models/object_detection/protos:optimizer_py_pb2",
],
)
py_library(
name = "post_processing_builder",
srcs = ["post_processing_builder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:post_processing",
"//tensorflow_models/object_detection/protos:post_processing_py_pb2",
],
)
py_test(
name = "post_processing_builder_test",
srcs = ["post_processing_builder_test.py"],
deps = [
":post_processing_builder",
"//tensorflow",
"//tensorflow_models/object_detection/protos:post_processing_py_pb2",
],
)
py_library(
name = "hyperparams_builder",
srcs = ["hyperparams_builder.py"],
deps = [
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
],
)
py_test(
name = "hyperparams_builder_test",
srcs = ["hyperparams_builder_test.py"],
deps = [
":hyperparams_builder",
"//tensorflow",
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
],
)
py_library(
name = "box_predictor_builder",
srcs = ["box_predictor_builder.py"],
deps = [
":hyperparams_builder",
"//tensorflow_models/object_detection/core:box_predictor",
"//tensorflow_models/object_detection/protos:box_predictor_py_pb2",
],
)
py_test(
name = "box_predictor_builder_test",
srcs = ["box_predictor_builder_test.py"],
deps = [
":box_predictor_builder",
":hyperparams_builder",
"//tensorflow",
"//tensorflow_models/object_detection/protos:box_predictor_py_pb2",
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
],
)
py_library(
name = "region_similarity_calculator_builder",
srcs = ["region_similarity_calculator_builder.py"],
deps = [
"//tensorflow_models/object_detection/core:region_similarity_calculator",
"//tensorflow_models/object_detection/protos:region_similarity_calculator_py_pb2",
],
)
py_test(
name = "region_similarity_calculator_builder_test",
srcs = ["region_similarity_calculator_builder_test.py"],
deps = [
":region_similarity_calculator_builder",
"//tensorflow",
],
)
py_library(
name = "preprocessor_builder",
srcs = ["preprocessor_builder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:preprocessor",
"//tensorflow_models/object_detection/protos:preprocessor_py_pb2",
],
)
py_test(
name = "preprocessor_builder_test",
srcs = [
"preprocessor_builder_test.py",
],
deps = [
":preprocessor_builder",
"//tensorflow",
"//tensorflow_models/object_detection/core:preprocessor",
"//tensorflow_models/object_detection/protos:preprocessor_py_pb2",
],
)
py_library(
name = "image_resizer_builder",
srcs = ["image_resizer_builder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:preprocessor",
"//tensorflow_models/object_detection/protos:image_resizer_py_pb2",
],
)
py_test(
name = "image_resizer_builder_test",
srcs = ["image_resizer_builder_test.py"],
deps = [
":image_resizer_builder",
"//tensorflow",
"//tensorflow_models/object_detection/protos:image_resizer_py_pb2",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
import tensorflow as tf
from object_detection.protos import hyperparams_pb2
slim = tf.contrib.slim
def build(hyperparams_config, is_training):
"""Builds tf-slim arg_scope for convolution ops based on the config.
Returns an arg_scope to use for convolution ops containing weights
initializer, weights regularizer, activation function, batch norm function
and batch norm parameters based on the configuration.
Note that if the batch_norm parameters are not specified in the config
(i.e. left to default) then batch norm is excluded from the arg_scope.
The batch norm parameters are set for updates based on the `is_training`
argument and the conv_hyperparams_config.batch_norm.train parameter. During
training, they are updated only if batch_norm.train is true; during eval,
no updates are made to the batch norm variables. In both cases, their current
values are used during the forward pass.
Args:
hyperparams_config: hyperparams.proto object containing
hyperparameters.
is_training: Whether the network is in training mode.
Returns:
arg_scope: tf-slim arg_scope containing hyperparameters for ops.
Raises:
ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
"""
if not isinstance(hyperparams_config,
hyperparams_pb2.Hyperparams):
raise ValueError('hyperparams_config not of type '
'hyperparams_pb2.Hyperparams.')
batch_norm = None
batch_norm_params = None
if hyperparams_config.HasField('batch_norm'):
batch_norm = slim.batch_norm
batch_norm_params = _build_batch_norm_params(
hyperparams_config.batch_norm, is_training)
affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
if hyperparams_config.HasField('op') and (
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
affected_ops = [slim.fully_connected]
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
def _build_activation_fn(activation_fn):
"""Builds a callable activation from config.
Args:
activation_fn: hyperparams_pb2.Hyperparams.activation
Returns:
Callable activation function.
Raises:
ValueError: On unknown activation function.
"""
if activation_fn == hyperparams_pb2.Hyperparams.NONE:
return None
if activation_fn == hyperparams_pb2.Hyperparams.RELU:
return tf.nn.relu
if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
return tf.nn.relu6
raise ValueError('Unknown activation function: {}'.format(activation_fn))
def _build_regularizer(regularizer):
"""Builds a tf-slim regularizer from config.
Args:
regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
Returns:
tf-slim regularizer.
Raises:
ValueError: On unknown regularizer.
"""
regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
if regularizer_oneof == 'l1_regularizer':
return slim.l1_regularizer(scale=regularizer.l1_regularizer.weight)
if regularizer_oneof == 'l2_regularizer':
return slim.l2_regularizer(scale=regularizer.l2_regularizer.weight)
raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
def _build_initializer(initializer):
"""Build a tf initializer from config.
Args:
initializer: hyperparams_pb2.Hyperparams.initializer proto.
Returns:
tf initializer.
Raises:
ValueError: On unknown initializer.
"""
initializer_oneof = initializer.WhichOneof('initializer_oneof')
if initializer_oneof == 'truncated_normal_initializer':
return tf.truncated_normal_initializer(
mean=initializer.truncated_normal_initializer.mean,
stddev=initializer.truncated_normal_initializer.stddev)
if initializer_oneof == 'variance_scaling_initializer':
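# The config stores the mode as a proto enum value; look up its string name
# ('FAN_IN', 'FAN_OUT' or 'FAN_AVG') because slim.variance_scaling_initializer
# expects the mode argument as a string.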
enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
DESCRIPTOR.enum_types_by_name['Mode'])
mode = enum_descriptor.values_by_number[initializer.
variance_scaling_initializer.
mode].name
return slim.variance_scaling_initializer(
factor=initializer.variance_scaling_initializer.factor,
mode=mode,
uniform=initializer.variance_scaling_initializer.uniform)
raise ValueError('Unknown initializer function: {}'.format(
initializer_oneof))
def _build_batch_norm_params(batch_norm, is_training):
"""Build a dictionary of batch_norm params from config.
Args:
batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
is_training: Whether the model is in training mode.
Returns:
A dictionary containing batch_norm parameters.
"""
batch_norm_params = {
'decay': batch_norm.decay,
'center': batch_norm.center,
'scale': batch_norm.scale,
'epsilon': batch_norm.epsilon,
'fused': True,
'is_training': is_training and batch_norm.train,
}
return batch_norm_params
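# A minimal usage sketch for the arg_scope builder above, assuming a
# hand-written text-format config; the layer shape and hyperparameter values
# are illustrative assumptions.
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2

slim = tf.contrib.slim

conv_hyperparams_text_proto = """
regularizer { l2_regularizer { weight: 0.0004 } }
initializer { truncated_normal_initializer { stddev: 0.03 } }
activation: RELU_6
batch_norm { decay: 0.997 scale: true train: true }
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)

# Because batch_norm is present in the config, the returned scope also sets
# slim.batch_norm as the normalizer for conv-type ops.
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
images = tf.placeholder(tf.float32, shape=[None, 300, 300, 3])
with slim.arg_scope(scope):
  features = slim.conv2d(images, 32, [3, 3], scope='example_conv')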
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests object_detection.core.hyperparams_builder."""
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
# TODO: Rewrite third_party imports.
from object_detection.builders import hyperparams_builder
from object_detection.protos import hyperparams_pb2
slim = tf.contrib.slim
class HyperparamsBuilderTest(tf.test.TestCase):
# TODO: Make this a public api in slim arg_scope.py.
def _get_scope_key(self, op):
return getattr(op, '_key_op', str(op))
def test_default_arg_scope_has_conv2d_op(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
def test_default_arg_scope_has_separable_conv2d_op(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
def test_default_arg_scope_has_conv2d_transpose_op(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
conv_hyperparams_text_proto = """
op: FC
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
kwargs_1, kwargs_2, kwargs_3 = scope.values()
self.assertDictEqual(kwargs_1, kwargs_2)
self.assertDictEqual(kwargs_1, kwargs_3)
def test_return_l1_regularized_weights(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
weight: 0.5
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
with self.test_session() as sess:
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.abs(weights).sum() * 0.5, result)
def test_return_l2_regularizer_weights(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
weight: 0.42
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
with self.test_session() as sess:
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
def test_return_non_default_batch_norm_params_with_train_during_train(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
decay: 0.7
center: false
scale: true
epsilon: 0.03
train: true
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
self.assertTrue(batch_norm_params['is_training'])
def test_return_batch_norm_params_with_notrain_during_eval(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
decay: 0.7
center: false
scale: true
epsilon: 0.03
train: true
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=False)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
self.assertFalse(batch_norm_params['is_training'])
def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
decay: 0.7
center: false
scale: true
epsilon: 0.03
train: false
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
self.assertFalse(batch_norm_params['is_training'])
def test_do_not_use_batch_norm_if_default(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
self.assertEqual(conv_scope_arguments['normalizer_params'], None)
def test_use_none_activation(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: NONE
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], None)
def test_use_relu_activation(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
def test_use_relu_6_activation(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def _assert_variance_in_range(self, initializer, shape, variance,
tol=1e-2):
with tf.Graph().as_default() as g:
with self.test_session(graph=g) as sess:
var = tf.get_variable(
name='test',
shape=shape,
dtype=tf.float32,
initializer=initializer)
sess.run(tf.global_variables_initializer())
values = sess.run(var)
self.assertAllClose(np.var(values), variance, tol, tol)
def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_IN
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_OUT
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_AVG
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_IN
uniform: true
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
def test_variance_in_range_with_truncated_normal_initializer(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.8
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder function for image resizing operations."""
import functools
from object_detection.core import preprocessor
from object_detection.protos import image_resizer_pb2
def build(image_resizer_config):
"""Builds callable for image resizing operations.
Args:
image_resizer_config: image_resizer.proto object containing parameters for
an image resizing operation.
Returns:
image_resizer_fn: Callable for image resizing. This callable always takes
a rank-3 image tensor (corresponding to a single image) and returns a
rank-3 image tensor, possibly with new spatial dimensions.
Raises:
ValueError: if `image_resizer_config` is of incorrect type.
ValueError: if `image_resizer_config.image_resizer_oneof` is of an
unexpected type.
ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer
is used.
"""
if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
raise ValueError('image_resizer_config not of type '
'image_resizer_pb2.ImageResizer.')
if image_resizer_config.WhichOneof(
'image_resizer_oneof') == 'keep_aspect_ratio_resizer':
keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer
if not (keep_aspect_ratio_config.min_dimension
<= keep_aspect_ratio_config.max_dimension):
raise ValueError('min_dimension > max_dimension')
return functools.partial(
preprocessor.resize_to_range,
min_dimension=keep_aspect_ratio_config.min_dimension,
max_dimension=keep_aspect_ratio_config.max_dimension)
if image_resizer_config.WhichOneof(
'image_resizer_oneof') == 'fixed_shape_resizer':
fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer
return functools.partial(preprocessor.resize_image,
new_height=fixed_shape_resizer_config.height,
new_width=fixed_shape_resizer_config.width)
raise ValueError('Invalid image resizer option.')
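# A minimal usage sketch for the builder above; the dimensions are
# illustrative assumptions. The returned callable operates on a single
# rank-3 image tensor, as described in the docstring.
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2

image_resizer_text_proto = """
keep_aspect_ratio_resizer {
  min_dimension: 600
  max_dimension: 1024
}
"""
image_resizer_config = image_resizer_pb2.ImageResizer()
text_format.Merge(image_resizer_text_proto, image_resizer_config)
image_resizer_fn = image_resizer_builder.build(image_resizer_config)

# resize_to_range scales the image so its smaller side reaches min_dimension,
# unless that would push the larger side past max_dimension.
image = tf.placeholder(tf.float32, shape=[None, None, 3])
resized_image = image_resizer_fn(image)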
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.builders.image_resizer_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2
class ImageResizerBuilderTest(tf.test.TestCase):
def _shape_of_resized_random_image_given_text_proto(
self, input_shape, text_proto):
image_resizer_config = image_resizer_pb2.ImageResizer()
text_format.Merge(text_proto, image_resizer_config)
image_resizer_fn = image_resizer_builder.build(image_resizer_config)
images = tf.to_float(tf.random_uniform(
input_shape, minval=0, maxval=255, dtype=tf.int32))
resized_images = image_resizer_fn(images)
with self.test_session() as sess:
return sess.run(resized_images).shape
def test_built_keep_aspect_ratio_resizer_returns_expected_shape(self):
image_resizer_text_proto = """
keep_aspect_ratio_resizer {
min_dimension: 10
max_dimension: 20
}
"""
input_shape = (50, 25, 3)
expected_output_shape = (20, 10, 3)
output_shape = self._shape_of_resized_random_image_given_text_proto(
input_shape, image_resizer_text_proto)
self.assertEqual(output_shape, expected_output_shape)
def test_built_fixed_shape_resizer_returns_expected_shape(self):
image_resizer_text_proto = """
fixed_shape_resizer {
height: 10
width: 20
}
"""
input_shape = (50, 25, 3)
expected_output_shape = (10, 20, 3)
output_shape = self._shape_of_resized_random_image_given_text_proto(
input_shape, image_resizer_text_proto)
self.assertEqual(output_shape, expected_output_shape)
def test_raises_error_on_invalid_input(self):
invalid_input = 'invalid_input'
with self.assertRaises(ValueError):
image_resizer_builder.build(invalid_input)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Input reader builder.
Creates data sources for DetectionModels from an InputReader config. See
input_reader.proto for options.
Note: If users wish to also use their own InputReaders with the Object
Detection configuration framework, they should define their own builder
function that wraps this build function.
"""
import tensorflow as tf
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
parallel_reader = tf.contrib.slim.parallel_reader
def build(input_reader_config):
"""Builds a tensor dictionary based on the InputReader config.
Args:
input_reader_config: A input_reader_pb2.InputReader object.
Returns:
A tensor dict based on the input_reader_config.
Raises:
ValueError: On invalid input reader proto.
"""
if not isinstance(input_reader_config, input_reader_pb2.InputReader):
raise ValueError('input_reader_config not of type '
'input_reader_pb2.InputReader.')
if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
config = input_reader_config.tf_record_input_reader
_, string_tensor = parallel_reader.parallel_read(
config.input_path,
reader_class=tf.TFRecordReader,
num_epochs=(input_reader_config.num_epochs
if input_reader_config.num_epochs else None),
num_readers=input_reader_config.num_readers,
shuffle=input_reader_config.shuffle,
dtypes=[tf.string, tf.string],
capacity=input_reader_config.queue_capacity,
min_after_dequeue=input_reader_config.min_after_dequeue)
return tf_example_decoder.TfExampleDecoder().Decode(string_tensor)
raise ValueError('Unsupported input_reader_config.')
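# A minimal usage sketch for the builder above. The record path is a
# placeholder assumption; the session/queue-runner pattern mirrors the test
# file that follows.
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import input_reader_builder
from object_detection.protos import input_reader_pb2

input_reader_text_proto = """
shuffle: false
num_readers: 1
tf_record_input_reader {
  input_path: '/tmp/train.record'
}
"""
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = input_reader_builder.build(input_reader_proto)

# parallel_read enqueues records via queue runners, so they must be started
# before evaluating the tensors in the returned dict.
sv = tf.train.Supervisor(logdir='/tmp/input_reader_example')
with sv.prepare_or_wait_for_session() as sess:
  sv.start_queue_runners(sess)
  example = sess.run(tensor_dict)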
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for input_reader_builder."""
import os
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from object_detection.builders import input_reader_builder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
class InputReaderBuilderTest(tf.test.TestCase):
def create_tf_record(self):
path = os.path.join(self.get_temp_dir(), 'tfrecord')
writer = tf.python_io.TFRecordWriter(path)
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
with self.test_session():
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
example = example_pb2.Example(features=feature_pb2.Features(feature={
'image/encoded': feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
'image/format': feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
'image/object/bbox/xmin': feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/xmax': feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/bbox/ymin': feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/ymax': feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/class/label': feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[2])),
}))
writer.write(example.SerializeToString())
writer.close()
return path
def test_build_tf_record_input_reader(self):
tf_record_path = self.create_tf_record()
input_reader_text_proto = """
shuffle: false
num_readers: 1
tf_record_input_reader {{
input_path: '{0}'
}}
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = input_reader_builder.build(input_reader_proto)
sv = tf.train.Supervisor(logdir=self.get_temp_dir())
with sv.prepare_or_wait_for_session() as sess:
sv.start_queue_runners(sess)
output_dict = sess.run(tensor_dict)
self.assertEquals(
(4, 5, 3), output_dict[fields.InputDataFields.image].shape)
self.assertEquals(
[2], output_dict[fields.InputDataFields.groundtruth_classes])
self.assertEquals(
(1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
self.assertAllEqual(
[0.0, 0.0, 1.0, 1.0],
output_dict[fields.InputDataFields.groundtruth_boxes][0])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build localization and classification losses from config."""
from object_detection.core import losses
from object_detection.protos import losses_pb2
def build(loss_config):
"""Build losses based on the config.
Builds classification and localization losses, and optionally a hard example
miner, based on the config.
Args:
loss_config: A losses_pb2.Loss object.
Returns:
classification_loss: Classification loss object.
localization_loss: Localization loss object.
classification_weight: Classification loss weight.
localization_weight: Localization loss weight.
hard_example_miner: Hard example miner object.
"""
classification_loss = _build_classification_loss(
loss_config.classification_loss)
localization_loss = _build_localization_loss(
loss_config.localization_loss)
classification_weight = loss_config.classification_weight
localization_weight = loss_config.localization_weight
hard_example_miner = None
if loss_config.HasField('hard_example_miner'):
hard_example_miner = build_hard_example_miner(
loss_config.hard_example_miner,
classification_weight,
localization_weight)
return (classification_loss, localization_loss,
classification_weight,
localization_weight, hard_example_miner)
def build_hard_example_miner(config,
classification_weight,
localization_weight):
"""Builds hard example miner based on the config.
Args:
config: A losses_pb2.HardExampleMiner object.
classification_weight: Classification loss weight.
localization_weight: Localization loss weight.
Returns:
Hard example miner.
"""
loss_type = None
if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
loss_type = 'both'
if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
loss_type = 'cls'
if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
loss_type = 'loc'
max_negatives_per_positive = None
num_hard_examples = None
if config.max_negatives_per_positive > 0:
max_negatives_per_positive = config.max_negatives_per_positive
if config.num_hard_examples > 0:
num_hard_examples = config.num_hard_examples
hard_example_miner = losses.HardExampleMiner(
num_hard_examples=num_hard_examples,
iou_threshold=config.iou_threshold,
loss_type=loss_type,
cls_loss_weight=classification_weight,
loc_loss_weight=localization_weight,
max_negatives_per_positive=max_negatives_per_positive,
min_negatives_per_image=config.min_negatives_per_image)
return hard_example_miner
def _build_localization_loss(loss_config):
"""Builds a localization loss based on the loss config.
Args:
loss_config: A losses_pb2.LocalizationLoss object.
Returns:
Loss based on the config.
Raises:
ValueError: On invalid loss_config.
"""
if not isinstance(loss_config, losses_pb2.LocalizationLoss):
raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.')
loss_type = loss_config.WhichOneof('localization_loss')
if loss_type == 'weighted_l2':
config = loss_config.weighted_l2
return losses.WeightedL2LocalizationLoss(
anchorwise_output=config.anchorwise_output)
if loss_type == 'weighted_smooth_l1':
config = loss_config.weighted_smooth_l1
return losses.WeightedSmoothL1LocalizationLoss(
anchorwise_output=config.anchorwise_output)
if loss_type == 'weighted_iou':
return losses.WeightedIOULocalizationLoss()
raise ValueError('Empty loss config.')
def _build_classification_loss(loss_config):
"""Builds a classification loss based on the loss config.
Args:
loss_config: A losses_pb2.ClassificationLoss object.
Returns:
Loss based on the config.
Raises:
ValueError: On invalid loss_config.
"""
if not isinstance(loss_config, losses_pb2.ClassificationLoss):
raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
loss_type = loss_config.WhichOneof('classification_loss')
if loss_type == 'weighted_sigmoid':
config = loss_config.weighted_sigmoid
return losses.WeightedSigmoidClassificationLoss(
anchorwise_output=config.anchorwise_output)
if loss_type == 'weighted_softmax':
config = loss_config.weighted_softmax
return losses.WeightedSoftmaxClassificationLoss(
anchorwise_output=config.anchorwise_output)
if loss_type == 'bootstrapped_sigmoid':
config = loss_config.bootstrapped_sigmoid
return losses.BootstrappedSigmoidClassificationLoss(
alpha=config.alpha,
bootstrap_type=('hard' if config.hard_bootstrap else 'soft'),
anchorwise_output=config.anchorwise_output)
raise ValueError('Empty loss config.')
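# A minimal usage sketch for the build() function above, showing how its five
# return values unpack; the weights and miner settings are illustrative
# assumptions.
from google.protobuf import text_format
from object_detection.builders import losses_builder
from object_detection.protos import losses_pb2

losses_text_proto = """
localization_loss {
  weighted_smooth_l1 {
  }
}
classification_loss {
  weighted_softmax {
  }
}
classification_weight: 1.0
localization_weight: 1.0
hard_example_miner {
  loss_type: CLASSIFICATION
  num_hard_examples: 64
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
(classification_loss, localization_loss, classification_weight,
 localization_weight, hard_example_miner) = losses_builder.build(losses_proto)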
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for losses_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import losses_builder
from object_detection.core import losses
from object_detection.protos import losses_pb2
class LocalizationLossBuilderTest(tf.test.TestCase):
def test_build_weighted_l2_localization_loss(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, localization_loss, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(localization_loss,
losses.WeightedL2LocalizationLoss))
def test_build_weighted_smooth_l1_localization_loss(self):
losses_text_proto = """
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, localization_loss, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(localization_loss,
losses.WeightedSmoothL1LocalizationLoss))
def test_build_weighted_iou_localization_loss(self):
losses_text_proto = """
localization_loss {
weighted_iou {
}
}
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, localization_loss, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(localization_loss,
losses.WeightedIOULocalizationLoss))
def test_anchorwise_output(self):
losses_text_proto = """
localization_loss {
weighted_smooth_l1 {
anchorwise_output: true
}
}
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, localization_loss, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(localization_loss,
losses.WeightedSmoothL1LocalizationLoss))
predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
weights = tf.constant([[1.0, 1.0]])
loss = localization_loss(predictions, targets, weights=weights)
self.assertEqual(loss.shape, [1, 2])
def test_raise_error_on_empty_localization_config(self):
losses_text_proto = """
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
with self.assertRaises(ValueError):
losses_builder._build_localization_loss(losses_proto)
class ClassificationLossBuilderTest(tf.test.TestCase):
def test_build_weighted_sigmoid_classification_loss(self):
losses_text_proto = """
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_l2 {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(classification_loss,
losses.WeightedSigmoidClassificationLoss))
def test_build_weighted_softmax_classification_loss(self):
losses_text_proto = """
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_l2 {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(classification_loss,
losses.WeightedSoftmaxClassificationLoss))
def test_build_bootstrapped_sigmoid_classification_loss(self):
losses_text_proto = """
classification_loss {
bootstrapped_sigmoid {
alpha: 0.5
}
}
localization_loss {
weighted_l2 {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(classification_loss,
losses.BootstrappedSigmoidClassificationLoss))
def test_anchorwise_output(self):
losses_text_proto = """
classification_loss {
weighted_sigmoid {
anchorwise_output: true
}
}
localization_loss {
weighted_l2 {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
classification_loss, _, _, _, _ = losses_builder.build(losses_proto)
self.assertTrue(isinstance(classification_loss,
losses.WeightedSigmoidClassificationLoss))
predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]])
targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]])
weights = tf.constant([[1.0, 1.0]])
loss = classification_loss(predictions, targets, weights=weights)
self.assertEqual(loss.shape, [1, 2])
def test_raise_error_on_empty_config(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
with self.assertRaises(ValueError):
losses_builder.build(losses_proto)
class HardExampleMinerBuilderTest(tf.test.TestCase):
def test_do_not_build_hard_example_miner_by_default(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
self.assertEqual(hard_example_miner, None)
def test_build_hard_example_miner_for_classification_loss(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
hard_example_miner {
loss_type: CLASSIFICATION
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
self.assertEqual(hard_example_miner._loss_type, 'cls')
def test_build_hard_example_miner_for_localization_loss(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
hard_example_miner {
loss_type: LOCALIZATION
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
self.assertEqual(hard_example_miner._loss_type, 'loc')
def test_build_hard_example_miner_with_non_default_values(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
hard_example_miner {
num_hard_examples: 32
iou_threshold: 0.5
loss_type: LOCALIZATION
max_negatives_per_positive: 10
min_negatives_per_image: 3
}
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
_, _, _, _, hard_example_miner = losses_builder.build(losses_proto)
self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
self.assertEqual(hard_example_miner._num_hard_examples, 32)
self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5)
self.assertEqual(hard_example_miner._max_negatives_per_positive, 10)
self.assertEqual(hard_example_miner._min_negatives_per_image, 3)
class LossBuilderTest(tf.test.TestCase):
def test_build_all_loss_parameters(self):
losses_text_proto = """
localization_loss {
weighted_l2 {
}
}
classification_loss {
weighted_softmax {
}
}
hard_example_miner {
}
classification_weight: 0.8
localization_weight: 0.2
"""
losses_proto = losses_pb2.Loss()
text_format.Merge(losses_text_proto, losses_proto)
(classification_loss, localization_loss,
classification_weight, localization_weight,
hard_example_miner) = losses_builder.build(losses_proto)
self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
self.assertTrue(isinstance(classification_loss,
losses.WeightedSoftmaxClassificationLoss))
self.assertTrue(isinstance(localization_loss,
losses.WeightedL2LocalizationLoss))
self.assertAlmostEqual(classification_weight, 0.8)
self.assertAlmostEqual(localization_weight, 0.2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build an object detection matcher from configuration."""
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.protos import matcher_pb2
def build(matcher_config):
"""Builds a matcher object based on the matcher config.
Args:
matcher_config: A matcher.proto object containing the config for the desired
Matcher.
Returns:
Matcher based on the config.
Raises:
ValueError: On empty matcher proto.
"""
if not isinstance(matcher_config, matcher_pb2.Matcher):
raise ValueError('matcher_config not of type matcher_pb2.Matcher.')
if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher':
matcher = matcher_config.argmax_matcher
matched_threshold = unmatched_threshold = None
if not matcher.ignore_thresholds:
matched_threshold = matcher.matched_threshold
unmatched_threshold = matcher.unmatched_threshold
return argmax_matcher.ArgMaxMatcher(
matched_threshold=matched_threshold,
unmatched_threshold=unmatched_threshold,
negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched,
force_match_for_each_row=matcher.force_match_for_each_row)
if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher':
return bipartite_matcher.GreedyBipartiteMatcher()
raise ValueError('Empty matcher.')
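

# --- Illustrative usage sketch (not part of the original module). It shows how a
# text-format Matcher proto maps onto the ArgMaxMatcher built above; the
# threshold values are example choices, not library defaults.
from google.protobuf import text_format as _text_format

_example_matcher_proto = matcher_pb2.Matcher()
_text_format.Merge("""
  argmax_matcher {
    matched_threshold: 0.7
    unmatched_threshold: 0.3
  }
""", _example_matcher_proto)
# Rows with similarity >= 0.7 are matched, rows below 0.3 become negatives, and
# the band in between is ignored (negatives_lower_than_unmatched defaults to true).
_example_matcher = build(_example_matcher_proto)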
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for matcher_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import matcher_builder
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
from object_detection.protos import matcher_pb2
class MatcherBuilderTest(tf.test.TestCase):
def test_build_arg_max_matcher_with_defaults(self):
matcher_text_proto = """
argmax_matcher {
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
self.assertFalse(matcher_object._force_match_for_each_row)
def test_build_arg_max_matcher_without_thresholds(self):
matcher_text_proto = """
argmax_matcher {
ignore_thresholds: true
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertEqual(matcher_object._matched_threshold, None)
self.assertEqual(matcher_object._unmatched_threshold, None)
self.assertTrue(matcher_object._negatives_lower_than_unmatched)
self.assertFalse(matcher_object._force_match_for_each_row)
def test_build_arg_max_matcher_with_non_default_parameters(self):
matcher_text_proto = """
argmax_matcher {
matched_threshold: 0.7
unmatched_threshold: 0.3
negatives_lower_than_unmatched: false
force_match_for_each_row: true
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
self.assertFalse(matcher_object._negatives_lower_than_unmatched)
self.assertTrue(matcher_object._force_match_for_each_row)
def test_build_bipartite_matcher(self):
matcher_text_proto = """
bipartite_matcher {
}
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
matcher_object = matcher_builder.build(matcher_proto)
self.assertTrue(
isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
def test_raise_error_on_empty_matcher(self):
matcher_text_proto = """
"""
matcher_proto = matcher_pb2.Matcher()
text_format.Merge(matcher_text_proto, matcher_proto)
with self.assertRaises(ValueError):
matcher_builder.build(matcher_proto)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A function to build a DetectionModel from configuration."""
from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.builders import image_resizer_builder
from object_detection.builders import losses_builder
from object_detection.builders import matcher_builder
from object_detection.builders import post_processing_builder
from object_detection.builders import region_similarity_calculator_builder as sim_calc
from object_detection.core import box_predictor
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
# A map of names to SSD feature extractors.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
}
# A map of names to Faster R-CNN feature extractors.
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
'faster_rcnn_resnet50':
frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'faster_rcnn_resnet101':
frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'faster_rcnn_resnet152':
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
'faster_rcnn_inception_resnet_v2':
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor
}
def build(model_config, is_training):
"""Builds a DetectionModel based on the model config.
Args:
model_config: A model.proto object containing the config for the desired
DetectionModel.
is_training: True if this model is being built for training purposes.
Returns:
DetectionModel based on the config.
Raises:
ValueError: On invalid meta architecture or model.
"""
if not isinstance(model_config, model_pb2.DetectionModel):
raise ValueError('model_config not of type model_pb2.DetectionModel.')
meta_architecture = model_config.WhichOneof('model')
if meta_architecture == 'ssd':
return _build_ssd_model(model_config.ssd, is_training)
if meta_architecture == 'faster_rcnn':
return _build_faster_rcnn_model(model_config.faster_rcnn, is_training)
raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
Raises:
ValueError: On invalid feature extractor type.
"""
feature_type = feature_extractor_config.type
depth_multiplier = feature_extractor_config.depth_multiplier
min_depth = feature_extractor_config.min_depth
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(depth_multiplier, min_depth, conv_hyperparams,
reuse_weights)
def _build_ssd_model(ssd_config, is_training):
"""Builds an SSD detection model based on the model config.
Args:
ssd_config: A ssd.proto object containing the config for the desired
SSDMetaArch.
is_training: True if this model is being built for training purposes.
Returns:
SSDMetaArch based on the config.
Raises:
    ValueError: If ssd_config.feature_extractor.type is not recognized (i.e. not
      registered in SSD_FEATURE_EXTRACTOR_CLASS_MAP).
"""
num_classes = ssd_config.num_classes
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(ssd_config.feature_extractor,
is_training)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
region_similarity_calculator = sim_calc.build(
ssd_config.similarity_calculator)
ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
ssd_config.box_predictor,
is_training, num_classes)
anchor_generator = anchor_generator_builder.build(
ssd_config.anchor_generator)
image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
ssd_config.post_processing)
(classification_loss, localization_loss, classification_weight,
localization_weight,
hard_example_miner) = losses_builder.build(ssd_config.loss)
normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
return ssd_meta_arch.SSDMetaArch(
is_training,
anchor_generator,
ssd_box_predictor,
box_coder,
feature_extractor,
matcher,
region_similarity_calculator,
image_resizer_fn,
non_max_suppression_fn,
score_conversion_fn,
classification_loss,
localization_loss,
classification_weight,
localization_weight,
normalize_loss_by_num_matches,
hard_example_miner)
def _build_faster_rcnn_feature_extractor(
feature_extractor_config, is_training, reuse_weights=None):
"""Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Args:
feature_extractor_config: A FasterRcnnFeatureExtractor proto config from
faster_rcnn.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
Returns:
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
Raises:
ValueError: On invalid feature extractor type.
"""
feature_type = feature_extractor_config.type
first_stage_features_stride = (
feature_extractor_config.first_stage_features_stride)
if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
feature_type))
feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
feature_type]
return feature_extractor_class(
is_training, first_stage_features_stride, reuse_weights)
def _build_faster_rcnn_model(frcnn_config, is_training):
"""Builds a Faster R-CNN or R-FCN detection model based on the model config.
  Builds an R-FCN model if the second_stage_box_predictor in the config is of
  type `rfcn_box_predictor`; otherwise it builds a Faster R-CNN model.
Args:
frcnn_config: A faster_rcnn.proto object containing the config for the
desired FasterRCNNMetaArch or RFCNMetaArch.
is_training: True if this model is being built for training purposes.
Returns:
FasterRCNNMetaArch based on the config.
Raises:
    ValueError: If frcnn_config.feature_extractor.type is not recognized (i.e.
      not registered in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP).
"""
num_classes = frcnn_config.num_classes
image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
feature_extractor = _build_faster_rcnn_feature_extractor(
frcnn_config.feature_extractor, is_training)
first_stage_only = frcnn_config.first_stage_only
first_stage_anchor_generator = anchor_generator_builder.build(
frcnn_config.first_stage_anchor_generator)
first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
first_stage_box_predictor_arg_scope = hyperparams_builder.build(
frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
first_stage_box_predictor_kernel_size = (
frcnn_config.first_stage_box_predictor_kernel_size)
first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
first_stage_positive_balance_fraction = (
frcnn_config.first_stage_positive_balance_fraction)
first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
first_stage_max_proposals = frcnn_config.first_stage_max_proposals
first_stage_loc_loss_weight = (
frcnn_config.first_stage_localization_loss_weight)
first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
initial_crop_size = frcnn_config.initial_crop_size
maxpool_kernel_size = frcnn_config.maxpool_kernel_size
maxpool_stride = frcnn_config.maxpool_stride
second_stage_box_predictor = box_predictor_builder.build(
hyperparams_builder.build,
frcnn_config.second_stage_box_predictor,
is_training=is_training,
num_classes=num_classes)
second_stage_batch_size = frcnn_config.second_stage_batch_size
second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
(second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
second_stage_localization_loss_weight = (
frcnn_config.second_stage_localization_loss_weight)
second_stage_classification_loss_weight = (
frcnn_config.second_stage_classification_loss_weight)
hard_example_miner = None
if frcnn_config.HasField('hard_example_miner'):
hard_example_miner = losses_builder.build_hard_example_miner(
frcnn_config.hard_example_miner,
second_stage_classification_loss_weight,
second_stage_localization_loss_weight)
common_kwargs = {
'is_training': is_training,
'num_classes': num_classes,
'image_resizer_fn': image_resizer_fn,
'feature_extractor': feature_extractor,
'first_stage_only': first_stage_only,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
'first_stage_minibatch_size': first_stage_minibatch_size,
'first_stage_positive_balance_fraction':
first_stage_positive_balance_fraction,
'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
'first_stage_max_proposals': first_stage_max_proposals,
'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
'second_stage_batch_size': second_stage_batch_size,
'second_stage_balance_fraction': second_stage_balance_fraction,
'second_stage_non_max_suppression_fn':
second_stage_non_max_suppression_fn,
'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
'second_stage_localization_loss_weight':
second_stage_localization_loss_weight,
'second_stage_classification_loss_weight':
second_stage_classification_loss_weight,
'hard_example_miner': hard_example_miner}
if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=second_stage_box_predictor,
**common_kwargs)
else:
return faster_rcnn_meta_arch.FasterRCNNMetaArch(
initial_crop_size=initial_crop_size,
maxpool_kernel_size=maxpool_kernel_size,
maxpool_stride=maxpool_stride,
second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
**common_kwargs)
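

# --- Illustrative usage sketch (not part of the original module). It shows the
# typical way build() is driven from a text-format DetectionModel config stored
# on disk; the config path is a placeholder, not a file shipped with this code.
from google.protobuf import text_format as _text_format

def build_detection_model_from_file(config_path, is_training=False):
  """Reads a model_pb2.DetectionModel text proto from disk and builds it."""
  model_config = model_pb2.DetectionModel()
  with open(config_path, 'r') as f:
    _text_format.Merge(f.read(), model_config)
  # build() dispatches to _build_ssd_model or _build_faster_rcnn_model
  # depending on which meta-architecture oneof the config sets.
  return build(model_config, is_training=is_training)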
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.models.model_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import model_builder
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
from object_detection.protos import model_pb2
FEATURE_EXTRACTOR_MAPS = {
'faster_rcnn_resnet50':
frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
'faster_rcnn_resnet101':
frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
'faster_rcnn_resnet152':
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor
}
class ModelBuilderTest(tf.test.TestCase):
def create_model(self, model_config):
"""Builds a DetectionModel based on the model config.
Args:
model_config: A model.proto object containing the config for the desired
DetectionModel.
Returns:
DetectionModel based on the config.
"""
return model_builder.build(model_config, is_training=True)
def test_create_ssd_inception_v2_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_inception_v2'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDInceptionV2FeatureExtractor)
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
feature_extractor {
type: 'ssd_mobilenet_v1'
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
box_coder {
faster_rcnn_box_coder {
}
}
matcher {
argmax_matcher {
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
aspect_ratios: 1.0
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
loss {
classification_loss {
weighted_softmax {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = self.create_model(model_proto)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FeatureExtractor)
def test_create_faster_rcnn_resnet_v1_models_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
def test_create_faster_rcnn_inception_resnet_v2_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 17
maxpool_kernel_size: 1
maxpool_stride: 1
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
self.assertIsInstance(
model._feature_extractor,
frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor)
def test_create_faster_rcnn_model_from_config_with_example_miner(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
feature_extractor {
type: 'faster_rcnn_inception_resnet_v2'
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
second_stage_box_predictor {
mask_rcnn_box_predictor {
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
hard_example_miner {
num_hard_examples: 10
iou_threshold: 0.99
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
model = model_builder.build(model_proto, is_training=True)
self.assertIsNotNone(model._hard_example_miner)
def test_create_rfcn_resnet_v1_model_from_config(self):
model_text_proto = """
faster_rcnn {
num_classes: 3
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 600
max_dimension: 1024
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
rfcn_box_predictor {
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.01
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
}"""
model_proto = model_pb2.DetectionModel()
text_format.Merge(model_text_proto, model_proto)
for extractor_type, extractor_class in FEATURE_EXTRACTOR_MAPS.items():
model_proto.faster_rcnn.feature_extractor.type = extractor_type
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build DetectionModel training optimizers."""
import tensorflow as tf
from object_detection.utils import learning_schedules
slim = tf.contrib.slim
def build(optimizer_config, global_summaries):
"""Create optimizer based on config.
Args:
optimizer_config: A Optimizer proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
An optimizer.
Raises:
ValueError: when using an unsupported input data type.
"""
optimizer_type = optimizer_config.WhichOneof('optimizer')
optimizer = None
if optimizer_type == 'rms_prop_optimizer':
config = optimizer_config.rms_prop_optimizer
optimizer = tf.train.RMSPropOptimizer(
_create_learning_rate(config.learning_rate, global_summaries),
decay=config.decay,
momentum=config.momentum_optimizer_value,
epsilon=config.epsilon)
if optimizer_type == 'momentum_optimizer':
config = optimizer_config.momentum_optimizer
optimizer = tf.train.MomentumOptimizer(
_create_learning_rate(config.learning_rate, global_summaries),
momentum=config.momentum_optimizer_value)
if optimizer_type == 'adam_optimizer':
config = optimizer_config.adam_optimizer
optimizer = tf.train.AdamOptimizer(
_create_learning_rate(config.learning_rate, global_summaries))
if optimizer is None:
raise ValueError('Optimizer %s not supported.' % optimizer_type)
if optimizer_config.use_moving_average:
optimizer = tf.contrib.opt.MovingAverageOptimizer(
optimizer, average_decay=optimizer_config.moving_average_decay)
return optimizer
def _create_learning_rate(learning_rate_config, global_summaries):
"""Create optimizer learning rate based on config.
Args:
learning_rate_config: A LearningRate proto message.
global_summaries: A set to attach learning rate summary to.
Returns:
A learning rate.
Raises:
ValueError: when using an unsupported input data type.
"""
learning_rate = None
learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
if learning_rate_type == 'constant_learning_rate':
config = learning_rate_config.constant_learning_rate
learning_rate = config.learning_rate
if learning_rate_type == 'exponential_decay_learning_rate':
config = learning_rate_config.exponential_decay_learning_rate
learning_rate = tf.train.exponential_decay(
config.initial_learning_rate,
slim.get_or_create_global_step(),
config.decay_steps,
config.decay_factor,
staircase=config.staircase)
if learning_rate_type == 'manual_step_learning_rate':
config = learning_rate_config.manual_step_learning_rate
if not config.schedule:
raise ValueError('Empty learning rate schedule.')
learning_rate_step_boundaries = [x.step for x in config.schedule]
learning_rate_sequence = [config.initial_learning_rate]
learning_rate_sequence += [x.learning_rate for x in config.schedule]
learning_rate = learning_schedules.manual_stepping(
slim.get_or_create_global_step(), learning_rate_step_boundaries,
learning_rate_sequence)
if learning_rate is None:
raise ValueError('Learning_rate %s not supported.' % learning_rate_type)
global_summaries.add(tf.summary.scalar('Learning Rate', learning_rate))
return learning_rate
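

# Worked example (illustrative, not part of the original module): for a
# manual_step_learning_rate config with initial_learning_rate i and schedule
# entries [(step_1, r_1), ..., (step_n, r_n)], the branch above produces
#   learning_rate_step_boundaries = [step_1, ..., step_n]
#   learning_rate_sequence       = [i, r_1, ..., r_n]
# i.e. n schedule entries yield n + 1 piecewise-constant rates: i is used before
# step_1, then r_k applies from step_k onward.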
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import optimizer_builder
from object_detection.protos import optimizer_pb2
class LearningRateBuilderTest(tf.test.TestCase):
def testBuildConstantLearningRate(self):
learning_rate_text_proto = """
constant_learning_rate {
learning_rate: 0.004
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertAlmostEqual(learning_rate, 0.004)
def testBuildExponentialDecayLearningRate(self):
learning_rate_text_proto = """
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 99999
decay_factor: 0.85
staircase: false
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertTrue(isinstance(learning_rate, tf.Tensor))
def testBuildManualStepLearningRate(self):
learning_rate_text_proto = """
manual_step_learning_rate {
schedule {
step: 0
learning_rate: 0.006
}
schedule {
step: 90000
learning_rate: 0.00006
}
}
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
learning_rate = optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
self.assertTrue(isinstance(learning_rate, tf.Tensor))
def testRaiseErrorOnEmptyLearningRate(self):
learning_rate_text_proto = """
"""
global_summaries = set([])
learning_rate_proto = optimizer_pb2.LearningRate()
text_format.Merge(learning_rate_text_proto, learning_rate_proto)
with self.assertRaises(ValueError):
optimizer_builder._create_learning_rate(
learning_rate_proto, global_summaries)
class OptimizerBuilderTest(tf.test.TestCase):
def testBuildRMSPropOptimizer(self):
optimizer_text_proto = """
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer))
def testBuildMomentumOptimizer(self):
optimizer_text_proto = """
momentum_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.001
}
}
momentum_optimizer_value: 0.99
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer))
def testBuildAdamOptimizer(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: false
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer))
def testBuildMovingAverageOptimizer(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: True
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(
isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
def testBuildMovingAverageOptimizerWithNonDefaultDecay(self):
optimizer_text_proto = """
adam_optimizer: {
learning_rate: {
constant_learning_rate {
learning_rate: 0.002
}
}
}
use_moving_average: True
moving_average_decay: 0.2
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
optimizer = optimizer_builder.build(optimizer_proto, global_summaries)
self.assertTrue(
isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
# TODO: Find a way to not depend on the private members.
self.assertAlmostEqual(optimizer._ema._decay, 0.2)
def testBuildEmptyOptimizer(self):
optimizer_text_proto = """
"""
global_summaries = set([])
optimizer_proto = optimizer_pb2.Optimizer()
text_format.Merge(optimizer_text_proto, optimizer_proto)
with self.assertRaises(ValueError):
optimizer_builder.build(optimizer_proto, global_summaries)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder function for post processing operations."""
import functools
import tensorflow as tf
from object_detection.core import post_processing
from object_detection.protos import post_processing_pb2
def build(post_processing_config):
"""Builds callables for post-processing operations.
Builds callables for non-max suppression and score conversion based on the
configuration.
Non-max suppression callable takes `boxes`, `scores`, and optionally
`clip_window`, `parallel_iterations` and `scope` as inputs. It returns
  `nms_boxes`, `nms_scores`, `nms_classes` and `num_detections`. See
post_processing.batch_multiclass_non_max_suppression for the type and shape
of these tensors.
Score converter callable should be called with `input` tensor. The callable
returns the output from one of 3 tf operations based on the configuration -
tf.identity, tf.sigmoid or tf.nn.softmax. See tensorflow documentation for
argument and return value descriptions.
Args:
post_processing_config: post_processing.proto object containing the
parameters for the post-processing operations.
Returns:
non_max_suppressor_fn: Callable for non-max suppression.
score_converter_fn: Callable for score conversion.
Raises:
ValueError: if the post_processing_config is of incorrect type.
"""
if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
    raise ValueError('post_processing_config not of type '
                     'post_processing_pb2.PostProcessing.')
non_max_suppressor_fn = _build_non_max_suppressor(
post_processing_config.batch_non_max_suppression)
score_converter_fn = _build_score_converter(
post_processing_config.score_converter)
return non_max_suppressor_fn, score_converter_fn
def _build_non_max_suppressor(nms_config):
"""Builds non-max suppresson based on the nms config.
Args:
nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
Returns:
non_max_suppressor_fn: Callable non-max suppressor.
Raises:
ValueError: On incorrect iou_threshold or on incompatible values of
max_total_detections and max_detections_per_class.
"""
if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
raise ValueError('iou_threshold not in [0, 1.0].')
if nms_config.max_detections_per_class > nms_config.max_total_detections:
raise ValueError('max_detections_per_class should be no greater than '
'max_total_detections.')
non_max_suppressor_fn = functools.partial(
post_processing.batch_multiclass_non_max_suppression,
score_thresh=nms_config.score_threshold,
iou_thresh=nms_config.iou_threshold,
max_size_per_class=nms_config.max_detections_per_class,
max_total_size=nms_config.max_total_detections)
return non_max_suppressor_fn
def _build_score_converter(score_converter_config):
"""Builds score converter based on the config.
  Builds one of [tf.identity, tf.sigmoid, tf.nn.softmax] score converters based on
the config.
Args:
score_converter_config: post_processing_pb2.PostProcessing.score_converter.
Returns:
Callable score converter op.
Raises:
ValueError: On unknown score converter.
"""
if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
return tf.identity
if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
return tf.sigmoid
if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
return tf.nn.softmax
raise ValueError('Unknown score converter.')
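

# --- Illustrative usage sketch (not part of the original module). The
# PostProcessing text proto below is an example configuration; it exercises
# both callables returned by build().
from google.protobuf import text_format as _text_format

_example_config = post_processing_pb2.PostProcessing()
_text_format.Merge("""
  batch_non_max_suppression {
    score_threshold: 0.5
    iou_threshold: 0.6
    max_detections_per_class: 100
    max_total_detections: 100
  }
  score_converter: SIGMOID
""", _example_config)
_nms_fn, _score_fn = build(_example_config)
# _nms_fn is a functools.partial around batch_multiclass_non_max_suppression
# with score_thresh/iou_thresh/max_size_per_class/max_total_size bound from the
# proto; _score_fn is tf.sigmoid here (IDENTITY and SOFTMAX are the other options).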
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for post_processing_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import post_processing_builder
from object_detection.protos import post_processing_pb2
class PostProcessingBuilderTest(tf.test.TestCase):
def test_build_non_max_suppressor_with_correct_parameters(self):
post_processing_text_proto = """
batch_non_max_suppression {
score_threshold: 0.7
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
non_max_suppressor, _ = post_processing_builder.build(
post_processing_config)
self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
def test_build_identity_score_converter(self):
post_processing_text_proto = """
score_converter: IDENTITY
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.identity)
def test_build_sigmoid_score_converter(self):
post_processing_text_proto = """
score_converter: SIGMOID
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.sigmoid)
def test_build_softmax_score_converter(self):
post_processing_text_proto = """
score_converter: SOFTMAX
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.nn.softmax)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder for preprocessing steps."""
import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.protos import preprocessor_pb2
def _get_step_config_from_proto(preprocessor_step_config, step_name):
"""Returns the value of a field named step_name from proto.
Args:
preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
step_name: Name of the field to get value from.
Returns:
result_dict: a sub proto message from preprocessor_step_config which will be
later converted to a dictionary.
Raises:
ValueError: If field does not exist in proto.
"""
for field, value in preprocessor_step_config.ListFields():
if field.name == step_name:
return value
  raise ValueError('Could not get field %s from proto!' % step_name)
def _get_dict_from_proto(config):
"""Helper function to put all proto fields into a dictionary.
  For many preprocessing steps, there's a trivial 1-1 mapping from proto fields
to function arguments. This function automatically populates a dictionary with
the arguments from the proto.
Protos that CANNOT be trivially populated include:
* nested messages.
  * steps that check if an optional field is set (i.e. where None != 0).
  * protos that don't map 1-1 to arguments (i.e. list should be reshaped).
  * fields requiring additional validation (i.e. repeated field has n elements).
Args:
config: A protobuf object that does not violate the conditions above.
Returns:
result_dict: |config| converted into a python dictionary.
"""
result_dict = {}
for field, value in config.ListFields():
result_dict[field.name] = value
return result_dict
# A map from a PreprocessingStep proto config field name to the preprocessing
# function that should be used. The PreprocessingStep proto should be parsable
# with _get_dict_from_proto.
PREPROCESSING_FUNCTION_MAP = {
'normalize_image': preprocessor.normalize_image,
'random_horizontal_flip': preprocessor.random_horizontal_flip,
'random_pixel_value_scale': preprocessor.random_pixel_value_scale,
'random_image_scale': preprocessor.random_image_scale,
'random_rgb_to_gray': preprocessor.random_rgb_to_gray,
'random_adjust_brightness': preprocessor.random_adjust_brightness,
'random_adjust_contrast': preprocessor.random_adjust_contrast,
'random_adjust_hue': preprocessor.random_adjust_hue,
'random_adjust_saturation': preprocessor.random_adjust_saturation,
'random_distort_color': preprocessor.random_distort_color,
'random_jitter_boxes': preprocessor.random_jitter_boxes,
'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio,
'random_black_patches': preprocessor.random_black_patches,
'scale_boxes_to_pixel_coordinates': (
preprocessor.scale_boxes_to_pixel_coordinates),
'subtract_channel_mean': preprocessor.subtract_channel_mean,
}
# A map to convert from preprocessor_pb2.ResizeImage.Method enum to
# tf.image.ResizeMethod.
RESIZE_METHOD_MAP = {
preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA,
preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC,
preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR,
preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: (
tf.image.ResizeMethod.NEAREST_NEIGHBOR),
}
def build(preprocessor_step_config):
"""Builds preprocessing step based on the configuration.
Args:
preprocessor_step_config: PreprocessingStep configuration proto.
Returns:
function, argmap: A callable function and an argument map to call function
with.
Raises:
ValueError: On invalid configuration.
"""
step_type = preprocessor_step_config.WhichOneof('preprocessing_step')
if step_type in PREPROCESSING_FUNCTION_MAP:
preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type]
step_config = _get_step_config_from_proto(preprocessor_step_config,
step_type)
function_args = _get_dict_from_proto(step_config)
return (preprocessing_function, function_args)
if step_type == 'random_crop_image':
config = preprocessor_step_config.random_crop_image
return (preprocessor.random_crop_image,
{
'min_object_covered': config.min_object_covered,
'aspect_ratio_range': (config.min_aspect_ratio,
config.max_aspect_ratio),
'area_range': (config.min_area, config.max_area),
'overlap_thresh': config.overlap_thresh,
'random_coef': config.random_coef,
})
if step_type == 'random_pad_image':
config = preprocessor_step_config.random_pad_image
min_image_size = None
if (config.HasField('min_image_height') !=
config.HasField('min_image_width')):
raise ValueError('min_image_height and min_image_width should be either '
'both set or both unset.')
if config.HasField('min_image_height'):
min_image_size = (config.min_image_height, config.min_image_width)
max_image_size = None
if (config.HasField('max_image_height') !=
config.HasField('max_image_width')):
raise ValueError('max_image_height and max_image_width should be either '
'both set or both unset.')
if config.HasField('max_image_height'):
max_image_size = (config.max_image_height, config.max_image_width)
pad_color = config.pad_color
if pad_color and len(pad_color) != 3:
raise ValueError('pad_color should have 3 elements (RGB) if set!')
if not pad_color:
pad_color = None
return (preprocessor.random_pad_image,
{
'min_image_size': min_image_size,
'max_image_size': max_image_size,
'pad_color': pad_color,
})
if step_type == 'random_crop_pad_image':
config = preprocessor_step_config.random_crop_pad_image
min_padded_size_ratio = config.min_padded_size_ratio
if min_padded_size_ratio and len(min_padded_size_ratio) != 2:
      raise ValueError('min_padded_size_ratio should have 2 elements if set!')
max_padded_size_ratio = config.max_padded_size_ratio
if max_padded_size_ratio and len(max_padded_size_ratio) != 2:
      raise ValueError('max_padded_size_ratio should have 2 elements if set!')
pad_color = config.pad_color
if pad_color and len(pad_color) != 3:
raise ValueError('pad_color should have 3 elements if set!')
return (preprocessor.random_crop_pad_image,
{
'min_object_covered': config.min_object_covered,
'aspect_ratio_range': (config.min_aspect_ratio,
config.max_aspect_ratio),
'area_range': (config.min_area, config.max_area),
'overlap_thresh': config.overlap_thresh,
'random_coef': config.random_coef,
'min_padded_size_ratio': (min_padded_size_ratio if
min_padded_size_ratio else None),
'max_padded_size_ratio': (max_padded_size_ratio if
max_padded_size_ratio else None),
'pad_color': (pad_color if pad_color else None),
})
if step_type == 'random_resize_method':
config = preprocessor_step_config.random_resize_method
return (preprocessor.random_resize_method,
{
'target_size': [config.target_height, config.target_width],
})
if step_type == 'resize_image':
config = preprocessor_step_config.resize_image
method = RESIZE_METHOD_MAP[config.method]
return (preprocessor.resize_image,
{
'new_height': config.new_height,
'new_width': config.new_width,
'method': method
})
if step_type == 'ssd_random_crop':
config = preprocessor_step_config.ssd_random_crop
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
return (preprocessor.ssd_random_crop,
{
'min_object_covered': min_object_covered,
'aspect_ratio_range': aspect_ratio_range,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
})
return (preprocessor.ssd_random_crop, {})
if step_type == 'ssd_random_crop_pad':
config = preprocessor_step_config.ssd_random_crop_pad
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
min_padded_size_ratio = [
(op.min_padded_size_ratio[0], op.min_padded_size_ratio[1])
for op in config.operations]
max_padded_size_ratio = [
(op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
for op in config.operations]
pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
for op in config.operations]
return (preprocessor.ssd_random_crop_pad,
{
'min_object_covered': min_object_covered,
'aspect_ratio_range': aspect_ratio_range,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
'min_padded_size_ratio': min_padded_size_ratio,
'max_padded_size_ratio': max_padded_size_ratio,
'pad_color': pad_color,
})
return (preprocessor.ssd_random_crop_pad, {})
if step_type == 'ssd_random_crop_fixed_aspect_ratio':
config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio
if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations]
area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations]
return (preprocessor.ssd_random_crop_fixed_aspect_ratio,
{
'min_object_covered': min_object_covered,
'aspect_ratio': config.aspect_ratio,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
})
return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})
raise ValueError('Unknown preprocessing step.')
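# Example (illustrative sketch, not part of the original file): building a
# single preprocessing step from a text-format proto. The imports inside the
# guard are assumed to be available in this package.
if __name__ == '__main__':
  from google.protobuf import text_format
  from object_detection.protos import preprocessor_pb2

  step_proto = preprocessor_pb2.PreprocessingStep()
  text_format.Merge('random_horizontal_flip {}', step_proto)
  # `build` returns the preprocessing function together with its kwargs dict.
  preprocess_fn, preprocess_args = build(step_proto)
  print(preprocess_fn, preprocess_args)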
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for preprocessor_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import preprocessor_builder
from object_detection.core import preprocessor
from object_detection.protos import preprocessor_pb2
class PreprocessorBuilderTest(tf.test.TestCase):
def assert_dictionary_close(self, dict1, dict2):
"""Helper to check if two dicts with floatst or integers are close."""
self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
for key in dict1:
value = dict1[key]
if isinstance(value, float):
self.assertAlmostEqual(value, dict2[key])
else:
self.assertEqual(value, dict2[key])
def test_build_normalize_image(self):
preprocessor_text_proto = """
normalize_image {
original_minval: 0.0
original_maxval: 255.0
target_minval: -1.0
target_maxval: 1.0
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.normalize_image)
self.assertEqual(args, {
'original_minval': 0.0,
'original_maxval': 255.0,
'target_minval': -1.0,
'target_maxval': 1.0,
})
def test_build_random_horizontal_flip(self):
preprocessor_text_proto = """
random_horizontal_flip {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_horizontal_flip)
self.assertEqual(args, {})
def test_build_random_pixel_value_scale(self):
preprocessor_text_proto = """
random_pixel_value_scale {
minval: 0.8
maxval: 1.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_pixel_value_scale)
self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2})
def test_build_random_image_scale(self):
preprocessor_text_proto = """
random_image_scale {
min_scale_ratio: 0.8
max_scale_ratio: 2.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_image_scale)
self.assert_dictionary_close(args, {'min_scale_ratio': 0.8,
'max_scale_ratio': 2.2})
def test_build_random_rgb_to_gray(self):
preprocessor_text_proto = """
random_rgb_to_gray {
probability: 0.8
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_rgb_to_gray)
self.assert_dictionary_close(args, {'probability': 0.8})
def test_build_random_adjust_brightness(self):
preprocessor_text_proto = """
random_adjust_brightness {
max_delta: 0.2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_brightness)
self.assert_dictionary_close(args, {'max_delta': 0.2})
def test_build_random_adjust_contrast(self):
preprocessor_text_proto = """
random_adjust_contrast {
min_delta: 0.7
max_delta: 1.1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_contrast)
self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1})
def test_build_random_adjust_hue(self):
preprocessor_text_proto = """
random_adjust_hue {
max_delta: 0.01
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_hue)
self.assert_dictionary_close(args, {'max_delta': 0.01})
def test_build_random_adjust_saturation(self):
preprocessor_text_proto = """
random_adjust_saturation {
min_delta: 0.75
max_delta: 1.15
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_adjust_saturation)
self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15})
def test_build_random_distort_color(self):
preprocessor_text_proto = """
random_distort_color {
color_ordering: 1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_distort_color)
self.assertEqual(args, {'color_ordering': 1})
def test_build_random_jitter_boxes(self):
preprocessor_text_proto = """
random_jitter_boxes {
ratio: 0.1
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_jitter_boxes)
self.assert_dictionary_close(args, {'ratio': 0.1})
def test_build_random_crop_image(self):
preprocessor_text_proto = """
random_crop_image {
min_object_covered: 0.75
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.25
max_area: 0.875
overlap_thresh: 0.5
random_coef: 0.125
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_image)
self.assertEqual(args, {
'min_object_covered': 0.75,
'aspect_ratio_range': (0.75, 1.5),
'area_range': (0.25, 0.875),
'overlap_thresh': 0.5,
'random_coef': 0.125,
})
def test_build_random_pad_image(self):
preprocessor_text_proto = """
random_pad_image {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_pad_image)
self.assertEqual(args, {
'min_image_size': None,
'max_image_size': None,
'pad_color': None,
})
def test_build_random_crop_pad_image(self):
preprocessor_text_proto = """
random_crop_pad_image {
min_object_covered: 0.75
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.25
max_area: 0.875
overlap_thresh: 0.5
random_coef: 0.125
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_pad_image)
self.assertEqual(args, {
'min_object_covered': 0.75,
'aspect_ratio_range': (0.75, 1.5),
'area_range': (0.25, 0.875),
'overlap_thresh': 0.5,
'random_coef': 0.125,
'min_padded_size_ratio': None,
'max_padded_size_ratio': None,
'pad_color': None,
})
def test_build_random_crop_to_aspect_ratio(self):
preprocessor_text_proto = """
random_crop_to_aspect_ratio {
aspect_ratio: 0.85
overlap_thresh: 0.35
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio)
self.assert_dictionary_close(args, {'aspect_ratio': 0.85,
'overlap_thresh': 0.35})
def test_build_random_black_patches(self):
preprocessor_text_proto = """
random_black_patches {
max_black_patches: 20
probability: 0.95
size_to_image_ratio: 0.12
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_black_patches)
self.assert_dictionary_close(args, {'max_black_patches': 20,
'probability': 0.95,
'size_to_image_ratio': 0.12})
def test_build_random_resize_method(self):
preprocessor_text_proto = """
random_resize_method {
target_height: 75
target_width: 100
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.random_resize_method)
self.assert_dictionary_close(args, {'target_size': [75, 100]})
def test_build_scale_boxes_to_pixel_coordinates(self):
preprocessor_text_proto = """
scale_boxes_to_pixel_coordinates {}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates)
self.assertEqual(args, {})
def test_build_resize_image(self):
preprocessor_text_proto = """
resize_image {
new_height: 75
new_width: 100
method: BICUBIC
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.resize_image)
self.assertEqual(args, {'new_height': 75,
'new_width': 100,
'method': tf.image.ResizeMethod.BICUBIC})
def test_build_subtract_channel_mean(self):
preprocessor_text_proto = """
subtract_channel_mean {
means: [1.0, 2.0, 3.0]
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.subtract_channel_mean)
self.assertEqual(args, {'means': [1.0, 2.0, 3.0]})
def test_build_ssd_random_crop(self):
preprocessor_text_proto = """
ssd_random_crop {
operations {
min_object_covered: 0.0
min_aspect_ratio: 0.875
max_aspect_ratio: 1.125
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
}
operations {
min_object_covered: 0.25
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
}
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375]})
def test_build_ssd_random_crop_empty_operations(self):
preprocessor_text_proto = """
ssd_random_crop {
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop)
self.assertEqual(args, {})
def test_build_ssd_random_crop_pad(self):
preprocessor_text_proto = """
ssd_random_crop_pad {
operations {
min_object_covered: 0.0
min_aspect_ratio: 0.875
max_aspect_ratio: 1.125
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
min_padded_size_ratio: [0.0, 0.0]
max_padded_size_ratio: [2.0, 2.0]
pad_color_r: 0.5
pad_color_g: 0.5
pad_color_b: 0.5
}
operations {
min_object_covered: 0.25
min_aspect_ratio: 0.75
max_aspect_ratio: 1.5
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
min_padded_size_ratio: [0.0, 0.0]
max_padded_size_ratio: [2.0, 2.0]
pad_color_r: 0.5
pad_color_g: 0.5
pad_color_b: 0.5
}
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop_pad)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375],
'min_padded_size_ratio': [(0.0, 0.0), (0.0, 0.0)],
'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)],
'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]})
def test_build_ssd_random_crop_fixed_aspect_ratio(self):
preprocessor_text_proto = """
ssd_random_crop_fixed_aspect_ratio {
operations {
min_object_covered: 0.0
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.0
random_coef: 0.375
}
operations {
min_object_covered: 0.25
min_area: 0.5
max_area: 1.0
overlap_thresh: 0.25
random_coef: 0.375
}
aspect_ratio: 0.875
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio)
self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
'aspect_ratio': 0.875,
'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375]})
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builder for region similarity calculators."""
from object_detection.core import region_similarity_calculator
from object_detection.protos import region_similarity_calculator_pb2
def build(region_similarity_calculator_config):
"""Builds region similarity calculator based on the configuration.
Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity] objects. See
core/region_similarity_calculator.proto for details.
Args:
region_similarity_calculator_config: RegionSimilarityCalculator
configuration proto.
Returns:
region_similarity_calculator: RegionSimilarityCalculator object.
Raises:
ValueError: On unknown region similarity calculator.
"""
if not isinstance(
region_similarity_calculator_config,
region_similarity_calculator_pb2.RegionSimilarityCalculator):
raise ValueError(
'region_similarity_calculator_config not of type '
        'region_similarity_calculator_pb2.RegionSimilarityCalculator')
similarity_calculator = region_similarity_calculator_config.WhichOneof(
'region_similarity')
if similarity_calculator == 'iou_similarity':
return region_similarity_calculator.IouSimilarity()
if similarity_calculator == 'ioa_similarity':
return region_similarity_calculator.IoaSimilarity()
if similarity_calculator == 'neg_sq_dist_similarity':
return region_similarity_calculator.NegSqDistSimilarity()
raise ValueError('Unknown region similarity calculator.')
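# Example (illustrative sketch, not part of the original file): constructing an
# IoU similarity calculator from a text-format config.
if __name__ == '__main__':
  from google.protobuf import text_format

  calculator_proto = (
      region_similarity_calculator_pb2.RegionSimilarityCalculator())
  text_format.Merge('iou_similarity {}', calculator_proto)
  similarity_calc = build(calculator_proto)
  print(similarity_calc)  # an IouSimilarity instance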
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator_builder."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import region_similarity_calculator_builder
from object_detection.core import region_similarity_calculator
from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
def testBuildIoaSimilarityCalculator(self):
similarity_calc_text_proto = """
ioa_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.IoaSimilarity))
def testBuildIouSimilarityCalculator(self):
similarity_calc_text_proto = """
iou_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.IouSimilarity))
def testBuildNegSqDistSimilarityCalculator(self):
similarity_calc_text_proto = """
neg_sq_dist_similarity {
}
"""
similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
similarity_calc = region_similarity_calculator_builder.build(
similarity_calc_proto)
self.assertTrue(isinstance(similarity_calc,
region_similarity_calculator.
NegSqDistSimilarity))
if __name__ == '__main__':
tf.test.main()

Contributing to the Tensorflow Object Detection API

Patches to Tensorflow Object Detection API are welcome!

We require contributors to fill out either the individual or corporate Contributor License Agreement (CLA).

  • If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an individual CLA.
  • If you work for a company that wants to allow you to contribute your work, then you'll need to sign a corporate CLA.

Please follow the Tensorflow contributing guidelines when submitting pull requests.

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base anchor generator.
The job of the anchor generator is to create (or load) a collection
of bounding boxes to be used as anchors.
Generated anchors are assumed to match some convolutional grid or list of grid
shapes. For example, we might want to generate anchors matching an 8x8
feature map and a 4x4 feature map. If we place 3 anchors per grid location
on the first feature map and 6 anchors per grid location on the second feature
map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
To support fully convolutional settings, feature map shapes are passed
dynamically at generation time. The number of anchors to place at each location
is static: implementations of AnchorGenerator must always be able to return
the number of anchors that they use per location for each feature map.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
class AnchorGenerator(object):
"""Abstract base class for anchor generators."""
__metaclass__ = ABCMeta
@abstractmethod
def name_scope(self):
"""Name scope.
Must be defined by implementations.
Returns:
a string representing the name scope of the anchor generation operation.
"""
pass
@property
def check_num_anchors(self):
"""Whether to dynamically check the number of anchors generated.
Can be overridden by implementations that would like to disable this
behavior.
Returns:
a boolean controlling whether the Generate function should dynamically
check the number of anchors generated against the mathematically
expected number of anchors.
"""
return True
@abstractmethod
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
pass
def generate(self, feature_map_shape_list, **params):
"""Generates a collection of bounding boxes to be used as anchors.
TODO: remove **params from argument list and make stride and offsets (for
multiple_grid_anchor_generator) constructor arguments.
Args:
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with. Pairs can be provided as 1-dimensional
integer tensors of length 2 or simply as tuples of integers.
**params: parameters for anchor generation op
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if the number of feature map shapes does not match the length
of NumAnchorsPerLocation.
"""
if self.check_num_anchors and (
len(feature_map_shape_list) != len(self.num_anchors_per_location())):
raise ValueError('Number of feature maps is expected to equal the length '
'of `num_anchors_per_location`.')
with tf.name_scope(self.name_scope()):
anchors = self._generate(feature_map_shape_list, **params)
if self.check_num_anchors:
with tf.control_dependencies([
self._assert_correct_number_of_anchors(
anchors, feature_map_shape_list)]):
anchors.set(tf.identity(anchors.get()))
return anchors
@abstractmethod
def _generate(self, feature_map_shape_list, **params):
"""To be overridden by implementations.
Args:
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with.
**params: parameters for anchor generation op
Returns:
boxes: a BoxList holding a collection of N anchor boxes
"""
pass
def _assert_correct_number_of_anchors(self, anchors, feature_map_shape_list):
"""Assert that correct number of anchors was generated.
Args:
anchors: box_list.BoxList object holding anchors generated
feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with.
Returns:
Op that raises InvalidArgumentError if the number of anchors does not
match the number of expected anchors.
"""
expected_num_anchors = 0
for num_anchors_per_location, feature_map_shape in zip(
self.num_anchors_per_location(), feature_map_shape_list):
expected_num_anchors += (num_anchors_per_location
* feature_map_shape[0]
* feature_map_shape[1])
return tf.assert_equal(expected_num_anchors, anchors.num_boxes())
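# Example (illustrative sketch, not part of the original file): the smallest
# possible concrete AnchorGenerator. The class name below is hypothetical; it
# places a single unit-square anchor at every cell of one feature map, so that
# `generate` produces height * width anchors.
#
#   from object_detection.core import box_list
#
#   class UnitSquareAnchorGenerator(AnchorGenerator):
#
#     def name_scope(self):
#       return 'UnitSquareAnchorGenerator'
#
#     def num_anchors_per_location(self):
#       return [1]
#
#     def _generate(self, feature_map_shape_list, **params):
#       height, width = feature_map_shape_list[0]
#       anchors = tf.tile(tf.constant([[0.0, 0.0, 1.0, 1.0]]),
#                         [height * width, 1])
#       return box_list.BoxList(anchors)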
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
[0,1]. The class presumes there are many more negatives than positive examples:
if the desired batch_size cannot be achieved with the pre-specified positive
fraction, it fills the rest with negative examples. If this is not sufficient
for obtaining the desired batch_size, it returns fewer examples.
The main function to call is Subsample(self, indicator, batch_size, labels). For convenience
one can also call SubsampleWeights(self, weights, labels) which is defined in
the minibatch_sampler base class.
"""
import tensorflow as tf
from object_detection.core import minibatch_sampler
class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
"""Subsamples minibatches to a desired balance of positives and negatives."""
def __init__(self, positive_fraction=0.5):
"""Constructs a minibatch sampler.
Args:
positive_fraction: desired fraction of positive examples (scalar in [0,1])
Raises:
ValueError: if positive_fraction < 0, or positive_fraction > 1
"""
if positive_fraction < 0 or positive_fraction > 1:
raise ValueError('positive_fraction should be in range [0,1]. '
'Received: %s.' % positive_fraction)
self._positive_fraction = positive_fraction
def subsample(self, indicator, batch_size, labels):
"""Returns subsampled minibatch.
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples.
Returns:
is_sampled: boolean tensor of shape [N], True for entries which are
sampled.
Raises:
ValueError: if labels and indicator are not 1D boolean tensors.
"""
if len(indicator.get_shape().as_list()) != 1:
raise ValueError('indicator must be 1 dimensional, got a tensor of '
'shape %s' % indicator.get_shape())
if len(labels.get_shape().as_list()) != 1:
raise ValueError('labels must be 1 dimensional, got a tensor of '
'shape %s' % labels.get_shape())
if labels.dtype != tf.bool:
raise ValueError('labels should be of type bool. Received: %s' %
labels.dtype)
if indicator.dtype != tf.bool:
raise ValueError('indicator should be of type bool. Received: %s' %
indicator.dtype)
# Only sample from indicated samples
negative_idx = tf.logical_not(labels)
positive_idx = tf.logical_and(labels, indicator)
negative_idx = tf.logical_and(negative_idx, indicator)
# Sample positive and negative samples separately
max_num_pos = int(self._positive_fraction * batch_size)
sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
max_num_neg = batch_size - tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx)
return sampled_idx
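# Example (illustrative sketch, not part of the original file): drawing a
# 64-element minibatch with a 50/50 positive/negative split. `labels` and
# `indicator` stand for hypothetical boolean [N] tensors.
#
#   sampler = BalancedPositiveNegativeSampler(positive_fraction=0.5)
#   is_sampled = sampler.subsample(indicator, batch_size=64, labels=labels)
#   # `is_sampled` is a boolean [N] tensor marking the chosen examples.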
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.balanced_positive_negative_sampler."""
import numpy as np
import tensorflow as tf
from object_detection.core import balanced_positive_negative_sampler
class BalancedPositiveNegativeSamplerTest(tf.test.TestCase):
def test_subsample_all_examples(self):
numpy_labels = np.random.permutation(300)
indicator = tf.constant(np.ones(300) == 1)
numpy_labels = (numpy_labels - 200) > 0
labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess:
is_sampled = sess.run(is_sampled)
self.assertTrue(sum(is_sampled) == 64)
self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
self.assertTrue(sum(np.logical_and(
np.logical_not(numpy_labels), is_sampled)) == 32)
def test_subsample_selection(self):
# Test random sampling when only some examples can be sampled:
# 100 samples, 20 positives, 10 positives cannot be sampled
numpy_labels = np.arange(100)
numpy_indicator = numpy_labels < 90
indicator = tf.constant(numpy_indicator)
numpy_labels = (numpy_labels - 80) >= 0
labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess:
is_sampled = sess.run(is_sampled)
self.assertTrue(sum(is_sampled) == 64)
self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
self.assertTrue(sum(np.logical_and(
np.logical_not(numpy_labels), is_sampled)) == 54)
self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
numpy_indicator))
def test_raises_error_with_incorrect_label_shape(self):
labels = tf.constant([[True, False, False]])
indicator = tf.constant([True, False, True])
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
with self.assertRaises(ValueError):
sampler.subsample(indicator, 64, labels)
def test_raises_error_with_incorrect_indicator_shape(self):
labels = tf.constant([True, False, False])
indicator = tf.constant([[True, False, True]])
sampler = (balanced_positive_negative_sampler.
BalancedPositiveNegativeSampler())
with self.assertRaises(ValueError):
sampler.subsample(indicator, 64, labels)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to batch a dictionary of input tensors."""
import collections
import tensorflow as tf
from object_detection.core import prefetcher
class BatchQueue(object):
"""BatchQueue class.
This class creates a batch queue to asynchronously enqueue tensors_dict.
It also adds a FIFO prefetcher so that the batches are readily available
for the consumers. Dequeue ops for a BatchQueue object can be created via
the Dequeue method which evaluates to a batch of tensor_dict.
Example input pipeline with batching:
------------------------------------
key, string_tensor = slim.parallel_reader.parallel_read(...)
tensor_dict = decoder.decode(string_tensor)
tensor_dict = preprocessor.preprocess(tensor_dict, ...)
batch_queue = batcher.BatchQueue(tensor_dict,
batch_size=32,
batch_queue_capacity=2000,
num_batch_queue_threads=8,
prefetch_queue_capacity=20)
tensor_dict = batch_queue.dequeue()
outputs = Model(tensor_dict)
...
-----------------------------------
Notes:
-----
This class batches tensors of unequal sizes by zero padding and unpadding
them after generating a batch. This can be computationally expensive when
batching tensors (such as images) that are of vastly different sizes. So it is
recommended that the shapes of such tensors be fully defined in tensor_dict
while other lightweight tensors such as bounding box corners and class labels
can be of varying sizes. Use either crop or resize operations to fully define
the shape of an image in tensor_dict.
It is also recommended to perform any preprocessing operations on tensors
before passing to BatchQueue and subsequently calling the Dequeue method.
Another caveat is that this class does not read the last batch if it is not
full. The current implementation makes it hard to support that use case. So,
for evaluation, when it is critical to run all the examples through your
network use the input pipeline example mentioned in core/prefetcher.py.
"""
def __init__(self, tensor_dict, batch_size, batch_queue_capacity,
num_batch_queue_threads, prefetch_queue_capacity):
"""Constructs a batch queue holding tensor_dict.
Args:
tensor_dict: dictionary of tensors to batch.
batch_size: batch size.
batch_queue_capacity: max capacity of the queue from which the tensors are
batched.
num_batch_queue_threads: number of threads to use for batching.
prefetch_queue_capacity: max capacity of the queue used to prefetch
assembled batches.
"""
# Remember static shapes to set shapes of batched tensors.
static_shapes = collections.OrderedDict(
        {key: tensor.get_shape() for key, tensor in tensor_dict.items()})
# Remember runtime shapes to unpad tensors after batching.
runtime_shapes = collections.OrderedDict(
{(key, 'runtime_shapes'): tf.shape(tensor)
         for key, tensor in tensor_dict.items()})
all_tensors = tensor_dict
all_tensors.update(runtime_shapes)
batched_tensors = tf.train.batch(
all_tensors,
capacity=batch_queue_capacity,
batch_size=batch_size,
dynamic_pad=True,
num_threads=num_batch_queue_threads)
self._queue = prefetcher.prefetch(batched_tensors,
prefetch_queue_capacity)
self._static_shapes = static_shapes
self._batch_size = batch_size
def dequeue(self):
"""Dequeues a batch of tensor_dict from the BatchQueue.
TODO: use allow_smaller_final_batch to allow running over the whole eval set
Returns:
A list of tensor_dicts of the requested batch_size.
"""
batched_tensors = self._queue.dequeue()
# Separate input tensors from tensors containing their runtime shapes.
tensors = {}
shapes = {}
    for key, batched_tensor in batched_tensors.items():
unbatched_tensor_list = tf.unstack(batched_tensor)
for i, unbatched_tensor in enumerate(unbatched_tensor_list):
if isinstance(key, tuple) and key[1] == 'runtime_shapes':
shapes[(key[0], i)] = unbatched_tensor
else:
tensors[(key, i)] = unbatched_tensor
# Undo that padding using shapes and create a list of size `batch_size` that
# contains tensor dictionaries.
tensor_dict_list = []
batch_size = self._batch_size
for batch_id in range(batch_size):
tensor_dict = {}
for key in self._static_shapes:
tensor_dict[key] = tf.slice(tensors[(key, batch_id)],
tf.zeros_like(shapes[(key, batch_id)]),
shapes[(key, batch_id)])
tensor_dict[key].set_shape(self._static_shapes[key])
tensor_dict_list.append(tensor_dict)
return tensor_dict_list
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.batcher."""
import numpy as np
import tensorflow as tf
from object_detection.core import batcher
slim = tf.contrib.slim
class BatcherTest(tf.test.TestCase):
def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
boxes = tf.tile(
tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)]))
batch_queue = batcher.BatchQueue(
tensor_dict={'boxes': boxes},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, 4], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions(
self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
image = tf.reshape(
tf.range(counter * counter), tf.stack([counter, counter]))
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, None], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self):
with self.test_session() as sess:
batch_size = 3
num_batches = 2
examples = tf.Variable(tf.constant(1, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 1)
image = tf.reshape(tf.range(1, 13), [4, 3]) * counter
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([4, 3], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 1
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i)
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
def test_batcher_when_batch_size_is_one(self):
with self.test_session() as sess:
batch_size = 1
num_batches = 2
examples = tf.Variable(tf.constant(2, dtype=tf.int32))
counter = examples.count_up_to(num_batches * batch_size + 2)
image = tf.reshape(
tf.range(counter * counter), tf.stack([counter, counter]))
batch_queue = batcher.BatchQueue(
tensor_dict={'image': image},
batch_size=batch_size,
batch_queue_capacity=100,
num_batch_queue_threads=1,
prefetch_queue_capacity=100)
batch = batch_queue.dequeue()
for tensor_dict in batch:
for tensor in tensor_dict.values():
self.assertAllEqual([None, None], tensor.get_shape().as_list())
tf.initialize_all_variables().run()
with slim.queues.QueueRunners(sess):
i = 2
for _ in range(num_batches):
batch_np = sess.run(batch)
for tensor_dict in batch_np:
for tensor in tensor_dict.values():
self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
i += 1
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(batch)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
defined by a specific anchor).
Users of a BoxCoder can call two methods:
encode: which encodes a box with respect to a given anchor
(or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
decode: which inverts this encoding with a decode operation.
In both cases, the arguments are assumed to be in 1-1 correspondence already;
it is not the job of a BoxCoder to perform matching.
"""
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
import tensorflow as tf
# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
MEAN_STDDEV = 'mean_stddev'
SQUARE = 'square'
class BoxCoder(object):
"""Abstract base class for box coder."""
__metaclass__ = ABCMeta
@abstractproperty
def code_size(self):
"""Return the size of each code.
This number is a constant and should agree with the output of the `encode`
op (e.g. if rel_codes is the output of self.encode(...), then it should have
shape [N, code_size()]). This abstractproperty should be overridden by
implementations.
Returns:
an integer constant
"""
pass
def encode(self, boxes, anchors):
"""Encode a box list relative to an anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
with tf.name_scope('Encode'):
return self._encode(boxes, anchors)
def decode(self, rel_codes, anchors):
"""Decode boxes that are encoded relative to an anchor collection.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
with tf.name_scope('Decode'):
return self._decode(rel_codes, anchors)
@abstractmethod
def _encode(self, boxes, anchors):
"""Method to be overriden by implementations.
Args:
boxes: BoxList holding N boxes to be encoded
anchors: BoxList of N anchors
Returns:
a tensor representing N relative-encoded boxes
"""
pass
@abstractmethod
def _decode(self, rel_codes, anchors):
"""Method to be overriden by implementations.
Args:
rel_codes: a tensor representing N relative-encoded boxes
anchors: BoxList of anchors
Returns:
boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
with corners y_min, x_min, y_max, x_max)
"""
pass
def batch_decode(encoded_boxes, box_coder, anchors):
"""Decode a batch of encoded boxes.
This op takes a batch of encoded bounding boxes and transforms
them to a batch of bounding boxes specified by their corners in
the order of [y_min, x_min, y_max, x_max].
Args:
encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
code_size] representing the location of the objects.
box_coder: a BoxCoder object.
anchors: a BoxList of anchors used to encode `encoded_boxes`.
Returns:
decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
      code_size] representing the corners of the objects in the order
of [y_min, x_min, y_max, x_max].
Raises:
ValueError: if batch sizes of the inputs are inconsistent, or if
the number of anchors inferred from encoded_boxes and anchors are
inconsistent.
"""
encoded_boxes.get_shape().assert_has_rank(3)
if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
raise ValueError('The number of anchors inferred from encoded_boxes'
' and anchors are inconsistent: shape[1] of encoded_boxes'
' %s should be equal to the number of anchors: %s.' %
(encoded_boxes.get_shape()[1].value,
anchors.num_boxes_static()))
decoded_boxes = tf.stack([
box_coder.decode(boxes, anchors).get()
for boxes in tf.unstack(encoded_boxes)
])
return decoded_boxes
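# Example (illustrative sketch, not part of the original file): the encode /
# decode contract for a concrete BoxCoder `coder` and a BoxList `anchors`.
#
#   rel_codes = coder.encode(boxes, anchors)      # [N, code_size] tensor
#   recovered = coder.decode(rel_codes, anchors)  # BoxList of N boxes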
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_coder."""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
class MockBoxCoder(box_coder.BoxCoder):
"""Test BoxCoder that encodes/decodes using the multiply-by-two function."""
def code_size(self):
return 4
def _encode(self, boxes, anchors):
return 2.0 * boxes.get()
def _decode(self, rel_codes, anchors):
return box_list.BoxList(rel_codes / 2.0)
class BoxCoderTest(tf.test.TestCase):
def test_batch_decode(self):
mock_anchor_corners = tf.constant(
[[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
mock_anchors = box_list.BoxList(mock_anchor_corners)
mock_box_coder = MockBoxCoder()
expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
[[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
encoded_boxes_list = [mock_box_coder.encode(
box_list.BoxList(tf.constant(boxes)), mock_anchors)
for boxes in expected_boxes]
encoded_boxes = tf.stack(encoded_boxes_list)
decoded_boxes = box_coder.batch_decode(
encoded_boxes, mock_box_coder, mock_anchors)
with self.test_session() as sess:
decoded_boxes_result = sess.run(decoded_boxes)
self.assertAllClose(expected_boxes, decoded_boxes_result)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
tensors, where each bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
within a given list correspond to a single image. See also
box_list_ops.py for common box related operations (such as area, iou, etc).
Optionally, users can add additional related fields (such as weights).
We assume the following things to be true about fields:
* they correspond to boxes in the box_list along the 0th dimension
* they have inferrable rank at graph construction time
* all dimensions except for possibly the 0th can be inferred
(i.e., not None) at graph construction time.
Some other notes:
* Following tensorflow conventions, we use height, width ordering,
and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
* Tensors are always provided as (flat) [N, 4] tensors.
"""
import tensorflow as tf
class BoxList(object):
"""Box collection."""
def __init__(self, boxes):
"""Constructs box collection.
Args:
boxes: a tensor of shape [N, 4] representing box corners
Raises:
ValueError: if invalid dimensions for bbox data or if bbox data is not in
float32 format.
"""
if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
raise ValueError('Invalid dimensions for box data.')
if boxes.dtype != tf.float32:
raise ValueError('Invalid tensor type: should be tf.float32')
self.data = {'boxes': boxes}
def num_boxes(self):
"""Returns number of boxes held in collection.
Returns:
a tensor representing the number of boxes held in the collection.
"""
return tf.shape(self.data['boxes'])[0]
def num_boxes_static(self):
"""Returns number of boxes held in collection.
This number is inferred at graph construction time rather than run-time.
Returns:
Number of boxes held in collection (integer) or None if this is not
inferrable at graph construction time.
"""
return self.data['boxes'].get_shape()[0].value
def get_all_fields(self):
"""Returns all fields."""
return self.data.keys()
def get_extra_fields(self):
"""Returns all non-box fields (i.e., everything not named 'boxes')."""
return [k for k in self.data.keys() if k != 'boxes']
def add_field(self, field, field_data):
"""Add field to box list.
This method can be used to add related box data such as
weights/labels, etc.
Args:
field: a string key to access the data via `get`
field_data: a tensor containing the data to store in the BoxList
"""
self.data[field] = field_data
def has_field(self, field):
return field in self.data
def get(self):
"""Convenience function for accessing box coordinates.
Returns:
a tensor with shape [N, 4] representing box coordinates.
"""
return self.get_field('boxes')
def set(self, boxes):
"""Convenience function for setting box coordinates.
Args:
boxes: a tensor of shape [N, 4] representing box corners
Raises:
ValueError: if invalid dimensions for bbox data
"""
if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
raise ValueError('Invalid dimensions for box data.')
self.data['boxes'] = boxes
def get_field(self, field):
"""Accesses a box collection and associated fields.
    This function returns the data stored under the specified field of the box
    collection.
Args:
      field: a string specifying the related field to be accessed.
Returns:
a tensor representing the box collection or an associated field.
Raises:
ValueError: if invalid field
"""
if not self.has_field(field):
raise ValueError('field ' + str(field) + ' does not exist')
return self.data[field]
def set_field(self, field, value):
"""Sets the value of a field.
Updates the field of a box_list with a given value.
Args:
field: (string) name of the field to set value.
value: the value to assign to the field.
Raises:
ValueError: if the box_list does not have specified field.
"""
if not self.has_field(field):
raise ValueError('field %s does not exist' % field)
self.data[field] = value
def get_center_coordinates_and_sizes(self, scope=None):
"""Computes the center coordinates, height and width of the boxes.
Args:
scope: name scope of the function.
Returns:
a list of 4 1-D tensors [ycenter, xcenter, height, width].
"""
with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
box_corners = self.get()
ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
width = xmax - xmin
height = ymax - ymin
ycenter = ymin + height / 2.
xcenter = xmin + width / 2.
return [ycenter, xcenter, height, width]
def transpose_coordinates(self, scope=None):
"""Transpose the coordinate representation in a boxlist.
Args:
scope: name scope of the function.
"""
with tf.name_scope(scope, 'transpose_coordinates'):
y_min, x_min, y_max, x_max = tf.split(
value=self.get(), num_or_size_splits=4, axis=1)
self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
def as_tensor_dict(self, fields=None):
"""Retrieves specified fields as a dictionary of tensors.
Args:
fields: (optional) list of fields to return in the dictionary.
If None (default), all fields are returned.
Returns:
tensor_dict: A dictionary of tensors specified by fields.
Raises:
ValueError: if specified field is not contained in boxlist.
"""
tensor_dict = {}
if fields is None:
fields = self.get_all_fields()
for field in fields:
if not self.has_field(field):
raise ValueError('boxlist must contain all specified fields')
tensor_dict[field] = self.get_field(field)
return tensor_dict
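# Example (illustrative sketch, not part of the original file): constructing a
# BoxList, attaching a per-box field, and reading back derived quantities.
if __name__ == '__main__':
  corners = tf.constant([[0.0, 0.0, 0.5, 0.5],
                         [0.25, 0.25, 1.0, 1.0]], tf.float32)
  boxes = BoxList(corners)
  boxes.add_field('scores', tf.constant([0.9, 0.8]))
  ycenter, xcenter, height, width = boxes.get_center_coordinates_and_sizes()
  with tf.Session() as sess:
    print(sess.run([boxes.num_boxes(), ycenter, xcenter, height, width]))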
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations.
Example box operations that are supported:
* areas: compute bounding box areas
* iou: pairwise intersection-over-union scores
* sq_dist: pairwise distances between bounding boxes
Whenever box_list_ops functions output a BoxList, the fields of the incoming
BoxList are retained unless documented otherwise.
"""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.utils import shape_utils
class SortOrder(object):
"""Enum class for sort order.
Attributes:
ascend: ascend order.
descend: descend order.
"""
ascend = 1
descend = 2
def area(boxlist, scope=None):
"""Computes area of boxes.
Args:
boxlist: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing box areas.
"""
with tf.name_scope(scope, 'Area'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
def height_width(boxlist, scope=None):
"""Computes height and width of boxes in boxlist.
Args:
boxlist: BoxList holding N boxes
scope: name scope.
Returns:
Height: A tensor with shape [N] representing box heights.
Width: A tensor with shape [N] representing box widths.
"""
with tf.name_scope(scope, 'HeightWidth'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
def scale(boxlist, y_scale, x_scale, scope=None):
"""scale box coordinates in x and y dimensions.
Args:
boxlist: BoxList holding N boxes
y_scale: (float) scalar tensor
x_scale: (float) scalar tensor
scope: name scope.
Returns:
boxlist: BoxList holding N boxes
"""
with tf.name_scope(scope, 'Scale'):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
y_min = y_scale * y_min
y_max = y_scale * y_max
x_min = x_scale * x_min
x_max = x_scale * x_max
scaled_boxlist = box_list.BoxList(
tf.concat([y_min, x_min, y_max, x_max], 1))
return _copy_extra_fields(scaled_boxlist, boxlist)
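def _example_scale_usage():
  # Minimal usage sketch for scale(): converting a normalized box to pixel
  # coordinates. The 300x500 image size is an assumed, illustrative value.
  normalized = box_list.BoxList(tf.constant([[0.1, 0.2, 0.5, 0.9]]))
  pixel_boxes = scale(normalized, y_scale=300.0, x_scale=500.0)
  with tf.Session() as sess:
    print(sess.run(pixel_boxes.get()))  # -> [[ 30. 100. 150. 450.]]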
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
"""Clip bounding boxes to a window.
This op clips any input bounding boxes (represented by bounding box
corners) to a window, optionally filtering out boxes that do not
overlap at all with the window.
Args:
boxlist: BoxList holding M_in boxes
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window to which the op should clip boxes.
filter_nonoverlapping: whether to filter out boxes that do not overlap at
all with the window.
scope: name scope.
Returns:
a BoxList holding M_out boxes where M_out <= M_in
"""
with tf.name_scope(scope, 'ClipToWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
clipped = box_list.BoxList(
tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
1))
clipped = _copy_extra_fields(clipped, boxlist)
if filter_nonoverlapping:
areas = area(clipped)
nonzero_area_indices = tf.cast(
tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
clipped = gather(clipped, nonzero_area_indices)
return clipped
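def _example_clip_to_window_usage():
  # Minimal usage sketch for clip_to_window(); box and window values are
  # illustrative. The overflowing box is clipped, and the box that ends up
  # with zero area is filtered out.
  boxes = box_list.BoxList(
      tf.constant([[2.0, 2.0, 6.0, 6.0],
                   [-5.0, -5.0, 15.0, 15.0],
                   [20.0, 20.0, 30.0, 30.0]]))
  window = tf.constant([0.0, 0.0, 10.0, 10.0])
  clipped = clip_to_window(boxes, window)
  with tf.Session() as sess:
    print(sess.run(clipped.get()))  # two boxes remain; the third is dropped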
def prune_outside_window(boxlist, window, scope=None):
"""Prunes bounding boxes that fall outside a given window.
This function prunes bounding boxes that even partially fall outside the given
window. See also clip_to_window which only prunes bounding boxes that fall
completely outside the window, and clips any bounding boxes that partially
overflow.
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
scope: name scope.
Returns:
pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
with tf.name_scope(scope, 'PruneOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def prune_completely_outside_window(boxlist, window, scope=None):
"""Prunes bounding boxes that fall completely outside of the given window.
The function clip_to_window prunes bounding boxes that fall
completely outside the window, but also clips any bounding boxes that
partially overflow. This function does not clip partially overflowing boxes.
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
scope: name scope.
Returns:
pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def intersection(boxlist1, boxlist2, scope=None):
"""Compute pairwise intersection areas between boxes.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise intersections
"""
with tf.name_scope(scope, 'Intersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
value=boxlist2.get(), num_or_size_splits=4, axis=1)
all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def matched_intersection(boxlist1, boxlist2, scope=None):
"""Compute intersection areas between corresponding boxes in two boxlists.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing pairwise intersections
"""
with tf.name_scope(scope, 'MatchedIntersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
value=boxlist2.get(), num_or_size_splits=4, axis=1)
min_ymax = tf.minimum(y_max1, y_max2)
max_ymin = tf.maximum(y_min1, y_min2)
intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
min_xmax = tf.minimum(x_max1, x_max2)
max_xmin = tf.maximum(x_min1, x_min2)
intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
return tf.reshape(intersect_heights * intersect_widths, [-1])
def iou(boxlist1, boxlist2, scope=None):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'IOU'):
intersections = intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
unions = (
tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
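def _example_iou_usage():
  # Minimal usage sketch for iou(); the corner values are illustrative. The
  # result is an [N, M] matrix of pairwise scores.
  boxes1 = box_list.BoxList(
      tf.constant([[0.0, 0.0, 2.0, 2.0],
                   [0.0, 0.0, 1.0, 1.0]]))
  boxes2 = box_list.BoxList(tf.constant([[0.0, 0.0, 2.0, 2.0]]))
  pairwise_iou = iou(boxes1, boxes2)  # shape [2, 1]
  with tf.Session() as sess:
    print(sess.run(pairwise_iou))  # -> [[1.], [0.25]]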
def matched_iou(boxlist1, boxlist2, scope=None):
"""Compute intersection-over-union between corresponding boxes in boxlists.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'MatchedIOU'):
intersections = matched_intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
unions = areas1 + areas2 - intersections
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
def ioa(boxlist1, boxlist2, scope=None):
"""Computes pairwise intersection-over-area between box collections.
  Intersection-over-area (IOA) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, ioa(box1, box2) != ioa(box2, box1).
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise ioa scores.
"""
with tf.name_scope(scope, 'IOA'):
intersections = intersection(boxlist1, boxlist2)
areas = tf.expand_dims(area(boxlist2), 0)
return tf.truediv(intersections, areas)
def prune_non_overlapping_boxes(
boxlist1, boxlist2, min_overlap=0.0, scope=None):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
  For each box in boxlist1, we want its IOA to be more than min_overlap with
  at least one of the boxes in boxlist2. If it is not, we remove that box.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
min_overlap: Minimum required overlap between boxes, to count them as
overlapping.
scope: name scope.
Returns:
new_boxlist1: A pruned boxlist with size [N', 4].
keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
first input BoxList `boxlist1`.
"""
with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
new_boxlist1 = gather(boxlist1, keep_inds)
return new_boxlist1, keep_inds
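def _example_prune_non_overlapping_usage():
  # Minimal usage sketch for prune_non_overlapping_boxes(); values are
  # illustrative. Only boxes in boxes1 with IOA >= 0.5 against some box in
  # boxes2 are kept.
  boxes1 = box_list.BoxList(
      tf.constant([[0.0, 0.0, 1.0, 1.0],
                   [5.0, 5.0, 6.0, 6.0]]))
  boxes2 = box_list.BoxList(tf.constant([[0.0, 0.0, 2.0, 2.0]]))
  pruned, keep_inds = prune_non_overlapping_boxes(
      boxes1, boxes2, min_overlap=0.5)
  with tf.Session() as sess:
    print(sess.run([pruned.get(), keep_inds]))  # keeps only the first box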
def prune_small_boxes(boxlist, min_side, scope=None):
"""Prunes small boxes in the boxlist which have a side smaller than min_side.
Args:
boxlist: BoxList holding N boxes.
min_side: Minimum width AND height of box to survive pruning.
scope: name scope.
Returns:
A pruned boxlist.
"""
with tf.name_scope(scope, 'PruneSmallBoxes'):
height, width = height_width(boxlist)
is_valid = tf.logical_and(tf.greater_equal(width, min_side),
tf.greater_equal(height, min_side))
return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
def change_coordinate_frame(boxlist, window, scope=None):
"""Change coordinate frame of the boxlist to be relative to window's frame.
Given a window of the form [ymin, xmin, ymax, xmax],
changes bounding box coordinates from boxlist to be relative to this window
(e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
  An example use case is data augmentation, where we are given groundtruth
boxes (boxlist) and would like to randomly crop the image to some
window (window). In this case we need to change the coordinate frame of
each groundtruth box to be relative to this new window.
Args:
boxlist: A BoxList object holding N boxes.
window: A rank 1 tensor [4].
scope: name scope.
Returns:
Returns a BoxList object with N boxes.
"""
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(box_list.BoxList(
boxlist.get() - [window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
return boxlist_new
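def _example_change_coordinate_frame_usage():
  # Minimal usage sketch for change_coordinate_frame() in a random-crop
  # style setting; the groundtruth box and crop window are illustrative.
  groundtruth = box_list.BoxList(tf.constant([[0.25, 0.25, 0.75, 0.75]]))
  crop_window = tf.constant([0.25, 0.25, 0.75, 0.75])
  relative = change_coordinate_frame(groundtruth, crop_window)
  with tf.Session() as sess:
    print(sess.run(relative.get()))  # -> [[0. 0. 1. 1.]]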
def sq_dist(boxlist1, boxlist2, scope=None):
"""Computes the pairwise squared distances between box corners.
This op treats each box as if it were a point in a 4d Euclidean space and
computes pairwise squared distances.
Mathematically, we are given two matrices of box coordinates X and Y,
where X(i,:) is the i'th row of X, containing the 4 numbers defining the
corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
boxlist2. We compute
Z(i,j) = ||X(i,:) - Y(j,:)||^2
= ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise distances
"""
with tf.name_scope(scope, 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
transpose_a=False, transpose_b=True)
return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
def boolean_mask(boxlist, indicator, fields=None, scope=None):
"""Select boxes from BoxList according to indicator and return new BoxList.
`boolean_mask` returns the subset of boxes that are marked as "True" by the
indicator tensor. By default, `boolean_mask` returns boxes corresponding to
the input index list, as well as all additional fields stored in the boxlist
(indexing into the first dimension). However one can optionally only draw
from a subset of fields.
Args:
boxlist: BoxList holding N boxes
indicator: a rank-1 boolean tensor
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
scope: name scope.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indicator
Raises:
ValueError: if `indicator` is not a rank-1 boolean tensor.
"""
with tf.name_scope(scope, 'BooleanMask'):
if indicator.shape.ndims != 1:
raise ValueError('indicator should have rank 1')
if indicator.dtype != tf.bool:
raise ValueError('indicator should be a boolean tensor')
subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all specified fields')
subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
subboxlist.add_field(field, subfieldlist)
return subboxlist
def gather(boxlist, indices, fields=None, scope=None):
"""Gather boxes from BoxList according to indices and return new BoxList.
By default, `gather` returns boxes corresponding to the input index list, as
well as all additional fields stored in the boxlist (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
Args:
boxlist: BoxList holding N boxes
indices: a rank-1 tensor of type int32 / int64
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
scope: name scope.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indices
Raises:
ValueError: if specified field is not contained in boxlist or if the
indices are not of type int32
"""
with tf.name_scope(scope, 'Gather'):
if len(indices.shape.as_list()) != 1:
raise ValueError('indices should have rank 1')
if indices.dtype != tf.int32 and indices.dtype != tf.int64:
raise ValueError('indices should be an int32 / int64 tensor')
subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all specified fields')
subfieldlist = tf.gather(boxlist.get_field(field), indices)
subboxlist.add_field(field, subfieldlist)
return subboxlist
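def _example_gather_usage():
  # Minimal usage sketch for gather(); box and score values are illustrative.
  # Extra fields such as 'scores' are gathered alongside the coordinates.
  boxes = box_list.BoxList(
      tf.constant([[0.0, 0.0, 1.0, 1.0],
                   [0.0, 0.0, 2.0, 2.0],
                   [0.0, 0.0, 3.0, 3.0]]))
  boxes.add_field('scores', tf.constant([0.9, 0.1, 0.8]))
  subset = gather(boxes, tf.constant([0, 2], tf.int32))
  with tf.Session() as sess:
    print(sess.run([subset.get(), subset.get_field('scores')]))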
def concatenate(boxlists, fields=None, scope=None):
"""Concatenate list of BoxLists.
This op concatenates a list of input BoxLists into a larger BoxList. It also
handles concatenation of BoxList fields as long as the field tensor shapes
are equal except for the first dimension.
Args:
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxList in the list are included in the
concatenation.
scope: name scope.
Returns:
a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in boxlists])
Raises:
ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
contains non BoxList objects), or if requested fields are not contained in
all boxlists
"""
with tf.name_scope(scope, 'Concatenate'):
if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list')
if not boxlists:
raise ValueError('boxlists should have nonzero length')
for boxlist in boxlists:
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('all elements of boxlists should be BoxList objects')
concatenated = box_list.BoxList(
tf.concat([boxlist.get() for boxlist in boxlists], 0))
if fields is None:
fields = boxlists[0].get_extra_fields()
for field in fields:
first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
first_field_shape[0] = -1
if None in first_field_shape:
raise ValueError('field %s must have fully defined shape except for the'
' 0th dimension.' % field)
for boxlist in boxlists:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all requested fields')
field_shape = boxlist.get_field(field).get_shape().as_list()
field_shape[0] = -1
if field_shape != first_field_shape:
raise ValueError('field %s must have same shape for all boxlists '
'except for the 0th dimension.' % field)
concatenated_field = tf.concat(
[boxlist.get_field(field) for boxlist in boxlists], 0)
concatenated.add_field(field, concatenated_field)
return concatenated
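def _example_concatenate_usage():
  # Minimal usage sketch for concatenate(); values are illustrative. Both
  # BoxLists carry a 'scores' field, so the field is concatenated as well.
  boxlist_a = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
  boxlist_a.add_field('scores', tf.constant([0.9]))
  boxlist_b = box_list.BoxList(tf.constant([[0.0, 0.0, 2.0, 2.0]]))
  boxlist_b.add_field('scores', tf.constant([0.5]))
  merged = concatenate([boxlist_a, boxlist_b])
  with tf.Session() as sess:
    print(sess.run([merged.get(), merged.get_field('scores')]))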
def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
"""Sort boxes and associated fields according to a scalar field.
A common use case is reordering the boxes according to descending scores.
Args:
boxlist: BoxList holding N boxes.
field: A BoxList field for sorting and reordering the BoxList.
order: (Optional) descend or ascend. Default is descend.
scope: name scope.
Returns:
sorted_boxlist: A sorted BoxList with the field in the specified order.
Raises:
ValueError: if specified field does not exist
ValueError: if the order is not either descend or ascend
"""
with tf.name_scope(scope, 'SortByField'):
if order != SortOrder.descend and order != SortOrder.ascend:
raise ValueError('Invalid sort order')
field_to_sort = boxlist.get_field(field)
if len(field_to_sort.shape.as_list()) != 1:
raise ValueError('Field should have rank 1')
num_boxes = boxlist.num_boxes()
num_entries = tf.size(field_to_sort)
length_assert = tf.Assert(
tf.equal(num_boxes, num_entries),
['Incorrect field size: actual vs expected.', num_entries, num_boxes])
with tf.control_dependencies([length_assert]):
# TODO: Remove with tf.device when top_k operation runs correctly on GPU.
with tf.device('/cpu:0'):
_, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
if order == SortOrder.ascend:
sorted_indices = tf.reverse_v2(sorted_indices, [0])
return gather(boxlist, sorted_indices)
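def _example_sort_by_field_usage():
  # Minimal usage sketch for sort_by_field(); values are illustrative. The
  # default order is descending, which is the usual choice for scores.
  boxes = box_list.BoxList(
      tf.constant([[0.0, 0.0, 1.0, 1.0],
                   [0.0, 0.0, 2.0, 2.0]]))
  boxes.add_field('scores', tf.constant([0.2, 0.8]))
  sorted_boxes = sort_by_field(boxes, 'scores')
  with tf.Session() as sess:
    print(sess.run(sorted_boxes.get_field('scores')))  # -> [0.8 0.2]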
def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
"""Overlay bounding box list on image.
Currently this visualization plots a 1 pixel thick red bounding box on top
of the image. Note that tf.image.draw_bounding_boxes essentially is
1 indexed.
Args:
image: an image tensor with shape [height, width, 3]
boxlist: a BoxList
normalized: (boolean) specify whether corners are to be interpreted
as absolute coordinates in image space or normalized with respect to the
image size.
scope: name scope.
Returns:
image_and_boxes: an image tensor with shape [height, width, 3]
"""
with tf.name_scope(scope, 'VisualizeBoxesInImage'):
if not normalized:
height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist,
1.0 / tf.cast(height, tf.float32),
1.0 / tf.cast(width, tf.float32))
corners = tf.expand_dims(boxlist.get(), 0)
image = tf.expand_dims(image, 0)
return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
def filter_field_value_equals(boxlist, field, value, scope=None):
"""Filter to keep only boxes with field entries equal to the given value.
Args:
boxlist: BoxList holding N boxes.
field: field name for filtering.
value: scalar value.
scope: name scope.
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not have
the specified field.
"""
with tf.name_scope(scope, 'FilterFieldValueEquals'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field(field):
raise ValueError('boxlist must contain the specified field')
filter_field = boxlist.get_field(field)
gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
return gather(boxlist, gather_index)
def filter_greater_than(boxlist, thresh, scope=None):
"""Filter to keep only boxes with score exceeding a given threshold.
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
TODO: Change function name to FilterScoresGreaterThan
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
scope: name scope.
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not
have a scores field
"""
with tf.name_scope(scope, 'FilterGreaterThan'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape.as_list()) > 2:
raise ValueError('Scores should have rank 1 or 2')
if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = tf.cast(tf.reshape(
tf.where(tf.greater(scores, thresh)),
[-1]), tf.int32)
return gather(boxlist, high_score_indices)
def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
"""Non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. Note that this only works for a single class ---
to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
max_output_size: maximum number of retained boxes
scope: name scope.
Returns:
a BoxList holding M boxes where M <= max_output_size
Raises:
ValueError: if thresh is not in [0, 1]
"""
with tf.name_scope(scope, 'NonMaxSuppression'):
if not 0 <= thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
selected_indices = tf.image.non_max_suppression(
boxlist.get(), boxlist.get_field('scores'),
max_output_size, iou_threshold=thresh)
return gather(boxlist, selected_indices)
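def _example_non_max_suppression_usage():
  # Minimal single-class NMS usage sketch; box and score values are
  # illustrative. The two heavily overlapping boxes collapse to the
  # higher-scoring one.
  boxes = box_list.BoxList(
      tf.constant([[0.0, 0.0, 1.0, 1.0],
                   [0.0, 0.1, 1.0, 1.1],
                   [0.0, 10.0, 1.0, 11.0]]))
  boxes.add_field('scores', tf.constant([0.9, 0.8, 0.7]))
  nms_boxes = non_max_suppression(boxes, thresh=0.5, max_output_size=10)
  with tf.Session() as sess:
    print(sess.run(nms_boxes.get()))  # -> [[0. 0. 1. 1.], [0. 10. 1. 11.]]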
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
"""Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
Args:
boxlist_to_copy_to: BoxList to which extra fields are copied.
boxlist_to_copy_from: BoxList from which fields are copied.
Returns:
boxlist_to_copy_to with extra fields.
"""
for field in boxlist_to_copy_from.get_extra_fields():
boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
return boxlist_to_copy_to
def to_normalized_coordinates(boxlist, height, width,
check_range=True, scope=None):
"""Converts absolute box coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(images)[1],
tf.shape(images)[2]),
This function raises an assertion failed error at graph execution time when
the maximum coordinate is smaller than 1.01 (which means that coordinates are
already normalized). The value 1.01 is to deal with small rounding errors.
Args:
boxlist: BoxList with coordinates in terms of pixel-locations.
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
scope: name scope.
Returns:
boxlist with normalized coordinates in [0, 1].
"""
with tf.name_scope(scope, 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
if check_range:
max_val = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(boxlist, 1 / height, 1 / width)
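def _example_to_normalized_coordinates_usage():
  # Minimal usage sketch for to_normalized_coordinates(); the 100x200 image
  # size and box values are illustrative.
  absolute = box_list.BoxList(tf.constant([[10.0, 20.0, 90.0, 180.0]]))
  normalized = to_normalized_coordinates(absolute, height=100, width=200)
  with tf.Session() as sess:
    print(sess.run(normalized.get()))  # -> [[0.1 0.1 0.9 0.9]]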
def to_absolute_coordinates(boxlist, height, width,
check_range=True, scope=None):
"""Converts normalized box coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum box coordinate
value is larger than 1.01 (in which case coordinates are already absolute).
Args:
boxlist: BoxList with coordinates in range [0, 1].
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
scope: name scope.
Returns:
boxlist with absolute coordinates in terms of the image size.
"""
with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
# Ensure range of input boxes is correct.
if check_range:
box_maximum = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(tf.greater_equal(1.01, box_maximum),
['maximum box coordinate value is larger '
'than 1.01: ', box_maximum])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(boxlist, height, width)
def refine_boxes_multi_class(pool_boxes,
num_classes,
nms_iou_thresh,
nms_max_detections,
voting_iou_thresh=0.5):
"""Refines a pool of boxes using non max suppression and box voting.
Box refinement is done independently for each class.
Args:
pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
have a rank 1 'scores' field and a rank 1 'classes' field.
num_classes: (int scalar) Number of classes.
nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
nms_max_detections: (int scalar) maximum output size for NMS.
voting_iou_thresh: (float scalar) iou threshold for box voting.
Returns:
BoxList of refined boxes.
Raises:
ValueError: if
a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
b) pool_boxes is not a BoxList.
c) pool_boxes does not have a scores and classes field.
"""
if not 0.0 <= nms_iou_thresh <= 1.0:
raise ValueError('nms_iou_thresh must be between 0 and 1')
if not 0.0 <= voting_iou_thresh <= 1.0:
raise ValueError('voting_iou_thresh must be between 0 and 1')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
if not pool_boxes.has_field('classes'):
raise ValueError('pool_boxes must have a \'classes\' field')
refined_boxes = []
for i in range(num_classes):
boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
nms_max_detections, voting_iou_thresh)
refined_boxes.append(refined_boxes_class)
return sort_by_field(concatenate(refined_boxes), 'scores')
def refine_boxes(pool_boxes,
nms_iou_thresh,
nms_max_detections,
voting_iou_thresh=0.5):
"""Refines a pool of boxes using non max suppression and box voting.
Args:
pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
have a rank 1 'scores' field.
nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
nms_max_detections: (int scalar) maximum output size for NMS.
voting_iou_thresh: (float scalar) iou threshold for box voting.
Returns:
BoxList of refined boxes.
Raises:
ValueError: if
a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
b) pool_boxes is not a BoxList.
c) pool_boxes does not have a scores field.
"""
if not 0.0 <= nms_iou_thresh <= 1.0:
raise ValueError('nms_iou_thresh must be between 0 and 1')
if not 0.0 <= voting_iou_thresh <= 1.0:
raise ValueError('voting_iou_thresh must be between 0 and 1')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
nms_boxes = non_max_suppression(
pool_boxes, nms_iou_thresh, nms_max_detections)
return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
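def _example_refine_boxes_usage():
  # Minimal usage sketch for refine_boxes(): NMS followed by box voting over
  # a small single-class pool of redundant detections. All values are
  # illustrative.
  pool = box_list.BoxList(
      tf.constant([[0.0, 0.0, 1.0, 1.0],
                   [0.0, 0.05, 1.0, 1.05],
                   [0.0, 10.0, 1.0, 11.0]]))
  pool.add_field('scores', tf.constant([0.9, 0.7, 0.6]))
  refined = refine_boxes(pool, nms_iou_thresh=0.5, nms_max_detections=10)
  with tf.Session() as sess:
    print(sess.run([refined.get(), refined.get_field('scores')]))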
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
"""Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
Performs box voting as described in 'Object detection via a multi-region &
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting), and the
  score of B is set to the average score of boxes in S.
Args:
selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
boxes are usually selected from pool_boxes using non max suppression.
pool_boxes: BoxList containing a set of (possibly redundant) boxes.
iou_thresh: (float scalar) iou threshold for matching boxes in
selected_boxes and pool_boxes.
Returns:
BoxList containing averaged locations and scores for each box in
selected_boxes.
Raises:
ValueError: if
a) selected_boxes or pool_boxes is not a BoxList.
b) if iou_thresh is not in [0, 1].
c) pool_boxes does not have a scores field.
"""
if not 0.0 <= iou_thresh <= 1.0:
raise ValueError('iou_thresh must be between 0 and 1')
if not isinstance(selected_boxes, box_list.BoxList):
raise ValueError('selected_boxes must be a BoxList')
if not isinstance(pool_boxes, box_list.BoxList):
raise ValueError('pool_boxes must be a BoxList')
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
iou_ = iou(selected_boxes, pool_boxes)
match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
num_matches = tf.reduce_sum(match_indicator, 1)
# TODO: Handle the case where some boxes in selected_boxes do not match to any
# boxes in pool_boxes. For such boxes without any matches, we should return
# the original boxes without voting.
match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)),
['Each box in selected_boxes must match with at least one box '
'in pool_boxes.'])
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert(
tf.reduce_all(tf.greater_equal(scores, 0)),
['Scores must be non negative.'])
with tf.control_dependencies([scores_assert, match_assert]):
sum_scores = tf.matmul(match_indicator, scores)
averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
box_locations = tf.matmul(match_indicator,
pool_boxes.get() * scores) / sum_scores
averaged_boxes = box_list.BoxList(box_locations)
_copy_extra_fields(averaged_boxes, selected_boxes)
averaged_boxes.add_field('scores', averaged_scores)
return averaged_boxes
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
"""Pads or clips all fields of a BoxList.
Args:
    boxlist: A BoxList with an arbitrary number of boxes.
num_boxes: First num_boxes in boxlist are kept.
The fields are zero-padded if num_boxes is bigger than the
actual number of boxes.
scope: name scope.
Returns:
BoxList with all fields padded or clipped.
"""
with tf.name_scope(scope, 'PadOrClipBoxList'):
subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor(
boxlist.get(), num_boxes))
for field in boxlist.get_extra_fields():
subfield = shape_utils.pad_or_clip_tensor(
boxlist.get_field(field), num_boxes)
subboxlist.add_field(field, subfield)
return subboxlist
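def _example_pad_or_clip_usage():
  # Minimal usage sketch for pad_or_clip_box_list(); values are illustrative.
  # A one-box BoxList (and its 'scores' field) is zero-padded out to three
  # entries.
  boxes = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]]))
  boxes.add_field('scores', tf.constant([0.7]))
  padded = pad_or_clip_box_list(boxes, num_boxes=3)
  with tf.Session() as sess:
    print(sess.run([padded.get(), padded.get_field('scores')]))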
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list_ops."""
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import errors
from object_detection.core import box_list
from object_detection.core import box_list_ops
class BoxListOpsTest(tf.test.TestCase):
"""Tests for common bounding box operations."""
def test_area(self):
corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
exp_output = [200.0, 4.0]
boxes = box_list.BoxList(corners)
areas = box_list_ops.area(boxes)
with self.test_session() as sess:
areas_output = sess.run(areas)
self.assertAllClose(areas_output, exp_output)
def test_height_width(self):
corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
exp_output_heights = [10., 2.]
exp_output_widths = [20., 2.]
boxes = box_list.BoxList(corners)
heights, widths = box_list_ops.height_width(boxes)
with self.test_session() as sess:
output_heights, output_widths = sess.run([heights, widths])
self.assertAllClose(output_heights, exp_output_heights)
self.assertAllClose(output_widths, exp_output_widths)
def test_scale(self):
corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
dtype=tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2]]))
y_scale = tf.constant(1.0/100)
x_scale = tf.constant(1.0/200)
scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale)
exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
with self.test_session() as sess:
scaled_corners_out = sess.run(scaled_boxes.get())
self.assertAllClose(scaled_corners_out, exp_output)
extra_data_out = sess.run(scaled_boxes.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2]])
def test_clip_to_window_filter_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0],
[-10.0, -10.0, -9.0, -9.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
[0.0, 0.0, 9.0, 14.0]]
pruned = box_list_ops.clip_to_window(
boxes, window, filter_nonoverlapping=True)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]])
def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0],
[-10.0, -10.0, -9.0, -9.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
[0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
pruned = box_list_ops.clip_to_window(
boxes, window, filter_nonoverlapping=False)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]])
def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-10.0, -10.0, -9.0, -9.0],
[-100.0, -100.0, 300.0, 600.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0]]
pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
keep_indices_out = sess.run(keep_indices)
self.assertAllEqual(keep_indices_out, [0, 2, 3])
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [3], [4]])
def test_prune_completely_outside_window(self):
window = tf.constant([0, 0, 9, 14], tf.float32)
corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-10.0, -10.0, -9.0, -9.0],
[-100.0, -100.0, 300.0, 600.0]])
boxes = box_list.BoxList(corners)
boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
exp_output = [[5.0, 5.0, 6.0, 6.0],
[-1.0, -2.0, 4.0, 5.0],
[2.0, 3.0, 5.0, 9.0],
[0.0, 0.0, 9.0, 14.0],
[-100.0, -100.0, 300.0, 600.0]]
pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
window)
with self.test_session() as sess:
pruned_output = sess.run(pruned.get())
self.assertAllClose(pruned_output, exp_output)
keep_indices_out = sess.run(keep_indices)
self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5])
extra_data_out = sess.run(pruned.get_field('extra_data'))
self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]])
def test_intersection(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
intersect = box_list_ops.intersection(boxes1, boxes2)
with self.test_session() as sess:
intersect_output = sess.run(intersect)
self.assertAllClose(intersect_output, exp_output)
def test_matched_intersection(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
exp_output = [2.0, 0.0]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
intersect = box_list_ops.matched_intersection(boxes1, boxes2)
with self.test_session() as sess:
intersect_output = sess.run(intersect)
self.assertAllClose(intersect_output, exp_output)
def test_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou = box_list_ops.iou(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou)
self.assertAllClose(iou_output, exp_output)
def test_matched_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
exp_output = [2.0 / 16.0, 0]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou = box_list_ops.matched_iou(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou)
self.assertAllClose(iou_output, exp_output)
def test_iouworks_on_empty_inputs(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty)
iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2)
iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty)
with self.test_session() as sess:
iou_output_1, iou_output_2, iou_output_3 = sess.run(
[iou_empty_1, iou_empty_2, iou_empty_3])
self.assertAllEqual(iou_output_1.shape, (2, 0))
self.assertAllEqual(iou_output_2.shape, (0, 3))
self.assertAllEqual(iou_output_3.shape, (0, 0))
def test_ioa(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
[1.0 / 12.0, 0.0, 5.0 / 400.0]]
exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
[0, 0],
[6.0 / 6.0, 5.0 / 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
ioa_1 = box_list_ops.ioa(boxes1, boxes2)
ioa_2 = box_list_ops.ioa(boxes2, boxes1)
with self.test_session() as sess:
ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2])
self.assertAllClose(ioa_output_1, exp_output_1)
self.assertAllClose(ioa_output_2, exp_output_2)
def test_prune_non_overlapping_boxes(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
minoverlap = 0.5
exp_output_1 = boxes1
exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes(
boxes1, boxes2, min_overlap=minoverlap)
output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes(
boxes2, boxes1, min_overlap=minoverlap)
with self.test_session() as sess:
(output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_,
exp_output_2_) = sess.run(
[output_1.get(), keep_indices_1,
output_2.get(), keep_indices_2,
exp_output_1.get(), exp_output_2.get()])
self.assertAllClose(output_1_, exp_output_1_)
self.assertAllClose(output_2_, exp_output_2_)
self.assertAllEqual(keep_indices_1_, [0, 1])
self.assertAllEqual(keep_indices_2_, [])
def test_prune_small_boxes(self):
boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
[5.0, 6.0, 10.0, 7.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_boxes = [[3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]]
boxes = box_list.BoxList(boxes)
pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
with self.test_session() as sess:
pruned_boxes = sess.run(pruned_boxes.get())
self.assertAllEqual(pruned_boxes, exp_boxes)
def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
[5.0, 6.0, 10.0, 7.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0],
[2.0, 3.0, 1.5, 7.0], # negative height
[2.0, 3.0, 5.0, 1.7]]) # negative width
exp_boxes = [[3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]]
boxes = box_list.BoxList(boxes)
pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
with self.test_session() as sess:
pruned_boxes = sess.run(pruned_boxes.get())
self.assertAllEqual(pruned_boxes, exp_boxes)
def test_change_coordinate_frame(self):
corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
boxes = box_list.BoxList(corners)
expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]])
expected_boxes = box_list.BoxList(expected_corners)
output = box_list_ops.change_coordinate_frame(boxes, window)
with self.test_session() as sess:
output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()])
self.assertAllClose(output_, expected_boxes_)
def test_ioaworks_on_empty_inputs(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty)
ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2)
ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty)
with self.test_session() as sess:
ioa_output_1, ioa_output_2, ioa_output_3 = sess.run(
[ioa_empty_1, ioa_empty_2, ioa_empty_3])
self.assertAllEqual(ioa_output_1.shape, (2, 0))
self.assertAllEqual(ioa_output_2.shape, (0, 3))
self.assertAllEqual(ioa_output_3.shape, (0, 0))
def test_pairwise_distances(self):
corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
[1.0, 1.0, 0.0, 2.0]])
corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
[-4.0, 0.0, 0.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
exp_output = [[26, 25, 0], [18, 27, 6]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
dist_matrix = box_list_ops.sq_dist(boxes1, boxes2)
with self.test_session() as sess:
dist_output = sess.run(dist_matrix)
self.assertAllClose(dist_output, exp_output)
def test_boolean_mask(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indicator = tf.constant([True, False, True, False, True], tf.bool)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
boxes = box_list.BoxList(corners)
subset = box_list_ops.boolean_mask(boxes, indicator)
with self.test_session() as sess:
subset_output = sess.run(subset.get())
self.assertAllClose(subset_output, expected_subset)
def test_boolean_mask_with_field(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indicator = tf.constant([True, False, True, False, True], tf.bool)
weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [[.1], [.5], [.9]]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.boolean_mask(boxes, indicator, ['weights'])
with self.test_session() as sess:
subset_output, weights_output = sess.run(
[subset.get(), subset.get_field('weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_gather(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indices = tf.constant([0, 2, 4], tf.int32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
boxes = box_list.BoxList(corners)
subset = box_list_ops.gather(boxes, indices)
with self.test_session() as sess:
subset_output = sess.run(subset.get())
self.assertAllClose(subset_output, expected_subset)
def test_gather_with_field(self):
corners = tf.constant([4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]])
indices = tf.constant([0, 2, 4], tf.int32)
weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [[.1], [.5], [.9]]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.gather(boxes, indices, ['weights'])
with self.test_session() as sess:
subset_output, weights_output = sess.run(
[subset.get(), subset.get_field('weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_gather_with_invalid_field(self):
corners = tf.constant([4 * [0.0], 4 * [1.0]])
indices = tf.constant([0, 1], tf.int32)
weights = tf.constant([[.1], [.3]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
with self.assertRaises(ValueError):
box_list_ops.gather(boxes, indices, ['foo', 'bar'])
def test_gather_with_invalid_inputs(self):
corners = tf.constant(
[4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
indices_float32 = tf.constant([0, 2, 4], tf.float32)
boxes = box_list.BoxList(corners)
with self.assertRaises(ValueError):
_ = box_list_ops.gather(boxes, indices_float32)
indices_2d = tf.constant([[0, 2, 4]], tf.int32)
boxes = box_list.BoxList(corners)
with self.assertRaises(ValueError):
_ = box_list_ops.gather(boxes, indices_2d)
def test_gather_with_dynamic_indexing(self):
    corners = tf.constant(
        [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
expected_weights = [.5, .7, .9]
boxes = box_list.BoxList(corners)
boxes.add_field('weights', weights)
subset = box_list_ops.gather(boxes, indices, ['weights'])
with self.test_session() as sess:
      subset_output, weights_output = sess.run(
          [subset.get(), subset.get_field('weights')])
self.assertAllClose(subset_output, expected_subset)
self.assertAllClose(weights_output, expected_weights)
def test_sort_by_field_ascending_order(self):
exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
[0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
exp_scores = [.95, .9, .75, .6, .5, .3]
exp_weights = [.2, .45, .6, .75, .8, .92]
shuffle = [2, 4, 0, 5, 1, 3]
corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(
[exp_scores[i] for i in shuffle], tf.float32))
boxes.add_field('weights', tf.constant(
[exp_weights[i] for i in shuffle], tf.float32))
sort_by_weight = box_list_ops.sort_by_field(
boxes,
'weights',
order=box_list_ops.SortOrder.ascend)
with self.test_session() as sess:
corners_out, scores_out, weights_out = sess.run([
sort_by_weight.get(),
sort_by_weight.get_field('scores'),
sort_by_weight.get_field('weights')])
self.assertAllClose(corners_out, exp_corners)
self.assertAllClose(scores_out, exp_scores)
self.assertAllClose(weights_out, exp_weights)
def test_sort_by_field_descending_order(self):
exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
[0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
exp_scores = [.95, .9, .75, .6, .5, .3]
exp_weights = [.2, .45, .6, .75, .8, .92]
shuffle = [2, 4, 0, 5, 1, 3]
corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(
[exp_scores[i] for i in shuffle], tf.float32))
boxes.add_field('weights', tf.constant(
[exp_weights[i] for i in shuffle], tf.float32))
sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
with self.test_session() as sess:
      corners_out, scores_out, weights_out = sess.run([
          sort_by_score.get(), sort_by_score.get_field('scores'),
          sort_by_score.get_field('weights')])
self.assertAllClose(corners_out, exp_corners)
self.assertAllClose(scores_out, exp_scores)
self.assertAllClose(weights_out, exp_weights)
def test_sort_by_field_invalid_inputs(self):
    corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0],
                           4 * [2.0], 4 * [3.0], 4 * [4.0]])
misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
weights = tf.constant([.1, .2], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('misc', misc)
boxes.add_field('weights', weights)
with self.test_session() as sess:
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'area')
with self.assertRaises(ValueError):
box_list_ops.sort_by_field(boxes, 'misc')
with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
'Incorrect field size'):
sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
def test_visualize_boxes_in_image(self):
image = tf.zeros((6, 4, 3))
corners = tf.constant([[0, 0, 5, 3],
[0, 0, 3, 2]], tf.float32)
boxes = box_list.BoxList(corners)
image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
image_and_boxes_bw = tf.to_float(
tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
exp_result = [[1, 1, 1, 0],
[1, 1, 1, 0],
[1, 1, 1, 0],
[1, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]]
with self.test_session() as sess:
output = sess.run(image_and_boxes_bw)
self.assertAllEqual(output.astype(int), exp_result)
def test_filter_field_value_equals(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
filtered_boxes1 = box_list_ops.filter_field_value_equals(
boxes, 'classes', 1)
filtered_boxes2 = box_list_ops.filter_field_value_equals(
boxes, 'classes', 2)
with self.test_session() as sess:
filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
filtered_boxes2.get()])
self.assertAllClose(filtered_output1, exp_output1)
self.assertAllClose(filtered_output2, exp_output2)
def test_filter_greater_than(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
thresh = .6
exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
with self.test_session() as sess:
filtered_output = sess.run(filtered_boxes.get())
self.assertAllClose(filtered_output, exp_output)
def test_clip_box_list(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
num_boxes = 2
clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 0]
expected_scores = [0.75, 0.65]
with self.test_session() as sess:
boxes_out, classes_out, scores_out = sess.run(
[clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
clipped_boxlist.get_field('scores')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllEqual(expected_classes, classes_out)
self.assertAllClose(expected_scores, scores_out)
def test_pad_box_list(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
num_boxes = 4
padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0, 0, 0, 0], [0, 0, 0, 0]]
expected_classes = [0, 1, 0, 0]
expected_scores = [0.75, 0.2, 0, 0]
with self.test_session() as sess:
boxes_out, classes_out, scores_out = sess.run(
[padded_boxlist.get(), padded_boxlist.get_field('classes'),
padded_boxlist.get_field('scores')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllEqual(expected_classes, classes_out)
self.assertAllClose(expected_scores, scores_out)
class ConcatenateTest(tf.test.TestCase):
def test_invalid_input_box_list_list(self):
with self.assertRaises(ValueError):
box_list_ops.concatenate(None)
with self.assertRaises(ValueError):
box_list_ops.concatenate([])
with self.assertRaises(ValueError):
corners = tf.constant([[0, 0, 0, 0]], tf.float32)
boxlist = box_list.BoxList(corners)
box_list_ops.concatenate([boxlist, 2])
def test_concatenate_with_missing_fields(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
with self.assertRaises(ValueError):
box_list_ops.concatenate([boxlist1, boxlist2])
def test_concatenate_with_incompatible_field_shapes(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
boxlist2.add_field('scores', scores2)
with self.assertRaises(ValueError):
box_list_ops.concatenate([boxlist1, boxlist2])
def test_concatenate_is_correct(self):
corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
scores1 = tf.constant([1.0, 2.1])
corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
tf.float32)
scores2 = tf.constant([1.0, 2.1, 5.6])
exp_corners = [[0, 0, 0, 0],
[1, 2, 3, 4],
[0, 3, 1, 6],
[2, 4, 3, 8],
[1, 0, 5, 10]]
exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
boxlist1 = box_list.BoxList(corners1)
boxlist1.add_field('scores', scores1)
boxlist2 = box_list.BoxList(corners2)
boxlist2.add_field('scores', scores2)
result = box_list_ops.concatenate([boxlist1, boxlist2])
with self.test_session() as sess:
corners_output, scores_output = sess.run(
[result.get(), result.get_field('scores')])
self.assertAllClose(corners_output, exp_corners)
self.assertAllClose(scores_output, exp_scores)
class NonMaxSuppressionTest(tf.test.TestCase):
def test_with_invalid_scores_field(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
iou_thresh = .5
max_output_size = 3
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
with self.assertRaisesWithPredicateMatch(
errors.InvalidArgumentError, 'scores has incompatible shape'):
sess.run(nms.get())
def test_select_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 100, 1, 101]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_at_most_two_boxes_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 2
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_at_most_thirty_boxes_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
iou_thresh = .5
max_output_size = 30
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 100, 1, 101]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_single_box(self):
corners = tf.constant([[0, 0, 1, 1]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant([.9]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_select_from_ten_identical_boxes(self):
corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
boxes = box_list.BoxList(corners)
boxes.add_field('scores', tf.constant(10 * [.9]))
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 0, 1, 1]]
nms = box_list_ops.non_max_suppression(
boxes, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_copy_extra_fields(self):
corners = tf.constant([[0, 0, 1, 1],
[0, 0.1, 1, 1.1]], tf.float32)
boxes = box_list.BoxList(corners)
tensor1 = np.array([[1], [4]])
tensor2 = np.array([[1, 1], [2, 2]])
boxes.add_field('tensor1', tf.constant(tensor1))
boxes.add_field('tensor2', tf.constant(tensor2))
new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
[1, 3, 5, 5]], tf.float32))
new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
with self.test_session() as sess:
self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
class CoordinatesConversionTest(tf.test.TestCase):
def test_to_normalized_coordinates(self):
coordinates = tf.constant([[0, 0, 100, 100],
[25, 25, 75, 75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
normalized_boxlist = box_list_ops.to_normalized_coordinates(
boxlist, tf.shape(img)[1], tf.shape(img)[2])
expected_boxes = [[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]]
with self.test_session() as sess:
normalized_boxes = sess.run(normalized_boxlist.get())
self.assertAllClose(normalized_boxes, expected_boxes)
def test_to_normalized_coordinates_already_normalized(self):
coordinates = tf.constant([[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
normalized_boxlist = box_list_ops.to_normalized_coordinates(
boxlist, tf.shape(img)[1], tf.shape(img)[2])
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(normalized_boxlist.get())
def test_to_absolute_coordinates(self):
coordinates = tf.constant([[0, 0, 1, 1],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
expected_boxes = [[0, 0, 100, 100],
[25, 25, 75, 75]]
with self.test_session() as sess:
absolute_boxes = sess.run(absolute_boxlist.get())
self.assertAllClose(absolute_boxes, expected_boxes)
  def test_to_absolute_coordinates_already_absolute(self):
coordinates = tf.constant([[0, 0, 100, 100],
[25, 25, 75, 75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(absolute_boxlist.get())
def test_convert_to_normalized_and_back(self):
coordinates = np.random.uniform(size=(100, 4))
coordinates = np.round(np.sort(coordinates) * 200)
coordinates[:, 2:4] += 1
coordinates[99, :] = [0, 0, 201, 201]
img = tf.ones((128, 202, 202, 3))
boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
def test_convert_to_absolute_and_back(self):
coordinates = np.random.uniform(size=(100, 4))
coordinates = np.sort(coordinates)
coordinates[99, :] = [0, 0, 1, 1]
img = tf.ones((128, 202, 202, 3))
boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
boxlist = box_list_ops.to_absolute_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
boxlist = box_list_ops.to_normalized_coordinates(boxlist,
tf.shape(img)[1],
tf.shape(img)[2])
with self.test_session() as sess:
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
class BoxRefinementTest(tf.test.TestCase):
def test_box_voting(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
candidates.add_field('ExtraField', tf.constant([1, 2]))
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
expected_scores = [0.5, 0.3]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[averaged_boxes.get(), averaged_boxes.get_field('scores'),
averaged_boxes.get_field('ExtraField')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [1, 2])
def test_box_voting_fails_with_negative_scores(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool.add_field('scores', tf.constant([-0.2]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
with self.test_session() as sess:
with self.assertRaisesOpError('Scores must be non negative'):
sess.run([averaged_boxes.get()])
def test_box_voting_fails_when_unmatched(self):
candidates = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('scores', tf.constant([0.2]))
averaged_boxes = box_list_ops.box_voting(candidates, pool)
with self.test_session() as sess:
with self.assertRaisesOpError('Each box in selected_boxes must match '
'with at least one box in pool_boxes.'):
sess.run([averaged_boxes.get()])
def test_refine_boxes(self):
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8]], tf.float32))
pool.add_field('ExtraField', tf.constant([1, 2, 3]))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
expected_scores = [0.5, 0.3]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[refined_boxes.get(), refined_boxes.get_field('scores'),
refined_boxes.get_field('ExtraField')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [1, 3])
def test_refine_boxes_multi_class(self):
pool = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
[0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
pool.add_field('classes', tf.constant([0, 0, 1, 1]))
pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
[0.2, 0.2, 0.3, 0.3]]
expected_scores = [0.5, 0.3, 0.2]
with self.test_session() as sess:
boxes_out, scores_out, extra_field_out = sess.run(
[refined_boxes.get(), refined_boxes.get_field('scores'),
refined_boxes.get_field('classes')])
self.assertAllClose(expected_boxes, boxes_out)
self.assertAllClose(expected_scores, scores_out)
self.assertAllEqual(extra_field_out, [0, 1, 1])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list."""
import tensorflow as tf
from object_detection.core import box_list
class BoxListTest(tf.test.TestCase):
"""Tests for BoxList class."""
def test_num_boxes(self):
data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
expected_num_boxes = 3
boxes = box_list.BoxList(data)
with self.test_session() as sess:
num_boxes_output = sess.run(boxes.num_boxes())
self.assertEquals(num_boxes_output, expected_num_boxes)
def test_get_correct_center_coordinates_and_sizes(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(boxes))
centers_sizes = boxes.get_center_coordinates_and_sizes()
expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
with self.test_session() as sess:
centers_sizes_out = sess.run(centers_sizes)
self.assertAllClose(centers_sizes_out, expected_centers_sizes)
def test_create_box_list_with_dynamic_shape(self):
data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
data = tf.gather(data, indices)
assert data.get_shape().as_list() == [None, 4]
expected_num_boxes = 2
boxes = box_list.BoxList(data)
with self.test_session() as sess:
num_boxes_output = sess.run(boxes.num_boxes())
self.assertEquals(num_boxes_output, expected_num_boxes)
def test_transpose_coordinates(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.transpose_coordinates()
expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
with self.test_session() as sess:
corners_out = sess.run(boxes.get())
self.assertAllClose(corners_out, expected_corners)
def test_box_list_invalid_inputs(self):
data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data0)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data1)
with self.assertRaises(ValueError):
_ = box_list.BoxList(data2)
def test_num_boxes_static(self):
box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
boxes = box_list.BoxList(tf.constant(box_corners))
self.assertEquals(boxes.num_boxes_static(), 2)
self.assertEquals(type(boxes.num_boxes_static()), int)
def test_num_boxes_static_for_uninferrable_shape(self):
placeholder = tf.placeholder(tf.float32, shape=[None, 4])
boxes = box_list.BoxList(placeholder)
self.assertEquals(boxes.num_boxes_static(), None)
def test_as_tensor_dict(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
tensor_dict = boxlist.as_tensor_dict()
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 1]
expected_scores = [0.75, 0.2]
with self.test_session() as sess:
tensor_dict_out = sess.run(tensor_dict)
self.assertAllEqual(3, len(tensor_dict_out))
self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
self.assertAllClose(expected_scores, tensor_dict_out['scores'])
def test_as_tensor_dict_with_features(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
expected_classes = [0, 1]
expected_scores = [0.75, 0.2]
with self.test_session() as sess:
tensor_dict_out = sess.run(tensor_dict)
self.assertAllEqual(3, len(tensor_dict_out))
self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
self.assertAllClose(expected_scores, tensor_dict_out['scores'])
def test_as_tensor_dict_missing_field(self):
boxlist = box_list.BoxList(
tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
boxlist.add_field('classes', tf.constant([0, 1]))
boxlist.add_field('scores', tf.constant([0.75, 0.2]))
with self.assertRaises(ValueError):
boxlist.as_tensor_dict(['foo', 'bar'])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box predictor for object detectors.
Box predictors are classes that take a high level
image feature map as input and produce two predictions,
(1) a tensor encoding box locations, and
(2) a tensor encoding classes for each box.
These components are passed directly to loss functions
in our detection models.
These modules are separated from the main model since the same
few box predictor architectures are shared across many models.
"""
from abc import abstractmethod
import tensorflow as tf
from object_detection.utils import ops
from object_detection.utils import static_shape
slim = tf.contrib.slim
BOX_ENCODINGS = 'box_encodings'
CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
MASK_PREDICTIONS = 'mask_predictions'
class BoxPredictor(object):
"""BoxPredictor."""
def __init__(self, is_training, num_classes):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
"""
self._is_training = is_training
self._num_classes = num_classes
@property
def num_classes(self):
return self._num_classes
def predict(self, image_features, num_predictions_per_location, scope,
**params):
"""Computes encoded object locations and corresponding confidences.
    Takes a high level image feature map as input and produces two predictions,
(1) a tensor encoding box locations, and
(2) a tensor encoding class scores for each corresponding box.
In this interface, we only assume that two tensors are returned as output
and do not assume anything about their shapes.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
scope: Variable and Op scope name.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A float tensor of shape
[batch_size, num_anchors, q, code_size] representing the location of
the objects, where q is 1 or the number of classes.
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
"""
with tf.variable_scope(scope):
return self._predict(image_features, num_predictions_per_location,
**params)
# TODO: num_predictions_per_location could be moved to constructor.
# This is currently only used by ConvolutionalBoxPredictor.
@abstractmethod
def _predict(self, image_features, num_predictions_per_location, **params):
"""Implementations must override this method.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A float tensor of shape
[batch_size, num_anchors, q, code_size] representing the location of
the objects, where q is 1 or the number of classes.
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
"""
pass
class RfcnBoxPredictor(BoxPredictor):
"""RFCN Box Predictor.
  Applies a position sensitive ROI pooling on position sensitive feature maps to
predict classes and refined locations. See https://arxiv.org/abs/1605.06409
for details.
This is used for the second stage of the RFCN meta architecture. Notice that
locations are *not* shared across classes, thus for each anchor, a separate
prediction is made for each class.
"""
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
num_spatial_bins,
depth,
crop_size,
box_code_size):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
      conv_hyperparams: Slim arg_scope with hyperparameters for convolutional
layers.
num_spatial_bins: A list of two integers `[spatial_bins_y,
spatial_bins_x]`.
depth: Target depth to reduce the input feature maps to.
crop_size: A list of two integers `[crop_height, crop_width]`.
box_code_size: Size of encoding for each box.
"""
super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
self._conv_hyperparams = conv_hyperparams
self._num_spatial_bins = num_spatial_bins
self._depth = depth
self._crop_size = crop_size
self._box_code_size = box_code_size
@property
def num_classes(self):
return self._num_classes
def _predict(self, image_features, num_predictions_per_location,
proposal_boxes):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
Currently, this must be set to 1, or an error will be raised.
proposal_boxes: A float tensor of shape [batch_size, num_proposals,
box_code_size].
    Returns:
      box_encodings: A float tensor of shape
        [batch_size * num_proposals, 1, num_classes, code_size] representing
        the location of the objects.
      class_predictions_with_background: A float tensor of shape
        [batch_size * num_proposals, 1, num_classes + 1] representing the class
        predictions for the proposals.
Raises:
ValueError: if num_predictions_per_location is not 1.
"""
if num_predictions_per_location != 1:
raise ValueError('Currently RfcnBoxPredictor only supports '
'predicting a single box per class per location.')
batch_size = tf.shape(proposal_boxes)[0]
num_boxes = tf.shape(proposal_boxes)[1]
def get_box_indices(proposals):
proposals_shape = proposals.get_shape().as_list()
if any(dim is None for dim in proposals_shape):
proposals_shape = tf.shape(proposals)
ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
multiplier = tf.expand_dims(
tf.range(start=0, limit=proposals_shape[0]), 1)
return tf.reshape(ones_mat * multiplier, [-1])
net = image_features
with slim.arg_scope(self._conv_hyperparams):
net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
# Location predictions.
location_feature_map_depth = (self._num_spatial_bins[0] *
self._num_spatial_bins[1] *
self.num_classes *
self._box_code_size)
location_feature_map = slim.conv2d(net, location_feature_map_depth,
[1, 1], activation_fn=None,
scope='refined_locations')
box_encodings = ops.position_sensitive_crop_regions(
location_feature_map,
boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
box_ind=get_box_indices(proposal_boxes),
crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins,
global_pool=True)
box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
box_encodings = tf.reshape(box_encodings,
[batch_size * num_boxes, 1, self.num_classes,
self._box_code_size])
# Class predictions.
total_classes = self.num_classes + 1 # Account for background class.
class_feature_map_depth = (self._num_spatial_bins[0] *
self._num_spatial_bins[1] *
total_classes)
class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
activation_fn=None,
scope='class_predictions')
class_predictions_with_background = ops.position_sensitive_crop_regions(
class_feature_map,
boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
box_ind=get_box_indices(proposal_boxes),
crop_size=self._crop_size,
num_spatial_bins=self._num_spatial_bins,
global_pool=True)
class_predictions_with_background = tf.squeeze(
class_predictions_with_background, squeeze_dims=[1, 2])
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
[batch_size * num_boxes, 1, total_classes])
return {BOX_ENCODINGS: box_encodings,
CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background}
class MaskRCNNBoxPredictor(BoxPredictor):
"""Mask R-CNN Box Predictor.
See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
Mask R-CNN. arXiv preprint arXiv:1703.06870.
This is used for the second stage of the Mask R-CNN detector where proposals
cropped from an image are arranged along the batch dimension of the input
image_features tensor. Notice that locations are *not* shared across classes,
thus for each anchor, a separate prediction is made for each class.
In addition to predicting boxes and classes, optionally this class allows
predicting masks and/or keypoints inside detection boxes.
Currently this box predictor makes per-class predictions; that is, each
anchor makes a separate box prediction for each class.
"""
def __init__(self,
is_training,
num_classes,
fc_hyperparams,
use_dropout,
dropout_keep_prob,
box_code_size,
conv_hyperparams=None,
predict_instance_masks=False,
mask_prediction_conv_depth=256,
predict_keypoints=False):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
fc_hyperparams: Slim arg_scope with hyperparameters for fully
connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
box_code_size: Size of encoding for each box.
conv_hyperparams: Slim arg_scope with hyperparameters for convolution
ops.
predict_instance_masks: Whether to predict object masks inside detection
boxes.
      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
        applied to the image_features in the mask prediction branch.
      predict_keypoints: Whether to predict keypoints inside detection boxes.
    Raises:
      ValueError: If predict_keypoints is true, or if predict_instance_masks is
        true and conv_hyperparams is not provided.
    """
super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
self._fc_hyperparams = fc_hyperparams
self._use_dropout = use_dropout
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._conv_hyperparams = conv_hyperparams
self._predict_instance_masks = predict_instance_masks
self._mask_prediction_conv_depth = mask_prediction_conv_depth
self._predict_keypoints = predict_keypoints
if self._predict_keypoints:
raise ValueError('Keypoint prediction is unimplemented.')
if ((self._predict_instance_masks or self._predict_keypoints) and
self._conv_hyperparams is None):
raise ValueError('`conv_hyperparams` must be provided when predicting '
'masks.')
@property
def num_classes(self):
return self._num_classes
def _predict(self, image_features, num_predictions_per_location):
"""Computes encoded object locations and corresponding confidences.
Flattens image_features and applies fully connected ops (with no
non-linearity) to predict box encodings and class predictions. In this
setting, anchors are not spatially arranged in any way and are assumed to
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
Currently, this must be set to 1, or an error will be raised.
Returns:
A dictionary containing the following tensors.
box_encodings: A float tensor of shape
[batch_size, 1, num_classes, code_size] representing the
location of the objects.
class_predictions_with_background: A float tensor of shape
[batch_size, 1, num_classes + 1] representing the class
predictions for the proposals.
If predict_masks is True the dictionary also contains:
instance_masks: A float tensor of shape
[batch_size, 1, num_classes, image_height, image_width]
If predict_keypoints is True the dictionary also contains:
keypoints: [batch_size, 1, num_keypoints, 2]
Raises:
ValueError: if num_predictions_per_location is not 1.
"""
if num_predictions_per_location != 1:
      raise ValueError('Currently MaskRCNNBoxPredictor only supports '
                       'predicting a single box per class per location.')
spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
keep_dims=True,
name='AvgPool')
flattened_image_features = slim.flatten(spatial_averaged_image_features)
if self._use_dropout:
flattened_image_features = slim.dropout(flattened_image_features,
keep_prob=self._dropout_keep_prob,
is_training=self._is_training)
with slim.arg_scope(self._fc_hyperparams):
box_encodings = slim.fully_connected(
flattened_image_features,
self._num_classes * self._box_code_size,
activation_fn=None,
scope='BoxEncodingPredictor')
class_predictions_with_background = slim.fully_connected(
flattened_image_features,
self._num_classes + 1,
activation_fn=None,
scope='ClassPredictor')
box_encodings = tf.reshape(
box_encodings, [-1, 1, self._num_classes, self._box_code_size])
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [-1, 1, self._num_classes + 1])
predictions_dict = {
BOX_ENCODINGS: box_encodings,
CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background
}
if self._predict_instance_masks:
with slim.arg_scope(self._conv_hyperparams):
upsampled_features = slim.conv2d_transpose(
image_features,
num_outputs=self._mask_prediction_conv_depth,
kernel_size=[2, 2],
stride=2)
mask_predictions = slim.conv2d(upsampled_features,
num_outputs=self.num_classes,
activation_fn=None,
kernel_size=[1, 1])
instance_masks = tf.expand_dims(tf.transpose(mask_predictions,
perm=[0, 3, 1, 2]),
axis=1,
name='MaskPredictor')
predictions_dict[MASK_PREDICTIONS] = instance_masks
return predictions_dict
class ConvolutionalBoxPredictor(BoxPredictor):
"""Convolutional Box Predictor.
Optionally add an intermediate 1x1 convolutional layer after features and
predict in parallel branches box_encodings and
class_predictions_with_background.
Currently this box predictor assumes that predictions are "shared" across
classes --- that is each anchor makes box predictions which do not depend
on class.
"""
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
min_depth,
max_depth,
num_layers_before_predictor,
use_dropout,
dropout_keep_prob,
kernel_size,
box_code_size,
apply_sigmoid_to_scores=False):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
      min_depth: Minimum feature depth prior to predicting box encodings
and class predictions.
max_depth: Maximum feature depth prior to predicting box encodings
and class predictions. If max_depth is set to 0, no additional
feature map will be inserted before location and class predictions.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
use_dropout: Option to use dropout for class prediction or not.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
kernel_size: Size of final convolution kernel. If the
spatial resolution of the feature map is smaller than the kernel size,
then the kernel size is automatically set to be
min(feature_width, feature_height).
box_code_size: Size of encoding for each box.
apply_sigmoid_to_scores: if True, apply the sigmoid on the output
class_predictions.
Raises:
ValueError: if min_depth > max_depth.
"""
super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
if min_depth > max_depth:
raise ValueError('min_depth should be less than or equal to max_depth')
self._conv_hyperparams = conv_hyperparams
self._min_depth = min_depth
self._max_depth = max_depth
self._num_layers_before_predictor = num_layers_before_predictor
self._use_dropout = use_dropout
self._kernel_size = kernel_size
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
def _predict(self, image_features, num_predictions_per_location):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
num_predictions_per_location: an integer representing the number of box
predictions to be made per spatial location in the feature map.
Returns:
A dictionary containing the following tensors.
box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
code_size] representing the location of the objects, where
num_anchors = feat_height * feat_width * num_predictions_per_location
class_predictions_with_background: A float tensor of shape
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
"""
features_depth = static_shape.get_depth(image_features.get_shape())
depth = max(min(features_depth, self._max_depth), self._min_depth)
# Add a slot for the background class.
num_class_slots = self.num_classes + 1
net = image_features
with slim.arg_scope(self._conv_hyperparams), \
slim.arg_scope([slim.dropout], is_training=self._is_training):
# Add additional conv layers before the predictor.
if depth > 0 and self._num_layers_before_predictor > 0:
for i in range(self._num_layers_before_predictor):
net = slim.conv2d(
net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
with slim.arg_scope([slim.conv2d], activation_fn=None,
normalizer_fn=None, normalizer_params=None):
box_encodings = slim.conv2d(
net, num_predictions_per_location * self._box_code_size,
[self._kernel_size, self._kernel_size],
scope='BoxEncodingPredictor')
if self._use_dropout:
net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
class_predictions_with_background = slim.conv2d(
net, num_predictions_per_location * num_class_slots,
[self._kernel_size, self._kernel_size], scope='ClassPredictor')
if self._apply_sigmoid_to_scores:
class_predictions_with_background = tf.sigmoid(
class_predictions_with_background)
batch_size = static_shape.get_batch_size(image_features.get_shape())
if batch_size is None:
features_height = static_shape.get_height(image_features.get_shape())
features_width = static_shape.get_width(image_features.get_shape())
flattened_predictions_size = (features_height * features_width *
num_predictions_per_location)
box_encodings = tf.reshape(
box_encodings,
[-1, flattened_predictions_size, 1, self._box_code_size])
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
[-1, flattened_predictions_size, num_class_slots])
else:
box_encodings = tf.reshape(
box_encodings, [batch_size, -1, 1, self._box_code_size])
class_predictions_with_background = tf.reshape(
class_predictions_with_background, [batch_size, -1, num_class_slots])
return {BOX_ENCODINGS: box_encodings,
CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background}
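# A minimal usage sketch for the BoxPredictor contract described in the module
# docstring above. Everything below the imports is an illustrative assumption:
# _ZeroBoxPredictor is a made-up toy subclass, not a predictor shipped with the
# library. It only exists to show the shape of the dictionary that _predict()
# implementations are expected to return.
import tensorflow as tf
from object_detection.core import box_predictor

class _ZeroBoxPredictor(box_predictor.BoxPredictor):
  """Toy predictor emitting all-zero encodings with the documented shapes."""

  def _predict(self, image_features, num_predictions_per_location):
    batch_size = tf.shape(image_features)[0]
    feat_height = tf.shape(image_features)[1]
    feat_width = tf.shape(image_features)[2]
    # num_anchors = feat_height * feat_width * num_predictions_per_location,
    # following the convention used by ConvolutionalBoxPredictor above.
    num_anchors = feat_height * feat_width * num_predictions_per_location
    box_encodings = tf.zeros(tf.stack([batch_size, num_anchors, 1, 4]))
    class_predictions = tf.zeros(
        tf.stack([batch_size, num_anchors, self.num_classes + 1]))
    return {box_predictor.BOX_ENCODINGS: box_encodings,
            box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions}

# Example call: on a [2, 3, 3, 16] feature map with two predictions per
# location, predict() wraps _predict() in a variable scope and yields 18 anchors.
#   features = tf.ones([2, 3, 3, 16])
#   predictions = _ZeroBoxPredictor(is_training=False, num_classes=5).predict(
#       features, num_predictions_per_location=2, scope='ToyBoxPredictor')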
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_predictor."""
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.core import box_predictor
from object_detection.protos import hyperparams_pb2
class MaskRCNNBoxPredictorTest(tf.test.TestCase):
def _build_arg_scope_with_hyperparams(self,
op_type=hyperparams_pb2.Hyperparams.FC):
hyperparams = hyperparams_pb2.Hyperparams()
hyperparams_text_proto = """
activation: NONE
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(hyperparams_text_proto, hyperparams)
hyperparams.op = op_type
return hyperparams_builder.build(hyperparams, is_training=True)
def test_get_boxes_with_five_classes(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
)
box_predictions = mask_box_predictor.predict(
image_features, num_predictions_per_location=1, scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
class_predictions_with_background = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
class_predictions_with_background_shape) = sess.run(
[tf.shape(box_encodings),
tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
with self.assertRaises(ValueError):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
predict_instance_masks=True)
def test_get_instance_masks(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
conv_hyperparams=self._build_arg_scope_with_hyperparams(
op_type=hyperparams_pb2.Hyperparams.CONV),
predict_instance_masks=True)
box_predictions = mask_box_predictor.predict(
image_features, num_predictions_per_location=1, scope='BoxPredictor')
mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS]
self.assertListEqual([2, 1, 5, 14, 14],
mask_predictions.get_shape().as_list())
def test_do_not_return_instance_masks_and_keypoints_without_request(self):
image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4)
box_predictions = mask_box_predictor.predict(
image_features, num_predictions_per_location=1, scope='BoxPredictor')
self.assertEqual(len(box_predictions), 2)
self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions)
self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND
in box_predictions)
def test_value_error_on_predict_keypoints(self):
with self.assertRaises(ValueError):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
predict_keypoints=True)
class RfcnBoxPredictorTest(tf.test.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_correct_box_encoding_and_class_prediction_shapes(self):
image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
is_training=False,
num_classes=2,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
num_spatial_bins=[3, 3],
depth=4,
crop_size=[12, 12],
box_code_size=4
)
box_predictions = rfcn_box_predictor.predict(
image_features, num_predictions_per_location=1, scope='BoxPredictor',
proposal_boxes=proposal_boxes)
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
class_predictions_with_background = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
class_predictions_shape) = sess.run(
[tf.shape(box_encodings),
tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
self.assertAllEqual(class_predictions_shape, [8, 1, 3])
class ConvolutionalBoxPredictorTest(tf.test.TestCase):
def _build_arg_scope_with_conv_hyperparams(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
image_features, num_predictions_per_location=5, scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
objectness_predictions = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)])
self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
self.assertAllEqual(objectness_predictions_shape, [4, 320, 1])
def test_get_boxes_for_one_aspect_ratio_per_location(self):
image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
image_features, num_predictions_per_location=1, scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
objectness_predictions = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)])
self.assertAllEqual(box_encodings_shape, [4, 64, 1, 4])
self.assertAllEqual(objectness_predictions_shape, [4, 64, 1])
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
self):
num_classes_without_background = 6
image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
image_features,
num_predictions_per_location=5,
scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
class_predictions_with_background = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape, class_predictions_with_background_shape
) = sess.run([
tf.shape(box_encodings), tf.shape(class_predictions_with_background)])
self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
self.assertAllEqual(class_predictions_with_background_shape,
[4, 320, num_classes_without_background+1])
def test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional(
self):
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
use_dropout=True,
dropout_keep_prob=0.8,
kernel_size=1,
box_code_size=4
)
box_predictions = conv_box_predictor.predict(
image_features, num_predictions_per_location=5, scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
objectness_predictions = box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
init_op = tf.global_variables_initializer()
resolution = 32
expected_num_anchors = resolution*resolution*5
with self.test_session() as sess:
sess.run(init_op)
(box_encodings_shape,
objectness_predictions_shape) = sess.run(
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: Core.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "batcher",
srcs = ["batcher.py"],
deps = [
":prefetcher",
":preprocessor",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "batcher_test",
srcs = ["batcher_test.py"],
deps = [
":batcher",
"//tensorflow",
],
)
py_library(
name = "box_list",
srcs = [
"box_list.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "box_list_test",
srcs = ["box_list_test.py"],
deps = [
":box_list",
],
)
py_library(
name = "box_list_ops",
srcs = [
"box_list_ops.py",
],
deps = [
":box_list",
"//tensorflow",
"//tensorflow_models/object_detection/utils:shape_utils",
],
)
py_test(
name = "box_list_ops_test",
srcs = ["box_list_ops_test.py"],
deps = [
":box_list",
":box_list_ops",
],
)
py_library(
name = "box_coder",
srcs = [
"box_coder.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "box_coder_test",
srcs = [
"box_coder_test.py",
],
deps = [
":box_coder",
":box_list",
"//tensorflow",
],
)
py_library(
name = "keypoint_ops",
srcs = [
"keypoint_ops.py",
],
deps = [
"//tensorflow",
],
)
py_test(
name = "keypoint_ops_test",
srcs = ["keypoint_ops_test.py"],
deps = [
":keypoint_ops",
],
)
py_library(
name = "losses",
srcs = ["losses.py"],
deps = [
":box_list",
":box_list_ops",
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_library(
name = "matcher",
srcs = [
"matcher.py",
],
deps = [
],
)
py_library(
name = "model",
srcs = ["model.py"],
deps = [
":standard_fields",
],
)
py_test(
name = "matcher_test",
srcs = [
"matcher_test.py",
],
deps = [
":matcher",
"//tensorflow",
],
)
py_library(
name = "prefetcher",
srcs = ["prefetcher.py"],
deps = ["//tensorflow"],
)
py_library(
name = "preprocessor",
srcs = [
"preprocessor.py",
],
deps = [
":box_list",
":box_list_ops",
":keypoint_ops",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "preprocessor_test",
srcs = [
"preprocessor_test.py",
],
deps = [
":preprocessor",
"//tensorflow",
],
)
py_test(
name = "losses_test",
srcs = ["losses_test.py"],
deps = [
":box_list",
":losses",
":matcher",
"//tensorflow",
],
)
py_test(
name = "prefetcher_test",
srcs = ["prefetcher_test.py"],
deps = [
":prefetcher",
"//tensorflow",
],
)
py_library(
name = "standard_fields",
srcs = [
"standard_fields.py",
],
)
py_library(
name = "post_processing",
srcs = ["post_processing.py"],
deps = [
":box_list",
":box_list_ops",
":standard_fields",
"//tensorflow",
],
)
py_test(
name = "post_processing_test",
srcs = ["post_processing_test.py"],
deps = [
":box_list",
":box_list_ops",
":post_processing",
"//tensorflow",
],
)
py_library(
name = "target_assigner",
srcs = [
"target_assigner.py",
],
deps = [
":box_list",
":box_list_ops",
":matcher",
":region_similarity_calculator",
"//tensorflow",
"//tensorflow_models/object_detection/box_coders:faster_rcnn_box_coder",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/matchers:argmax_matcher",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
],
)
py_test(
name = "target_assigner_test",
size = "large",
timeout = "long",
srcs = ["target_assigner_test.py"],
deps = [
":box_list",
":region_similarity_calculator",
":target_assigner",
"//tensorflow",
"//tensorflow_models/object_detection/box_coders:mean_stddev_box_coder",
"//tensorflow_models/object_detection/matchers:bipartite_matcher",
],
)
py_library(
name = "data_decoder",
srcs = ["data_decoder.py"],
)
py_library(
name = "box_predictor",
srcs = ["box_predictor.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:static_shape",
],
)
py_test(
name = "box_predictor_test",
srcs = ["box_predictor_test.py"],
deps = [
":box_predictor",
"//tensorflow",
"//tensorflow_models/object_detection/builders:hyperparams_builder",
"//tensorflow_models/object_detection/protos:hyperparams_py_pb2",
],
)
py_library(
name = "region_similarity_calculator",
srcs = [
"region_similarity_calculator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
py_test(
name = "region_similarity_calculator_test",
srcs = [
"region_similarity_calculator_test.py",
],
deps = [
":region_similarity_calculator",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "anchor_generator",
srcs = [
"anchor_generator.py",
],
deps = [
"//tensorflow",
],
)
py_library(
name = "minibatch_sampler",
srcs = [
"minibatch_sampler.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_test(
name = "minibatch_sampler_test",
srcs = [
"minibatch_sampler_test.py",
],
deps = [
":minibatch_sampler",
"//tensorflow",
],
)
py_library(
name = "balanced_positive_negative_sampler",
srcs = [
"balanced_positive_negative_sampler.py",
],
deps = [
":minibatch_sampler",
"//tensorflow",
],
)
py_test(
name = "balanced_positive_negative_sampler_test",
srcs = [
"balanced_positive_negative_sampler_test.py",
],
deps = [
":balanced_positive_negative_sampler",
"//tensorflow",
],
)
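# A minimal sketch of how the targets above are typically exercised, assuming
# this file lives at //tensorflow_models/object_detection/core as the dep
# labels above suggest:
#   bazel test //tensorflow_models/object_detection/core:box_list_ops_test
#   bazel test //tensorflow_models/object_detection/core:all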
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data decoders.
Data decoders decode the input data and return a dictionary of tensors keyed by
the entries in core.reader.Fields.
"""
from abc import ABCMeta
from abc import abstractmethod
class DataDecoder(object):
"""Interface for data decoders."""
__metaclass__ = ABCMeta
# TODO: snake_case this method.
@abstractmethod
def Decode(self, data):
"""Return a single image and associated labels.
Args:
data: a string tensor holding a serialized protocol buffer corresponding
to data for a single image.
Returns:
tensor_dict: a dictionary containing tensors. Possible keys are defined in
reader.Fields.
"""
pass
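# A minimal sketch of a DataDecoder implementation, under stated assumptions:
# the input is a serialized tf.Example whose image bytes sit under the
# hypothetical feature key 'image/encoded', and the returned dictionary uses
# the illustrative key 'image' rather than an official reader.Fields entry.
import tensorflow as tf

class _ToyImageDecoder(DataDecoder):
  """Toy decoder that extracts a single JPEG image from a tf.Example."""

  def Decode(self, data):
    features = tf.parse_single_example(
        data, {'image/encoded': tf.FixedLenFeature((), tf.string)})
    # Decode the JPEG bytes into a [height, width, 3] uint8 tensor.
    image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
    return {'image': image}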
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint operations.
Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
where the last dimension holds the [y, x] coordinates of each keypoint.
"""
import numpy as np
import tensorflow as tf
def scale(keypoints, y_scale, x_scale, scope=None):
"""Scales keypoint coordinates in x and y dimensions.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
y_scale: (float) scalar tensor
x_scale: (float) scalar tensor
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'Scale'):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
new_keypoints = keypoints * [[[y_scale, x_scale]]]
return new_keypoints
def clip_to_window(keypoints, window, scope=None):
"""Clips keypoints to a window.
This op clips any input keypoints to a window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window to which the op should clip the keypoints.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'ClipToWindow'):
y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
new_keypoints = tf.concat([y, x], 2)
return new_keypoints
def prune_outside_window(keypoints, window, scope=None):
"""Prunes keypoints that fall outside a given window.
This function replaces keypoints that fall outside the given window with nan.
See also clip_to_window which clips any keypoints that fall outside the given
window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window outside of which the op should prune the keypoints.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'PruneOutsideWindow'):
y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
valid_indices = tf.logical_and(
tf.logical_and(y >= win_y_min, y <= win_y_max),
tf.logical_and(x >= win_x_min, x <= win_x_max))
new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
new_keypoints = tf.concat([new_y, new_x], 2)
return new_keypoints
def change_coordinate_frame(keypoints, window, scope=None):
"""Changes coordinate frame of the keypoints to be relative to window's frame.
Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
coordinates from keypoints of shape [num_instances, num_keypoints, 2]
to be relative to this window.
An example use case is data augmentation: where we are given groundtruth
keypoints and would like to randomly crop the image to some window. In this
case we need to change the coordinate frame of each groundtruth keypoint to be
relative to this new window.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
window we should change the coordinate frame to.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
1.0 / win_width)
return new_keypoints
def to_normalized_coordinates(keypoints, height, width,
check_range=True, scope=None):
"""Converts absolute keypoint coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
tf.shape(images)[1],
                                                       tf.shape(images)[2])
This function raises an assertion failed error at graph execution time when
the maximum coordinate is smaller than 1.01 (which means that coordinates are
already normalized). The value 1.01 is to deal with small rounding errors.
Args:
keypoints: A tensor of shape [num_instances, num_keypoints, 2].
height: Maximum value for y coordinate of absolute keypoint coordinates.
width: Maximum value for x coordinate of absolute keypoint coordinates.
check_range: If True, checks if the coordinates are normalized.
scope: name scope.
Returns:
tensor of shape [num_instances, num_keypoints, 2] with normalized
coordinates in [0, 1].
"""
with tf.name_scope(scope, 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
if check_range:
max_val = tf.reduce_max(keypoints)
max_assert = tf.Assert(tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(keypoints, 1.0 / height, 1.0 / width)
def to_absolute_coordinates(keypoints, height, width,
check_range=True, scope=None):
"""Converts normalized keypoint coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum keypoint
coordinate value is larger than 1.01 (in which case coordinates are already
absolute).
Args:
keypoints: A tensor of shape [num_instances, num_keypoints, 2]
height: Maximum value for y coordinate of absolute keypoint coordinates.
width: Maximum value for x coordinate of absolute keypoint coordinates.
check_range: If True, checks if the coordinates are normalized or not.
scope: name scope.
Returns:
tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
in terms of the image size.
"""
with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
# Ensure range of input keypoints is correct.
if check_range:
max_val = tf.reduce_max(keypoints)
max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
['maximum keypoint coordinate value is larger '
'than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
return scale(keypoints, height, width)
def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
"""Flips the keypoints horizontally around the flip_point.
This operation flips the x coordinate for each keypoint around the flip_point
and also permutes the keypoints in a manner specified by flip_permutation.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
to the flipped keypoint indices. This is used primarily for keypoints
that are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with tf.name_scope(scope, 'FlipHorizontal'):
keypoints = tf.transpose(keypoints, [1, 0, 2])
keypoints = tf.gather(keypoints, flip_permutation)
v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
u = flip_point * 2.0 - u
new_keypoints = tf.concat([v, u], 2)
new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
return new_keypoints
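# Illustrative usage sketch: a minimal, made-up example (the keypoint values,
# window and permutation below are not part of the original module) chaining a
# few of the ops defined above under a TF 1.x session.
if __name__ == '__main__':
  example_keypoints = tf.constant([[[0.1, 0.2], [0.9, 0.8]]])  # [1 instance, 2 keypoints, 2]
  example_window = tf.constant([0.0, 0.0, 0.5, 0.5])  # [y_min, x_min, y_max, x_max]
  clipped = clip_to_window(example_keypoints, example_window)
  pruned = prune_outside_window(example_keypoints, example_window)  # outside -> NaN
  relative = change_coordinate_frame(example_keypoints, example_window)
  flipped = flip_horizontal(example_keypoints, 0.5, [1, 0])  # swap the two keypoints
  with tf.Session() as sess:
    print(sess.run([clipped, pruned, relative, flipped]))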
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.keypoint_ops."""
import numpy as np
import tensorflow as tf
from object_detection.core import keypoint_ops
class KeypointOpsTest(tf.test.TestCase):
"""Tests for common keypoint operations."""
def test_scale(self):
keypoints = tf.constant([
[[0.0, 0.0], [100.0, 200.0]],
[[50.0, 120.0], [100.0, 140.0]]
])
y_scale = tf.constant(1.0 / 100)
x_scale = tf.constant(1.0 / 200)
expected_keypoints = tf.constant([
[[0., 0.], [1.0, 1.0]],
[[0.5, 0.6], [1.0, 0.7]]
])
output = keypoint_ops.scale(keypoints, y_scale, x_scale)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_clip_to_window(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.25], [0.75, 0.75]]
])
output = keypoint_ops.clip_to_window(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_prune_outside_window(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
[[np.nan, np.nan], [np.nan, np.nan]]])
output = keypoint_ops.prune_outside_window(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_change_coordinate_frame(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
window = tf.constant([0.25, 0.25, 0.75, 0.75])
expected_keypoints = tf.constant([
[[0, 0.5], [1.0, 1.0]],
[[0.5, -0.5], [1.5, 1.5]]
])
output = keypoint_ops.change_coordinate_frame(keypoints, window)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_normalized_coordinates(self):
keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
output = keypoint_ops.to_normalized_coordinates(
keypoints, 40, 60)
expected_keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_normalized_coordinates_already_normalized(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
output = keypoint_ops.to_normalized_coordinates(
keypoints, 40, 60)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(output)
def test_to_absolute_coordinates(self):
keypoints = tf.constant([
[[0.25, 0.5], [0.75, 0.75]],
[[0.5, 0.0], [1.0, 1.0]]
])
output = keypoint_ops.to_absolute_coordinates(
keypoints, 40, 60)
expected_keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
def test_to_absolute_coordinates_already_absolute(self):
keypoints = tf.constant([
[[10., 30.], [30., 45.]],
[[20., 0.], [40., 60.]]
])
output = keypoint_ops.to_absolute_coordinates(
keypoints, 40, 60)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(output)
def test_flip_horizontal(self):
keypoints = tf.constant([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
])
flip_permutation = [0, 2, 1]
expected_keypoints = tf.constant([
[[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
[[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
])
output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
with self.test_session() as sess:
output_, expected_keypoints_ = sess.run([output, expected_keypoints])
self.assertAllClose(output_, expected_keypoints_)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classification and regression loss functions for object detection.
Localization losses:
* WeightedL2LocalizationLoss
* WeightedSmoothL1LocalizationLoss
* WeightedIOULocalizationLoss
Classification losses:
* WeightedSigmoidClassificationLoss
* WeightedSoftmaxClassificationLoss
* BootstrappedSigmoidClassificationLoss
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.utils import ops
slim = tf.contrib.slim
class Loss(object):
"""Abstract base class for loss functions."""
__metaclass__ = ABCMeta
def __call__(self,
prediction_tensor,
target_tensor,
ignore_nan_targets=False,
scope=None,
**params):
"""Call the loss function.
Args:
prediction_tensor: a tensor representing predicted quantities.
target_tensor: a tensor representing regression or classification targets.
ignore_nan_targets: whether to ignore nan targets in the loss computation.
E.g. can be used if the target tensor is missing groundtruth data that
shouldn't be factored into the loss.
scope: Op scope name. Defaults to 'Loss' if None.
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function.
"""
with tf.name_scope(scope, 'Loss',
[prediction_tensor, target_tensor, params]) as scope:
if ignore_nan_targets:
target_tensor = tf.where(tf.is_nan(target_tensor),
prediction_tensor,
target_tensor)
return self._compute_loss(prediction_tensor, target_tensor, **params)
@abstractmethod
def _compute_loss(self, prediction_tensor, target_tensor, **params):
"""Method to be overriden by implementations.
Args:
prediction_tensor: a tensor representing predicted quantities
target_tensor: a tensor representing regression or classification targets
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function
"""
pass
class WeightedL2LocalizationLoss(Loss):
"""L2 localization loss function with anchorwise output support.
Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
"""
def __init__(self, anchorwise_output=False):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self._anchorwise_output = anchorwise_output
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
"""
weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
weights, 2)
square_diff = 0.5 * tf.square(weighted_diff)
if self._anchorwise_output:
return tf.reduce_sum(square_diff, 2)
return tf.reduce_sum(square_diff)
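# Illustrative note: with unit weights this reduces to
# 0.5 * ||prediction_tensor - target_tensor||^2 summed over all entries, i.e.
# the same value as tf.nn.l2_loss(prediction_tensor - target_tensor).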
class WeightedSmoothL1LocalizationLoss(Loss):
"""Smooth L1 localization loss function.
  The smooth L1 loss is defined elementwise as 0.5 * x^2 if |x| < 1 and
  |x| - 0.5 otherwise, where x is the difference between predictions and target.
See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
"""
def __init__(self, anchorwise_output=False):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self._anchorwise_output = anchorwise_output
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
"""
diff = prediction_tensor - target_tensor
abs_diff = tf.abs(diff)
abs_diff_lt_1 = tf.less(abs_diff, 1)
anchorwise_smooth_l1norm = tf.reduce_sum(
tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5),
2) * weights
if self._anchorwise_output:
return anchorwise_smooth_l1norm
return tf.reduce_sum(anchorwise_smooth_l1norm)
class WeightedIOULocalizationLoss(Loss):
"""IOU localization loss function.
Sums the IOU for corresponding pairs of predicted/groundtruth boxes
  and for each pair assigns a loss of 1 - IOU. We then compute a weighted
sum over all pairs which is returned as the total loss.
"""
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
representing the decoded predicted boxes
target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
representing the decoded target boxes
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
"""
predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
target_boxes)
return tf.reduce_sum(tf.reshape(weights, [-1]) * per_anchor_iou_loss)
class WeightedSigmoidClassificationLoss(Loss):
"""Sigmoid cross entropy classification loss function."""
def __init__(self, anchorwise_output=False):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
"""
self._anchorwise_output = anchorwise_output
def _compute_loss(self,
prediction_tensor,
target_tensor,
weights,
class_indices=None):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
class_indices: (Optional) A 1-D integer tensor of class indices.
If provided, computes loss only for the specified class indices.
Returns:
loss: a (scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
"""
weights = tf.expand_dims(weights, 2)
if class_indices is not None:
weights *= tf.reshape(
ops.indices_to_dense_vector(class_indices,
tf.shape(prediction_tensor)[2]),
[1, 1, -1])
per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
labels=target_tensor, logits=prediction_tensor))
if self._anchorwise_output:
return tf.reduce_sum(per_entry_cross_ent * weights, 2)
return tf.reduce_sum(per_entry_cross_ent * weights)
class WeightedSoftmaxClassificationLoss(Loss):
"""Softmax loss function."""
def __init__(self, anchorwise_output=False):
"""Constructor.
Args:
anchorwise_output: Whether to output loss per anchor (default False)
"""
self._anchorwise_output = anchorwise_output
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
"""
num_classes = prediction_tensor.get_shape().as_list()[-1]
per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
labels=tf.reshape(target_tensor, [-1, num_classes]),
logits=tf.reshape(prediction_tensor, [-1, num_classes])))
if self._anchorwise_output:
return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
return tf.reduce_sum(per_row_cross_ent * tf.reshape(weights, [-1]))
class BootstrappedSigmoidClassificationLoss(Loss):
"""Bootstrapped sigmoid cross entropy classification loss function.
This loss uses a convex combination of training labels and the current model's
predictions as training targets in the classification loss. The idea is that
as the model improves over time, its predictions can be trusted more and we
can use these predictions to mitigate the damage of noisy/incorrect labels,
because incorrect labels are likely to be eventually highly inconsistent with
other stimuli predicted to have the same label by the model.
In "soft" bootstrapping, we use all predicted class probabilities, whereas in
"hard" bootstrapping, we use the single class favored by the model.
See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
Reed et al. (ICLR 2015).
"""
def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
"""Constructor.
Args:
alpha: a float32 scalar tensor between 0 and 1 representing interpolation
weight
bootstrap_type: set to either 'hard' or 'soft' (default)
anchorwise_output: Outputs loss per anchor. (default False)
Raises:
ValueError: if bootstrap_type is not either 'hard' or 'soft'
"""
if bootstrap_type != 'hard' and bootstrap_type != 'soft':
raise ValueError('Unrecognized bootstrap_type: must be one of '
'\'hard\' or \'soft.\'')
self._alpha = alpha
self._bootstrap_type = bootstrap_type
self._anchorwise_output = anchorwise_output
def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a (scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
"""
if self._bootstrap_type == 'soft':
bootstrap_target_tensor = self._alpha * target_tensor + (
1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
else:
bootstrap_target_tensor = self._alpha * target_tensor + (
1.0 - self._alpha) * tf.cast(
tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
labels=bootstrap_target_tensor, logits=prediction_tensor))
if self._anchorwise_output:
return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
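# Illustrative note: with alpha = 0.5, a label of 1.0 and sigmoid(logit) = 0.8,
# the soft bootstrap target above is 0.5 * 1.0 + 0.5 * 0.8 = 0.9, while the
# hard bootstrap target is 0.5 * 1.0 + 0.5 * 1.0 = 1.0 (the predicted
# probability 0.8 > 0.5 is cast to 1.0). Sigmoid cross entropy is then computed
# against this interpolated target instead of the raw label.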
class HardExampleMiner(object):
"""Hard example mining for regions in a list of images.
Implements hard example mining to select a subset of regions to be
back-propagated. For each image, selects the regions with highest losses,
subject to the condition that a newly selected region cannot have
an IOU > iou_threshold with any of the previously selected regions.
This can be achieved by re-using a greedy non-maximum suppression algorithm.
A constraint on the number of negatives mined per positive region can also be
enforced.
Reference papers: "Training Region-based Object Detectors with Online
Hard Example Mining" (CVPR 2016) by Srivastava et al., and
"SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
"""
def __init__(self,
num_hard_examples=64,
iou_threshold=0.7,
loss_type='both',
cls_loss_weight=0.05,
loc_loss_weight=0.06,
max_negatives_per_positive=None,
min_negatives_per_image=0):
"""Constructor.
The hard example mining implemented by this class can replicate the behavior
    in the two aforementioned papers (Shrivastava et al., and Liu et al.).
    To replicate the A2 setup of Shrivastava et al., num_hard_examples is set
    to a fixed parameter (64 by default) and iou_threshold is set to .7 for
    running non-max-suppression on the predicted boxes prior to hard mining.
In order to replicate the SSD paper (Liu et al), num_hard_examples should
be set to None, max_negatives_per_positive should be 3 and iou_threshold
should be 1.0 (in order to effectively turn off NMS).
Args:
num_hard_examples: maximum number of hard examples to be
selected per image (prior to enforcing max negative to positive ratio
constraint). If set to None, all examples obtained after NMS are
considered.
iou_threshold: minimum intersection over union for an example
to be discarded during NMS.
      loss_type: use only classification losses ('cls'),
        localization losses ('loc') or both losses ('both', the default).
In the last case, cls_loss_weight and loc_loss_weight are used to
compute weighted sum of the two losses.
cls_loss_weight: weight for classification loss.
loc_loss_weight: weight for location loss.
max_negatives_per_positive: maximum number of negatives to retain for
        each positive anchor. By default, max_negatives_per_positive is None,
        which means that we do not enforce a prespecified negative:positive
        ratio. Note also that max_negatives_per_positive can be a float
        (and will be converted to a float even if it is passed in otherwise).
min_negatives_per_image: minimum number of negative anchors to sample for
a given image. Setting this to a positive number allows sampling
        negatives even in images without any positive anchors, so mining is not
        biased towards images that contain at least one positive anchor.
"""
self._num_hard_examples = num_hard_examples
self._iou_threshold = iou_threshold
self._loss_type = loss_type
self._cls_loss_weight = cls_loss_weight
self._loc_loss_weight = loc_loss_weight
self._max_negatives_per_positive = max_negatives_per_positive
self._min_negatives_per_image = min_negatives_per_image
if self._max_negatives_per_positive is not None:
self._max_negatives_per_positive = float(self._max_negatives_per_positive)
self._num_positives_list = None
self._num_negatives_list = None
def __call__(self,
location_losses,
cls_losses,
decoded_boxlist_list,
match_list=None):
"""Computes localization and classification losses after hard mining.
Args:
location_losses: a float tensor of shape [num_images, num_anchors]
representing anchorwise localization losses.
cls_losses: a float tensor of shape [num_images, num_anchors]
representing anchorwise classification losses.
decoded_boxlist_list: a list of decoded BoxList representing location
predictions for each image.
match_list: an optional list of matcher.Match objects encoding the match
between anchors and groundtruth boxes for each image of the batch,
with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors. Match objects in match_list are
used to reference which anchors are positive, negative or ignored. If
self._max_negatives_per_positive exists, these are then used to enforce
a prespecified negative to positive ratio.
Returns:
mined_location_loss: a float scalar with sum of localization losses from
selected hard examples.
mined_cls_loss: a float scalar with sum of classification losses from
selected hard examples.
Raises:
ValueError: if location_losses, cls_losses and decoded_boxlist_list do
not have compatible shapes (i.e., they must correspond to the same
number of images).
ValueError: if match_list is specified but its length does not match
len(decoded_boxlist_list).
"""
mined_location_losses = []
mined_cls_losses = []
location_losses = tf.unstack(location_losses)
cls_losses = tf.unstack(cls_losses)
num_images = len(decoded_boxlist_list)
if not match_list:
match_list = num_images * [None]
if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
'do not have compatible shapes.')
if not isinstance(match_list, list):
raise ValueError('match_list must be a list.')
if len(match_list) != len(decoded_boxlist_list):
raise ValueError('match_list must either be None or have '
'length=len(decoded_boxlist_list).')
num_positives_list = []
num_negatives_list = []
for ind, detection_boxlist in enumerate(decoded_boxlist_list):
box_locations = detection_boxlist.get()
match = match_list[ind]
image_losses = cls_losses[ind]
if self._loss_type == 'loc':
image_losses = location_losses[ind]
elif self._loss_type == 'both':
image_losses *= self._cls_loss_weight
image_losses += location_losses[ind] * self._loc_loss_weight
if self._num_hard_examples is not None:
num_hard_examples = self._num_hard_examples
else:
num_hard_examples = detection_boxlist.num_boxes()
selected_indices = tf.image.non_max_suppression(
box_locations, image_losses, num_hard_examples, self._iou_threshold)
if self._max_negatives_per_positive is not None and match:
(selected_indices, num_positives,
num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
selected_indices, match, self._max_negatives_per_positive,
self._min_negatives_per_image)
num_positives_list.append(num_positives)
num_negatives_list.append(num_negatives)
mined_location_losses.append(
tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
mined_cls_losses.append(
tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
if match and self._max_negatives_per_positive:
self._num_positives_list = num_positives_list
self._num_negatives_list = num_negatives_list
return (location_loss, cls_loss)
def summarize(self):
"""Summarize the number of positives and negatives after mining."""
if self._num_positives_list and self._num_negatives_list:
avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
def _subsample_selection_to_desired_neg_pos_ratio(self,
indices,
match,
max_negatives_per_positive,
min_negatives_per_image=0):
"""Subsample a collection of selected indices to a desired neg:pos ratio.
This function takes a subset of M indices (indexing into a large anchor
collection of N anchors where M<N) which are labeled as positive/negative
via a Match object (matched indices are positive, unmatched indices
are negative). It returns a subset of the provided indices retaining all
positives as well as up to the first K negatives, where:
    K = floor(max_negatives_per_positive * num_positives).
For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
with positives=[2, 5] and negatives=[4, 7, 9, 10] and
    max_negatives_per_positive=1, then the returned subset of indices
is [2, 4, 5, 7].
Args:
indices: An integer tensor of shape [M] representing a collection
of selected anchor indices
match: A matcher.Match object encoding the match between anchors and
groundtruth boxes for a given image, with rows of the Match objects
corresponding to groundtruth boxes and columns corresponding to anchors.
max_negatives_per_positive: (float) maximum number of negatives for
each positive anchor.
min_negatives_per_image: minimum number of negative anchors for a given
        image. Allows sampling negatives in images without any positive anchors.
Returns:
selected_indices: An integer tensor of shape [M'] representing a
collection of selected anchor indices with M' <= M.
num_positives: An integer tensor representing the number of positive
examples in selected set of indices.
num_negatives: An integer tensor representing the number of negative
examples in selected set of indices.
"""
positives_indicator = tf.gather(match.matched_column_indicator(), indices)
negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
max_negatives = tf.maximum(min_negatives_per_image,
tf.to_int32(max_negatives_per_positive *
tf.to_float(num_positives)))
topk_negatives_indicator = tf.less_equal(
tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
subsampled_selection_indices = tf.where(
tf.logical_or(positives_indicator, topk_negatives_indicator))
num_negatives = tf.size(subsampled_selection_indices) - num_positives
return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
num_positives, num_negatives)
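# Illustrative usage sketch: the shapes and values below are made up and not
# part of the original module; it shows how a localization loss defined above
# is called with per-anchor weights under a TF 1.x session.
if __name__ == '__main__':
  example_predictions = tf.zeros([2, 3, 4])  # [batch_size, num_anchors, code_size]
  example_targets = tf.ones([2, 3, 4])       # every coordinate is off by 1.0
  example_weights = tf.ones([2, 3])          # per-anchor weights
  loc_loss_op = WeightedSmoothL1LocalizationLoss(anchorwise_output=True)
  anchorwise_losses = loc_loss_op(example_predictions, example_targets,
                                  weights=example_weights)
  with tf.Session() as sess:
    # Each coordinate has |diff| = 1.0, contributing |x| - 0.5 = 0.5, so every
    # anchor accumulates 4 * 0.5 = 2.0.
    print(sess.run(anchorwise_losses))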
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.losses."""
import math
import numpy as np
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import losses
from object_detection.core import matcher
class WeightedL2LocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
batch_size = 3
num_anchors = 10
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
loss_op = losses.WeightedL2LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
expected_loss = (3 * 5 * 4) / 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
def testReturnsCorrectAnchorwiseLoss(self):
batch_size = 3
num_anchors = 16
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.ones([batch_size, num_anchors])
loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=True)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
expected_loss = np.ones((batch_size, num_anchors)) * 2
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
def testReturnsCorrectLossSum(self):
batch_size = 3
num_anchors = 16
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.ones([batch_size, num_anchors])
loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=False)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor)
with self.test_session() as sess:
loss_output = sess.run(loss)
expected_loss_output = sess.run(expected_loss)
self.assertAllClose(loss_output, expected_loss_output)
def testReturnsCorrectNanLoss(self):
batch_size = 3
num_anchors = 10
code_size = 4
prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
target_tensor = tf.concat([
        tf.zeros([batch_size, num_anchors, code_size // 2]),
        tf.ones([batch_size, num_anchors, code_size // 2]) * np.nan
], axis=2)
weights = tf.ones([batch_size, num_anchors])
loss_op = losses.WeightedL2LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights,
ignore_nan_targets=True)
expected_loss = (3 * 5 * 4) / 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, expected_loss)
class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
batch_size = 2
num_anchors = 3
code_size = 4
prediction_tensor = tf.constant([[[2.5, 0, .4, 0],
[0, 0, 0, 0],
[0, 2.5, 0, .4]],
[[3.5, 0, 0, 0],
[0, .4, 0, .9],
[0, 0, 1.5, 0]]], tf.float32)
target_tensor = tf.zeros([batch_size, num_anchors, code_size])
weights = tf.constant([[2, 1, 1],
[0, 3, 0]], tf.float32)
loss_op = losses.WeightedSmoothL1LocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = 7.695
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedIOULocalizationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1],
[0, 0, 1, 1],
[0, 0, .5, .25]]])
target_tensor = tf.constant([[[1.5, 0, 2.4, 1],
[0, 0, 1, 1],
[50, 50, 500.5, 100.25]]])
weights = [[1.0, .5, 2.0]]
loss_op = losses.WeightedIOULocalizationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = 2.0
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSigmoidClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = -2 * math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSigmoidClassificationLoss(True)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectLossWithClassIndices(self):
prediction_tensor = tf.constant([[[-100, 100, -100, 100],
[100, -100, -100, -100],
[100, 0, -100, 100],
[-100, -100, 100, -100]],
[[-100, 0, 100, 100],
[-100, 100, -100, 100],
[100, 100, 100, 100],
[0, 0, -1, 100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0, 0],
[1, 0, 0, 1],
[1, 0, 0, 0],
[0, 0, 1, 1]],
[[0, 0, 1, 0],
[0, 1, 0, 0],
[1, 1, 1, 0],
[1, 0, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
# Ignores the last class.
class_indices = tf.constant([0, 1, 2], tf.int32)
loss_op = losses.WeightedSigmoidClassificationLoss(True)
loss = loss_op(prediction_tensor, target_tensor, weights=weights,
class_indices=class_indices)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationLoss()
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = - 1.5 * math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[0, 0, -100],
[-100, -100, 100]],
[[-100, 0, 0],
[-100, 100, -100],
[-100, 100, -100],
[100, -100, -100]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, .5, 1],
[1, 1, 1, 0]], tf.float32)
loss_op = losses.WeightedSoftmaxClassificationLoss(True)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectLossSoftBootstrapping(self):
prediction_tensor = tf.constant([[[-100, 100, 0],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='soft')
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = -math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectLossHardBootstrapping(self):
prediction_tensor = tf.constant([[[-100, 100, 0],
[100, -100, -100],
[100, -100, -100],
[-100, -100, 100]],
[[-100, -100, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='hard')
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = -math.log(.5)
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
def testReturnsCorrectAnchorWiseLoss(self):
prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100],
[100, 0, -100],
[-100, -100, 100]],
[[-100, 0, 100],
[-100, 100, -100],
[100, 100, 100],
[0, 0, -1]]], tf.float32)
target_tensor = tf.constant([[[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1]],
[[0, 0, 1],
[0, 1, 0],
[1, 1, 1],
[1, 0, 0]]], tf.float32)
weights = tf.constant([[1, 1, 1, 1],
[1, 1, 1, 0]], tf.float32)
alpha = tf.constant(.5, tf.float32)
loss_op = losses.BootstrappedSigmoidClassificationLoss(
alpha, bootstrap_type='hard', anchorwise_output=True)
loss = loss_op(prediction_tensor, target_tensor, weights=weights)
exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
[-math.log(.5), 0, 0, 0]])
with self.test_session() as sess:
loss_output = sess.run(loss)
self.assertAllClose(loss_output, exp_loss)
class HardExampleMinerTest(tf.test.TestCase):
def testHardMiningWithSingleLossType(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
# Uses only location loss to select hard examples
loss_op = losses.HardExampleMiner(num_hard_examples=1,
iou_threshold=0.0,
loss_type='loc',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 100 + 3
exp_cls_loss = 0 + 0
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testHardMiningWithBothLossType(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
loss_op = losses.HardExampleMiner(num_hard_examples=1,
iou_threshold=0.0,
loss_type='both',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 80 + 0
exp_cls_loss = 50 + 9
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testHardMiningNMS(self):
location_losses = tf.constant([[100, 90, 80, 0],
[0, 1, 2, 3]], tf.float32)
cls_losses = tf.constant([[0, 10, 50, 110],
[9, 6, 3, 0]], tf.float32)
box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
[0.9, 0.9, 0.99, 0.99],
[0.1, 0.1, 0.9, 0.9],
[0.1, 0.1, 0.9, 0.9]], tf.float32)
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
decoded_boxlist_list.append(box_list.BoxList(box_corners))
loss_op = losses.HardExampleMiner(num_hard_examples=2,
iou_threshold=0.5,
loss_type='cls',
cls_loss_weight=1,
loc_loss_weight=1)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list)
exp_loc_loss = 0 + 90 + 0 + 1
exp_cls_loss = 110 + 10 + 9 + 6
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testEnforceNegativesPerPositiveRatio(self):
location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
3, 10, 20, 100, 20, 3]], tf.float32)
cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
0, 60, 0, 17, 13, 0]], tf.float32)
box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.5, 0.1],
[0.0, 0.0, 0.6, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.8, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 1.0, 0.1],
[0.0, 0.0, 1.1, 0.1],
[0.0, 0.0, 0.2, 0.1]], tf.float32)
match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
match_list = [matcher.Match(match_results)]
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
max_negatives_per_positive_list = [0.0, 0.5, 1.0, 1.5, 10]
exp_loc_loss_list = [80 + 2,
80 + 1 + 2,
80 + 1 + 2 + 10,
80 + 1 + 2 + 10 + 100,
80 + 1 + 2 + 10 + 100 + 20]
exp_cls_loss_list = [100 + 70,
100 + 90 + 70,
100 + 90 + 70 + 60,
100 + 90 + 70 + 60 + 17,
100 + 90 + 70 + 60 + 17 + 13]
for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in zip(
max_negatives_per_positive_list, exp_loc_loss_list, exp_cls_loss_list):
loss_op = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
cls_loss_weight=1, loc_loss_weight=1,
max_negatives_per_positive=max_negatives_per_positive)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list, match_list)
loss_op.summarize()
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
3, 10, 20, 100, 20, 3]], tf.float32)
cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
0, 60, 0, 17, 13, 0]], tf.float32)
box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.5, 0.1],
[0.0, 0.0, 0.6, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 0.8, 0.1],
[0.0, 0.0, 0.2, 0.1],
[0.0, 0.0, 1.0, 0.1],
[0.0, 0.0, 1.1, 0.1],
[0.0, 0.0, 0.2, 0.1]], tf.float32)
match_results = tf.constant([-1] * 12)
match_list = [matcher.Match(match_results)]
decoded_boxlist_list = []
decoded_boxlist_list.append(box_list.BoxList(box_corners))
min_negatives_per_image_list = [0, 1, 2, 4, 5, 6]
exp_loc_loss_list = [0,
80,
80 + 1,
80 + 1 + 2 + 10,
80 + 1 + 2 + 10 + 100,
80 + 1 + 2 + 10 + 100 + 20]
exp_cls_loss_list = [0,
100,
100 + 90,
100 + 90 + 70 + 60,
100 + 90 + 70 + 60 + 17,
100 + 90 + 70 + 60 + 17 + 13]
for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip(
min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list):
loss_op = losses.HardExampleMiner(
num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
cls_loss_weight=1, loc_loss_weight=1,
max_negatives_per_positive=3,
min_negatives_per_image=min_negatives_per_image)
(loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
decoded_boxlist_list, match_list)
with self.test_session() as sess:
loc_loss_output = sess.run(loc_loss)
self.assertAllClose(loc_loss_output, exp_loc_loss)
cls_loss_output = sess.run(cls_loss)
self.assertAllClose(cls_loss_output, exp_cls_loss)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor 'no_match'.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one wants to consider this
box neither a positive example (match) nor a negative example (no_match).
The Match class is used to store the match results and it provides simple APIs
to query the results.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
class Match(object):
"""Class to store results from the matcher.
This class is used to store the results from the matcher. It provides
convenient methods to query the matching results.
"""
def __init__(self, match_results):
"""Constructs a Match object.
Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
Raises:
ValueError: if match_results does not have rank 1 or is not an
        int32 tensor
"""
if match_results.shape.ndims != 1:
raise ValueError('match_results should have rank 1')
if match_results.dtype != tf.int32:
      raise ValueError('match_results should be an int32 tensor')
self._match_results = match_results
@property
def match_results(self):
"""The accessor for match results.
Returns:
the tensor which encodes the match results.
"""
return self._match_results
def matched_column_indices(self):
"""Returns column indices that match to some row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
def matched_column_indicator(self):
"""Returns column indices that are matched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.greater_equal(self._match_results, 0)
def num_matched_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.matched_column_indices())
def unmatched_column_indices(self):
"""Returns column indices that do not match any row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
def unmatched_column_indicator(self):
"""Returns column indices that are unmatched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.equal(self._match_results, -1)
def num_unmatched_columns(self):
"""Returns number (int32 scalar tensor) of unmatched columns."""
return tf.size(self.unmatched_column_indices())
def ignored_column_indices(self):
"""Returns column indices that are ignored (neither Matched nor Unmatched).
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
def ignored_column_indicator(self):
"""Returns boolean column indicator where True means the colum is ignored.
Returns:
column_indicator: boolean vector which is True for all ignored column
indices.
"""
return tf.equal(self._match_results, -2)
def num_ignored_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.ignored_column_indices())
def unmatched_or_ignored_column_indices(self):
"""Returns column indices that are unmatched or ignored.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
def matched_row_indices(self):
"""Returns row indices that match some column.
The indices returned by this op are ordered so as to be in correspondence
    with the output of matched_column_indices(). For example if
    self.matched_column_indices() is [0, 2], and self.matched_row_indices() is
[7, 3], then we know that column 0 was matched to row 7 and column 2 was
matched to row 3.
Returns:
row_indices: int32 tensor of shape [K] with row indices.
"""
return self._reshape_and_cast(
tf.gather(self._match_results, self.matched_column_indices()))
def _reshape_and_cast(self, t):
return tf.cast(tf.reshape(t, [-1]), tf.int32)
class Matcher(object):
"""Abstract base class for matcher.
"""
__metaclass__ = ABCMeta
def match(self, similarity_matrix, scope=None, **params):
"""Computes matches among row and column indices and returns the result.
Computes matches among the row and column indices based on the similarity
matrix and optional arguments.
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
scope: Op scope name. Defaults to 'Match' if None.
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
A Match object with the results of matching.
"""
with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
return Match(self._match(similarity_matrix, **params))
@abstractmethod
def _match(self, similarity_matrix, **params):
"""Method to be overriden by implementations.
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
match_results: Integer tensor of shape [M]: match_results[i]>=0 means
that column i is matched to row match_results[i], match_results[i]=-1
means that the column is not matched. match_results[i]=-2 means that
the column is ignored (usually this happens when there is a very weak
match which one neither wants as positive nor negative example).
"""
pass
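# Illustrative sketch: a hypothetical, simplified Matcher subclass (not part of
# the original module and not meant to replicate the library's real matchers)
# showing how _match can be overridden to produce the int32 match_results
# vector that Match expects.
class _GreedyThresholdMatcher(Matcher):
  """Matches each column to its best row when similarity exceeds a threshold."""

  def __init__(self, matched_threshold=0.5):
    self._matched_threshold = matched_threshold

  def _match(self, similarity_matrix, **params):
    # Best row for every column and the corresponding similarity value.
    best_rows = tf.cast(tf.argmax(similarity_matrix, 0), tf.int32)
    best_vals = tf.reduce_max(similarity_matrix, 0)
    # Columns whose best similarity falls below the threshold are unmatched (-1).
    return tf.where(best_vals >= self._matched_threshold,
                    best_rows,
                    -1 * tf.ones_like(best_rows))
# A call such as _GreedyThresholdMatcher().match(similarity) returns a Match
# object whose matched_column_indices() / unmatched_column_indices() can then
# be queried, as in the tests below.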
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.matcher."""
import numpy as np
import tensorflow as tf
from object_detection.core import matcher
class AnchorMatcherTest(tf.test.TestCase):
def test_get_correct_matched_columnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [0, 1, 3, 5]
matched_column_indices = match.matched_column_indices()
self.assertEquals(matched_column_indices.dtype, tf.int32)
with self.test_session() as sess:
matched_column_indices = sess.run(matched_column_indices)
self.assertAllEqual(matched_column_indices, expected_column_indices)
def test_get_correct_counts(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
exp_num_matched_columns = 4
exp_num_unmatched_columns = 2
exp_num_ignored_columns = 1
num_matched_columns = match.num_matched_columns()
num_unmatched_columns = match.num_unmatched_columns()
num_ignored_columns = match.num_ignored_columns()
self.assertEquals(num_matched_columns.dtype, tf.int32)
self.assertEquals(num_unmatched_columns.dtype, tf.int32)
self.assertEquals(num_ignored_columns.dtype, tf.int32)
with self.test_session() as sess:
(num_matched_columns_out, num_unmatched_columns_out,
num_ignored_columns_out) = sess.run(
[num_matched_columns, num_unmatched_columns, num_ignored_columns])
self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
def testGetCorrectUnmatchedColumnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [2, 4]
unmatched_column_indices = match.unmatched_column_indices()
self.assertEquals(unmatched_column_indices.dtype, tf.int32)
with self.test_session() as sess:
unmatched_column_indices = sess.run(unmatched_column_indices)
self.assertAllEqual(unmatched_column_indices, expected_column_indices)
def testGetCorrectMatchedRowIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_row_indices = [3, 1, 0, 5]
matched_row_indices = match.matched_row_indices()
self.assertEquals(matched_row_indices.dtype, tf.int32)
with self.test_session() as sess:
matched_row_inds = sess.run(matched_row_indices)
self.assertAllEqual(matched_row_inds, expected_row_indices)
def test_get_correct_ignored_column_indices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [6]
ignored_column_indices = match.ignored_column_indices()
self.assertEquals(ignored_column_indices.dtype, tf.int32)
with self.test_session() as sess:
ignored_column_indices = sess.run(ignored_column_indices)
self.assertAllEqual(ignored_column_indices, expected_column_indices)
def test_get_correct_matched_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [True, True, False, True, False, True, False]
matched_column_indicator = match.matched_column_indicator()
self.assertEquals(matched_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
matched_column_indicator = sess.run(matched_column_indicator)
self.assertAllEqual(matched_column_indicator, expected_column_indicator)
def test_get_correct_unmatched_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [False, False, True, False, True, False, False]
unmatched_column_indicator = match.unmatched_column_indicator()
self.assertEquals(unmatched_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
unmatched_column_indicator = sess.run(unmatched_column_indicator)
self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
def test_get_correct_ignored_column_indicator(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indicator = [False, False, False, False, False, False, True]
ignored_column_indicator = match.ignored_column_indicator()
self.assertEquals(ignored_column_indicator.dtype, tf.bool)
with self.test_session() as sess:
ignored_column_indicator = sess.run(ignored_column_indicator)
self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
def test_get_correct_unmatched_ignored_column_indices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
match = matcher.Match(match_results)
expected_column_indices = [2, 4, 6]
unmatched_ignored_column_indices = (match.
unmatched_or_ignored_column_indices())
self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
with self.test_session() as sess:
unmatched_ignored_column_indices = sess.run(
unmatched_ignored_column_indices)
self.assertAllEqual(unmatched_ignored_column_indices,
expected_column_indices)
def test_all_columns_accounted_for(self):
# Note: num_matches is deliberately set to a small number so that not all
# possibilities (matched, unmatched, ignored) necessarily appear.
num_matches = 10
match_results = tf.random_uniform(
[num_matches], minval=-2, maxval=5, dtype=tf.int32)
match = matcher.Match(match_results)
matched_column_indices = match.matched_column_indices()
unmatched_column_indices = match.unmatched_column_indices()
ignored_column_indices = match.ignored_column_indices()
with self.test_session() as sess:
matched, unmatched, ignored = sess.run([
matched_column_indices, unmatched_column_indices,
ignored_column_indices
])
all_indices = np.hstack((matched, unmatched, ignored))
all_indices_sorted = np.sort(all_indices)
self.assertAllEqual(all_indices_sorted,
np.arange(num_matches, dtype=np.int32))
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
criterion.
The main function call is:
subsample(indicator, batch_size, **params).
Indicator is a 1d boolean tensor where True denotes which examples can be
sampled. It returns a boolean indicator where True denotes an example has been
sampled.
Subclasses should implement the Subsample function and can make use of the
@staticmethod SubsampleIndicator.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.utils import ops
class MinibatchSampler(object):
"""Abstract base class for subsampling minibatches."""
__metaclass__ = ABCMeta
def __init__(self):
"""Constructs a minibatch sampler."""
pass
@abstractmethod
def subsample(self, indicator, batch_size, **params):
"""Returns subsample of entries in indicator.
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
**params: additional keyword arguments for specific implementations of
the MinibatchSampler.
Returns:
sample_indicator: boolean tensor of shape [N] whose True entries have been
sampled. If sum(indicator) >= batch_size, then sum(sample_indicator) == batch_size.
"""
pass
@staticmethod
def subsample_indicator(indicator, num_samples):
"""Subsample indicator vector.
Given a boolean indicator vector with M elements set to `True`, the function
assigns all but `num_samples` of these previously `True` elements to
`False`. If `num_samples` is greater than M, the original indicator vector
is returned.
Args:
indicator: a 1-dimensional boolean tensor indicating which elements
are allowed to be sampled and which are not.
num_samples: int32 scalar tensor
Returns:
a boolean tensor with the same shape as input (indicator) tensor
"""
indices = tf.where(indicator)
indices = tf.random_shuffle(indices)
indices = tf.reshape(indices, [-1])
num_samples = tf.minimum(tf.size(indices), num_samples)
selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
selected_indicator = ops.indices_to_dense_vector(selected_indices,
tf.shape(indicator)[0])
return tf.equal(selected_indicator, 1)
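# Hedged usage sketch (not part of the original module): a minimal example,
# assuming the MinibatchSampler class above is in scope, that exercises the
# subsample_indicator helper on a toy indicator vector.
example_indicator = tf.constant([True, False, True, True, False, True])
example_samples = MinibatchSampler.subsample_indicator(example_indicator, 2)
with tf.Session() as sess:
  # Exactly two of the originally-True positions come back True; which two
  # is random because the candidate indices are shuffled.
  print(sess.run(example_samples))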
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import numpy as np
import tensorflow as tf
from object_detection.core import minibatch_sampler
class MinibatchSamplerTest(tf.test.TestCase):
def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 3)
with self.test_session() as sess:
samples_out = sess.run(samples)
self.assertEqual(np.sum(samples_out), 3)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.placeholder(tf.bool)
feed_dict = {indicator: np_indicator}
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 3)
with self.test_session() as sess:
samples_out = sess.run(samples, feed_dict=feed_dict)
self.assertEqual(np.sum(samples_out), 3)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 5)
with self.test_session() as sess:
samples_out = sess.run(samples)
self.assertEqual(np.sum(samples_out), 4)
self.assertAllEqual(samples_out,
np.logical_and(samples_out, np_indicator))
def test_subsample_indicator_when_num_samples_is_zero(self):
np_indicator = [True, False, True, False, True, True, False]
indicator = tf.constant(np_indicator)
samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator, 0)
with self.test_session() as sess:
samples_none_out = sess.run(samples_none)
self.assertAllEqual(
np.zeros_like(samples_none_out, dtype=bool),
samples_none_out)
def test_subsample_indicator_when_indicator_all_false(self):
indicator_empty = tf.zeros([0], dtype=tf.bool)
samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
indicator_empty, 4)
with self.test_session() as sess:
samples_empty_out = sess.run(samples_empty)
self.assertEqual(0, samples_empty_out.size)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
designed to work with arbitrary detection models should only depend on this
class. We intend for the functions in this class to follow tensor-in/tensor-out
design, thus all functions have tensors or lists/dictionaries holding tensors as
inputs and outputs.
Abstractly, detection models predict output tensors given input images
which can be passed to a loss function at training time or passed to a
postprocessing function at eval time. The computation graphs at a high level
consequently look as follows:
Training time:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
Evaluation time:
inputs (images tensor) -> preprocess -> predict -> postprocess
-> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
DetectionModels must thus implement four functions (1) preprocess, (2) predict,
(3) postprocess and (4) loss. DetectionModels should make no assumptions about
the input size or aspect ratio --- they are responsible for doing any
resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
By default, DetectionModels produce bounding box detections; however, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
"""
from abc import ABCMeta
from abc import abstractmethod
from object_detection.core import standard_fields as fields
class DetectionModel(object):
"""Abstract base class for detection models."""
__metaclass__ = ABCMeta
def __init__(self, num_classes):
"""Constructor.
Args:
num_classes: number of classes. Note that num_classes *does not* include
background categories that might be implicitly predicted in various
implementations.
"""
self._num_classes = num_classes
self._groundtruth_lists = {}
@property
def num_classes(self):
return self._num_classes
def groundtruth_lists(self, field):
"""Access list of groundtruth tensors.
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints}
Returns:
a list of tensors holding groundtruth information (see also
provide_groundtruth function below), with one entry for each image in the
batch.
Raises:
RuntimeError: if the field has not been provided via provide_groundtruth.
"""
if field not in self._groundtruth_lists:
raise RuntimeError('Groundtruth tensor %s has not been provided' % field)
return self._groundtruth_lists[field]
@abstractmethod
def preprocess(self, inputs):
"""Input preprocessing.
To be overridden by implementations.
This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary as images
are assumed to arrive in arbitrary sizes. While this function could
conceivably be part of the predict method (below), it is often convenient
to keep these separate --- for example, we may want to preprocess on one
device, place onto a queue, and let another device (e.g., the GPU) handle
prediction.
A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor
does it affect the groundtruth annotations in any way (thus data
augmentation operations such as random cropping should be performed
externally).
+ There is no assumption that the batch size in this function is the same as
the batch size in the predict function. In fact, we recommend calling the
preprocess function prior to calling any batching operations (which should
happen outside of the model) and thus assuming that batch sizes are equal
to 1 in the preprocess function.
+ There is also no explicit assumption that the output resolutions
must be fixed across inputs --- this is to support "fully convolutional"
settings in which input images can have different shapes/resolutions.
Args:
inputs: a [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
"""
pass
@abstractmethod
def predict(self, preprocessed_inputs):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
pass
@abstractmethod
def postprocess(self, prediction_dict, **params):
"""Convert predicted output tensors to final detections.
Outputs adhere to the following conventions:
* Classes are integers in [0, num_classes); background classes are removed
and the first non-background class is mapped to 0.
* Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
format and normalized relative to the image window.
* `num_detections` is provided for settings where detections are padded to a
fixed number of boxes.
* We do not specifically assume any kind of probabilistic interpretation
of the scores --- the only important thing is their relative ordering.
Thus implementations of the postprocess function are free to output
logits, probabilities, calibrated probabilities, or anything else.
Args:
prediction_dict: a dictionary holding prediction tensors.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
Returns:
detections: a dictionary containing the following fields
detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
instance_masks: [batch, max_detections, image_height, image_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
num_detections: [batch]
"""
pass
@abstractmethod
def loss(self, prediction_dict):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
provided via the provide_groundtruth function.
Args:
prediction_dict: a dictionary holding predicted tensors
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
loss values.
"""
pass
def provide_groundtruth(self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None):
"""Provide groundtruth tensors.
Args:
groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
[num_boxes, 4] containing coordinates of the groundtruth boxes.
Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
format and assumed to be normalized and clipped
relative to the image window with y_min <= y_max and x_min <= x_max.
groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
tensors of shape [num_boxes, num_classes] containing the class targets
with the 0th index assumed to map to the first non-background class.
groundtruth_masks_list: a list of 3-D tf.float32 tensors of
shape [max_detections, height_in, width_in] containing instance
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, num_keypoints, 2] containing keypoints for each image.
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
fields.BoxListFields.classes] = groundtruth_classes_list
if groundtruth_masks_list:
self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list
if groundtruth_keypoints_list:
self._groundtruth_lists[
fields.BoxListFields.keypoints] = groundtruth_keypoints_list
@abstractmethod
def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
"""Return callable for loading a foreign checkpoint into tensorflow graph.
Loads variables from a different tensorflow graph (typically feature
extractor variables). This enables the model to initialize based on weights
from another task. For example, the feature extractor variables from a
classification model can be used to bootstrap training of an object
detector. When loading from an object detection model, the checkpoint model
should have the same parameters as this detection model, with the exception of
the num_classes parameter.
Args:
checkpoint_path: path to checkpoint to restore.
from_detection_checkpoint: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Returns:
a callable which takes a tf.Session as input and loads a checkpoint when
run.
"""
pass
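# Hedged, minimal sketch (not part of the original module): a toy concrete
# subclass of DetectionModel, used only to illustrate the tensor-in/tensor-out
# flow described in the module docstring above (preprocess -> predict -> loss
# at training time, preprocess -> predict -> postprocess at eval time).
# All names below are hypothetical.
import tensorflow as tf


class _ToyDetectionModel(DetectionModel):
  """Toy model: identity-style preprocessing and constant 'detections'."""

  def preprocess(self, inputs):
    # A real model would resize/normalize here; this just rescales to [0, 1].
    return tf.to_float(inputs) / 255.0

  def predict(self, preprocessed_inputs):
    return {'features': preprocessed_inputs}

  def loss(self, prediction_dict):
    return {'dummy_loss': tf.reduce_mean(prediction_dict['features'])}

  def postprocess(self, prediction_dict, **params):
    batch = tf.shape(prediction_dict['features'])[0]
    return {'detection_boxes': tf.zeros([batch, 1, 4]),
            'detection_scores': tf.zeros([batch, 1]),
            'detection_classes': tf.zeros([batch, 1]),
            'num_detections': tf.ones([batch])}

  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
    return lambda sess: None

# Training-time usage of the hypothetical subclass:
#   model = _ToyDetectionModel(num_classes=1)
#   losses = model.loss(model.predict(model.preprocess(images)))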
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Post-processing operations on detected boxes."""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
def multiclass_non_max_suppression(boxes,
scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size=0,
clip_window=None,
change_coordinate_frame=False,
masks=None,
additional_fields=None,
scope=None):
"""Multi-class version of non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
Please note that this operation is performed on *all* classes, therefore any
background classes should be removed prior to calling this function.
Args:
boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
number of classes or 1 depending on whether a separate box is predicted
per class.
scores: A [k, num_classes] float32 tensor containing the scores for each of
the k detections.
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
with previously selected boxes are removed).
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
representing the window to clip and normalize boxes to before performing
non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be
added to resulting BoxList.
scope: name scope.
Returns:
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
If masks, keypoints, or keypoint_heatmaps are not None, the boxlist will
also contain masks, keypoints, or keypoint_heatmaps corresponding to the
retained boxes.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
a valid scores field.
"""
if not 0 <= iou_thresh <= 1.0:
raise ValueError('iou_thresh must be between 0 and 1')
if scores.shape.ndims != 2:
raise ValueError('scores field must be of rank 2')
if scores.shape[1].value is None:
raise ValueError('scores must have statically defined second '
'dimension')
if boxes.shape.ndims != 3:
raise ValueError('boxes must be of rank 3.')
if not (boxes.shape[1].value == scores.shape[1].value or
boxes.shape[1].value == 1):
raise ValueError('second dimension of boxes must be either 1 or equal '
'to the second dimension of scores')
if boxes.shape[2].value != 4:
raise ValueError('last dimension of boxes must be of size 4.')
if change_coordinate_frame and clip_window is None:
raise ValueError('if change_coordinate_frame is True, then a clip_window '
'must be specified.')
with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
num_boxes = tf.shape(boxes)[0]
num_scores = tf.shape(scores)[0]
num_classes = scores.get_shape()[1]
length_assert = tf.Assert(
tf.equal(num_boxes, num_scores),
['Incorrect scores field length: actual vs expected.',
num_scores, num_boxes])
selected_boxes_list = []
per_class_boxes_list = tf.unstack(boxes, axis=1)
if masks is not None:
per_class_masks_list = tf.unstack(masks, axis=1)
boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
else [0] * num_classes)
for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
per_class_boxes = per_class_boxes_list[boxes_idx]
boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
with tf.control_dependencies([length_assert]):
class_scores = tf.reshape(
tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
class_scores)
if masks is not None:
per_class_masks = per_class_masks_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
per_class_masks)
if additional_fields is not None:
for key, tensor in additional_fields.items():  # .items() works on Python 2 and 3
boxlist_and_class_scores.add_field(key, tensor)
boxlist_filtered = box_list_ops.filter_greater_than(
boxlist_and_class_scores, score_thresh)
if clip_window is not None:
boxlist_filtered = box_list_ops.clip_to_window(
boxlist_filtered, clip_window)
if change_coordinate_frame:
boxlist_filtered = box_list_ops.change_coordinate_frame(
boxlist_filtered, clip_window)
max_selection_size = tf.minimum(max_size_per_class,
boxlist_filtered.num_boxes())
selected_indices = tf.image.non_max_suppression(
boxlist_filtered.get(),
boxlist_filtered.get_field(fields.BoxListFields.scores),
max_selection_size,
iou_threshold=iou_thresh)
nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
nms_result.add_field(
fields.BoxListFields.classes, (tf.zeros_like(
nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
selected_boxes_list.append(nms_result)
selected_boxes = box_list_ops.concatenate(selected_boxes_list)
sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
fields.BoxListFields.scores)
if max_total_size:
max_total_size = tf.minimum(max_total_size,
sorted_boxes.num_boxes())
sorted_boxes = box_list_ops.gather(sorted_boxes,
tf.range(max_total_size))
return sorted_boxes
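# Hedged usage sketch (not part of the original module): per-class NMS on two
# toy boxes that share coordinates across both classes; values are
# illustrative only.
example_boxes = tf.constant([[[0., 0., 1., 1.]],
                             [[0., .1, 1., 1.1]]], tf.float32)  # [k=2, q=1, 4]
example_scores = tf.constant([[.9, .1], [.8, .2]])              # [k=2, classes=2]
example_nms = multiclass_non_max_suppression(
    example_boxes, example_scores, score_thresh=.5, iou_thresh=.5,
    max_size_per_class=10)
# Only [0, 0, 1, 1] survives: the class-1 scores fall below score_thresh and
# the second class-0 box is suppressed by its IOU with the first.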
def batch_multiclass_non_max_suppression(boxes,
scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size=0,
clip_window=None,
change_coordinate_frame=False,
num_valid_boxes=None,
masks=None,
scope=None):
"""Multi-class version of non maximum suppression that operates on a batch.
This op is similar to `multiclass_non_max_suppression` but operates on a batch
of boxes and scores. See documentation for `multiclass_non_max_suppression`
for details.
Args:
boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific boxes
are used.
scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
the scores for each of the `num_anchors` detections.
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
with previously selected boxes are removed).
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
representing the window to clip boxes to before performing non-max
suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
[batch_size] representing the number of valid boxes to be considered
for each image in the batch. This parameter allows for ignoring zero
paddings.
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
float32 tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
scope: tf scope name.
Returns:
A dictionary containing the following entries:
'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
containing the non-max suppressed boxes.
'detection_scores': A [batch_size, max_detections] float32 tensor containing
the scores for the boxes.
'detection_classes': A [batch_size, max_detections] float32 tensor
containing the class for boxes.
'num_detections': A [batch_size] float32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i] entries in
nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
entries are zero paddings.
'detection_masks': (optional) a
[batch_size, max_detections, mask_height, mask_width] float32 tensor
containing masks for each selected box.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
a valid scores field.
"""
q = boxes.shape[2].value
num_classes = scores.shape[2].value
if q != 1 and q != num_classes:
raise ValueError('third dimension of boxes must be either 1 or equal '
'to the third dimension of scores')
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
per_image_boxes_list = tf.unstack(boxes)
per_image_scores_list = tf.unstack(scores)
num_valid_boxes_list = len(per_image_boxes_list) * [None]
per_image_masks_list = len(per_image_boxes_list) * [None]
if num_valid_boxes is not None:
num_valid_boxes_list = tf.unstack(num_valid_boxes)
if masks is not None:
per_image_masks_list = tf.unstack(masks)
detection_boxes_list = []
detection_scores_list = []
detection_classes_list = []
num_detections_list = []
detection_masks_list = []
for (per_image_boxes, per_image_scores, per_image_masks, num_valid_boxes
) in zip(per_image_boxes_list, per_image_scores_list,
per_image_masks_list, num_valid_boxes_list):
if num_valid_boxes is not None:
per_image_boxes = tf.reshape(
tf.slice(per_image_boxes, 3*[0],
tf.stack([num_valid_boxes, -1, -1])), [-1, q, 4])
per_image_scores = tf.reshape(
tf.slice(per_image_scores, [0, 0],
tf.stack([num_valid_boxes, -1])), [-1, num_classes])
if masks is not None:
per_image_masks = tf.reshape(
tf.slice(per_image_masks, 4*[0],
tf.stack([num_valid_boxes, -1, -1, -1])),
[-1, q, masks.shape[3].value, masks.shape[4].value])
nmsed_boxlist = multiclass_non_max_suppression(
per_image_boxes,
per_image_scores,
score_thresh,
iou_thresh,
max_size_per_class,
max_total_size,
masks=per_image_masks,
clip_window=clip_window,
change_coordinate_frame=change_coordinate_frame)
num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))
padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
max_total_size)
detection_boxes_list.append(padded_boxlist.get())
detection_scores_list.append(
padded_boxlist.get_field(fields.BoxListFields.scores))
detection_classes_list.append(
padded_boxlist.get_field(fields.BoxListFields.classes))
if masks is not None:
detection_masks_list.append(
padded_boxlist.get_field(fields.BoxListFields.masks))
nms_dict = {
'detection_boxes': tf.stack(detection_boxes_list),
'detection_scores': tf.stack(detection_scores_list),
'detection_classes': tf.stack(detection_classes_list),
'num_detections': tf.stack(num_detections_list)
}
if masks is not None:
nms_dict['detection_masks'] = tf.stack(detection_masks_list)
return nms_dict
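# Hedged usage sketch (not part of the original module): the batched variant
# with a batch of one image, mirroring the single-image example above; values
# are illustrative only.
example_batched_boxes = tf.constant([[[[0., 0., 1., 1.]],
                                      [[0., .1, 1., 1.1]]]], tf.float32)
example_batched_scores = tf.constant([[[.9, .1], [.8, .2]]])
example_nms_dict = batch_multiclass_non_max_suppression(
    example_batched_boxes, example_batched_scores, score_thresh=.5,
    iou_thresh=.5, max_size_per_class=10, max_total_size=10)
# example_nms_dict['detection_boxes'] is padded to shape [1, 10, 4], with
# example_nms_dict['num_detections'] reporting how many rows are real.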
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.core.post_processing."""
import numpy as np
import tensorflow as tf
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
def test_with_invalid_scores_size(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]]], tf.float32)
scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
iou_thresh = .5
score_thresh = 0.6
max_output_size = 3
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size)
with self.test_session() as sess:
with self.assertRaisesWithPredicateMatch(
tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
sess.run(nms.get())
def test_multiclass_nms_select_with_shared_boxes(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
num_keypoints = 6
keypoints = tf.tile(
tf.reshape(tf.range(8), [8, 1, 1]),
[1, num_keypoints, 2])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
exp_nms_keypoints_tensor = tf.tile(
tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
[1, num_keypoints, 2])
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size,
additional_fields={
fields.BoxListFields.keypoints: keypoints})
with self.test_session() as sess:
(nms_corners_output,
nms_scores_output,
nms_classes_output,
nms_keypoints,
exp_nms_keypoints) = sess.run([
nms.get(),
nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes),
nms.get_field(fields.BoxListFields.keypoints),
exp_nms_keypoints_tensor
])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
num_boxes = tf.shape(boxes)[0]
heatmap_height = 5
heatmap_width = 5
num_keypoints = 17
keypoint_heatmaps = tf.ones(
[num_boxes, heatmap_height, heatmap_width, num_keypoints],
dtype=tf.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
exp_nms_keypoint_heatmaps = np.ones(
(4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size,
additional_fields={
fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
with self.test_session() as sess:
(nms_corners_output,
nms_scores_output,
nms_classes_output,
nms_keypoint_heatmaps) = sess.run(
[nms.get(),
nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes),
nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
def test_multiclass_nms_with_additional_fields(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
coarse_boxes_key = 'coarse_boxes'
coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
[0.1, 0.2, 1.1, 1.2],
[0.1, -0.2, 1.1, 1.0],
[0.1, 10.1, 1.1, 11.1],
[0.1, 10.2, 1.1, 11.2],
[0.1, 100.1, 1.1, 101.1],
[0.1, 1000.1, 1.1, 1002.1],
[0.1, 1000.1, 1.1, 1002.2]], tf.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = np.array([[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]], dtype=np.float32)
exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
[0.1, 0.1, 1.1, 1.1],
[0.1, 1000.1, 1.1, 1002.1],
[0.1, 100.1, 1.1, 101.1]],
dtype=np.float32)
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size,
additional_fields={coarse_boxes_key: coarse_boxes})
with self.test_session() as sess:
(nms_corners_output,
nms_scores_output,
nms_classes_output,
nms_coarse_corners) = sess.run(
[nms.get(),
nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes),
nms.get_field(coarse_boxes_key)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
num_classes = 2
mask_height = 3
mask_width = 3
masks = tf.tile(
tf.reshape(tf.range(8), [8, 1, 1, 1]),
[1, num_classes, mask_height, mask_width])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
exp_nms_masks_tensor = tf.tile(
tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
[1, mask_height, mask_width])
nms = post_processing.multiclass_non_max_suppression(boxes, scores,
score_thresh,
iou_thresh,
max_output_size,
masks=masks)
with self.test_session() as sess:
(nms_corners_output,
nms_scores_output,
nms_classes_output,
nms_masks,
exp_nms_masks) = sess.run([nms.get(),
nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes),
nms.get_field(fields.BoxListFields.masks),
exp_nms_masks_tensor])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
self.assertAllEqual(nms_masks, exp_nms_masks)
def test_multiclass_nms_select_with_clip_window(self):
boxes = tf.constant([[[0, 0, 10, 10]],
[[1, 1, 11, 11]]], tf.float32)
scores = tf.constant([[.9], [.75]])
clip_window = tf.constant([5, 4, 8, 7], tf.float32)
score_thresh = 0.0
iou_thresh = 0.5
max_output_size = 100
exp_nms_corners = [[5, 4, 8, 7]]
exp_nms_scores = [.9]
exp_nms_classes = [0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size,
clip_window=clip_window)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
boxes = tf.constant([[[0, 0, 10, 10]],
[[1, 1, 11, 11]]], tf.float32)
scores = tf.constant([[.9], [.75]])
clip_window = tf.constant([5, 4, 8, 7], tf.float32)
score_thresh = 0.0
iou_thresh = 0.5
max_output_size = 100
exp_nms_corners = [[0, 0, 1, 1]]
exp_nms_scores = [.9]
exp_nms_classes = [0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size,
clip_window=clip_window, change_coordinate_frame=True)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_per_class_cap(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
score_thresh = 0.1
iou_thresh = .5
max_size_per_class = 2
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 1000, 1, 1002]]
exp_nms_scores = [.95, .9, .85]
exp_nms_classes = [0, 0, 1]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_size_per_class)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_total_cap(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
score_thresh = 0.1
iou_thresh = .5
max_size_per_class = 4
max_total_size = 2
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1]]
exp_nms_scores = [.95, .9]
exp_nms_classes = [0, 0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_size_per_class,
max_total_size)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
boxes = tf.constant([[[0, 0, 1, 1]],
[[0, 0.1, 1, 1.1]],
[[0, -0.1, 1, 0.9]],
[[0, 10, 1, 11]],
[[0, 10.1, 1, 11.1]],
[[0, 100, 1, 101]],
[[0, 1000, 1, 1002]],
[[0, 1000, 1, 1002.1]]], tf.float32)
scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 3
exp_nms = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 100, 1, 101]]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms.get())
self.assertAllClose(nms_output, exp_nms)
def test_multiclass_nms_select_with_separate_boxes(self):
boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]],
[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
tf.float32)
scores = tf.constant([[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 999, 2, 1004],
[0, 100, 1, 101]]
exp_nms_scores = [.95, .9, .85, .3]
exp_nms_classes = [0, 0, 1, 0]
nms = post_processing.multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh, max_output_size)
with self.test_session() as sess:
nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
[nms.get(), nms.get_field(fields.BoxListFields.scores),
nms.get_field(fields.BoxListFields.classes)])
self.assertAllClose(nms_corners_output, exp_nms_corners)
self.assertAllClose(nms_scores_output, exp_nms_scores)
self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_batch_multiclass_nms_with_batch_size_1(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]],
[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0],
[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 999, 2, 1004],
[0, 100, 1, 101]]]
exp_nms_scores = [[.95, .9, .85, .3]]
exp_nms_classes = [[0, 0, 1, 0]]
nms_dict = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms_dict)
self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
self.assertEqual(nms_output['num_detections'], [4])
def test_batch_multiclass_nms_with_batch_size_2(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 999, 2, 1004],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101],
[0, 0, 0, 0]]]
exp_nms_scores = [[.95, .9, 0, 0],
[.85, .5, .3, 0]]
exp_nms_classes = [[0, 0, 0, 0],
[1, 0, 0, 0]]
nms_dict = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
with self.test_session() as sess:
nms_output = sess.run(nms_dict)
self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
self.assertAllClose(nms_output['num_detections'], [2, 3])
def test_batch_multiclass_nms_with_masks(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
[[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
[[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
[[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
[[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
[[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
[[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
[[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
tf.float32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 999, 2, 1004],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101],
[0, 0, 0, 0]]]
exp_nms_scores = [[.95, .9, 0, 0],
[.85, .5, .3, 0]]
exp_nms_classes = [[0, 0, 0, 0],
[1, 0, 0, 0]]
exp_nms_masks = [[[[6, 7], [8, 9]],
[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]]
nms_dict = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks)
with self.test_session() as sess:
nms_output = sess.run(nms_dict)
self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
self.assertAllClose(nms_output['num_detections'], [2, 3])
self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
[[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
[[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
[[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
[[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
[[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
[[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
[[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
tf.float32)
num_valid_boxes = tf.constant([1, 1], tf.int32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 10.1, 1, 11.1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_nms_scores = [[.9, 0, 0, 0],
[.5, 0, 0, 0]]
exp_nms_classes = [[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_nms_masks = [[[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[8, 9], [10, 11]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]]]
nms_dict = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes, masks=masks)
with self.test_session() as sess:
nms_output = sess.run(nms_dict)
self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
self.assertAllClose(nms_output['num_detections'], [1, 1])
self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow as tf
def prefetch(tensor_dict, capacity):
"""Creates a prefetch queue for tensors.
Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
dequeue op that evaluates to a tensor_dict. This function is useful in
prefetching preprocessed tensors so that the data is readily available for
consumers.
Example input pipeline when you don't need batching:
----------------------------------------------------
key, string_tensor = slim.parallel_reader.parallel_read(...)
tensor_dict = decoder.decode(string_tensor)
tensor_dict = preprocessor.preprocess(tensor_dict, ...)
prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
tensor_dict = prefetch_queue.dequeue()
outputs = Model(tensor_dict)
...
----------------------------------------------------
For input pipelines with batching, refer to core/batcher.py
Args:
tensor_dict: a dictionary of tensors to prefetch.
capacity: the size of the prefetch queue.
Returns:
a FIFO prefetcher queue
"""
names = tensor_dict.keys()
dtypes = [t.dtype for t in tensor_dict.values()]
shapes = [t.get_shape() for t in tensor_dict.values()]
prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
shapes=shapes,
names=names,
name='prefetch_queue')
enqueue_op = prefetch_queue.enqueue(tensor_dict)
tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
prefetch_queue, [enqueue_op]))
tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
capacity),
tf.to_float(prefetch_queue.size()) * (1. / capacity))
return prefetch_queue
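# Hedged usage sketch (not part of the original module): a minimal,
# self-contained example of prefetching a random tensor and dequeuing it;
# shapes and names are illustrative only.
example_image = tf.random_normal([2, 8, 8, 3], name='example_image')
example_queue = prefetch({'image': example_image}, capacity=4)
example_dequeued = example_queue.dequeue()
with tf.Session() as sess:
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  print(sess.run(example_dequeued)['image'].shape)  # (2, 8, 8, 3)
  coord.request_stop()
  coord.join(threads)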
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.prefetcher."""
import tensorflow as tf
from object_detection.core import prefetcher
slim = tf.contrib.slim
class PrefetcherTest(tf.test.TestCase):
def test_prefetch_tensors_with_fully_defined_shapes(self):
with self.test_session() as sess:
batch_size = 10
image_size = 32
num_batches = 5
examples = tf.Variable(tf.constant(0, dtype=tf.int64))
counter = examples.count_up_to(num_batches)
image = tf.random_normal([batch_size, image_size,
image_size, 3],
dtype=tf.float32,
name='images')
label = tf.random_uniform([batch_size, 1], 0, 10,
dtype=tf.int32, name='labels')
prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
'image': image,
'label': label},
capacity=100)
tensor_dict = prefetch_queue.dequeue()
self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
[batch_size, image_size, image_size, 3])
self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
[batch_size, 1])
tf.global_variables_initializer().run()
with slim.queues.QueueRunners(sess):
for _ in range(num_batches):
results = sess.run(tensor_dict)
self.assertEquals(results['image'].shape,
(batch_size, image_size, image_size, 3))
self.assertEquals(results['label'].shape, (batch_size, 1))
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(tensor_dict)
def test_prefetch_tensors_with_partially_defined_shapes(self):
with self.test_session() as sess:
batch_size = 10
image_size = 32
num_batches = 5
examples = tf.Variable(tf.constant(0, dtype=tf.int64))
counter = examples.count_up_to(num_batches)
image = tf.random_normal([batch_size,
tf.Variable(image_size),
tf.Variable(image_size), 3],
dtype=tf.float32,
name='image')
image.set_shape([batch_size, None, None, 3])
label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
10, dtype=tf.int32, name='label')
label.set_shape([batch_size, None])
prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
'image': image,
'label': label},
capacity=100)
tensor_dict = prefetch_queue.dequeue()
self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
[batch_size, None, None, 3])
self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
[batch_size, None])
tf.global_variables_initializer().run()
with slim.queues.QueueRunners(sess):
for _ in range(num_batches):
results = sess.run(tensor_dict)
self.assertEquals(results['image'].shape,
(batch_size, image_size, image_size, 3))
self.assertEquals(results['label'].shape, (batch_size, 1))
with self.assertRaises(tf.errors.OutOfRangeError):
sess.run(tensor_dict)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in the preprocessing stage:
(a) operations that are applied to both training and testing data,
(b) operations that are applied only to training data for the purpose of
data augmentation.
A preprocessing function receives a set of inputs,
e.g. an image and bounding boxes,
performs an operation on them, and returns them.
Some examples are: randomly cropping the image, randomly mirroring the image,
randomly changing the brightness, contrast, hue and
randomly jittering the bounding boxes.
The preprocess function receives a tensor_dict which is a dictionary that maps
different field names to their tensors. For example,
tensor_dict[fields.InputDataFields.image] holds the image tensor.
The image is a rank 4 tensor: [1, height, width, channels] with
dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
each row is a box in [ymin, xmin, ymax, xmax] form.
Boxes are in normalized coordinates, meaning
their coordinate values range in [0, 1].
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import sys
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
def _apply_with_random_selector(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([func(
control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
Args:
x: A tuple of input tensors.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
num_inputs = len(x)
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
tuples = [list() for t in x]
for case in range(num_cases):
new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
output = func(tuple(new_x), case)
for j in range(num_inputs):
tuples[j].append(output[j])
for i in range(num_inputs):
tuples[i] = control_flow_ops.merge(tuples[i])[0]
return tuple(tuples)
def _random_integer(minval, maxval, seed):
"""Returns a random 0-D tensor between minval and maxval.
Args:
minval: minimum value of the random tensor.
maxval: maximum value of the random tensor.
seed: random seed.
Returns:
A random 0-D tensor between minval and maxval.
"""
return tf.random_uniform(
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval):
"""Normalizes pixel values in the image.
Moves the pixel values from the current [original_minval, original_maxval]
range to the [target_minval, target_maxval] range.
Args:
image: rank 3 float32 tensor containing 1
image -> [height, width, channels].
original_minval: current image minimum value.
original_maxval: current image maximum value.
target_minval: target image minimum value.
target_maxval: target image maximum value.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('NormalizeImage', values=[image]):
original_minval = float(original_minval)
original_maxval = float(original_maxval)
target_minval = float(target_minval)
target_maxval = float(target_maxval)
image = tf.to_float(image)
image = tf.subtract(image, original_minval)
image = tf.multiply(image, (target_maxval - target_minval) /
(original_maxval - original_minval))
image = tf.add(image, target_minval)
return image
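# --- Illustrative usage (editorial addition; not part of the original TF
# Object Detection API module). A minimal sketch of normalize_image mapping
# uint8-style values from [0, 255] into [-1, 1]; the values are made up.
def _example_normalize_image():
  image = tf.constant([[[0., 128., 255.]]])  # shape [1, 1, 3]
  # (x - 0) * 2/255 - 1, so 0 -> -1.0, 128 -> ~0.004, 255 -> 1.0.
  return normalize_image(image, original_minval=0, original_maxval=255,
                         target_minval=-1, target_maxval=1)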
def flip_boxes(boxes):
"""Left-right flip the boxes.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
"""
# Flip boxes.
ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
flipped_xmin = tf.subtract(1.0, xmax)
flipped_xmax = tf.subtract(1.0, xmin)
flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
return flipped_boxes
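# --- Illustrative usage (editorial addition). A hypothetical box showing how
# flip_boxes mirrors the x-coordinates about x = 0.5.
def _example_flip_boxes():
  boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]], dtype=tf.float32)
  # xmin' = 1 - xmax = 0.4 and xmax' = 1 - xmin = 0.8, so the result
  # evaluates to [[0.1, 0.4, 0.5, 0.8]].
  return flip_boxes(boxes)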
def retain_boxes_above_threshold(
boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
"""Retains boxes whose label score is above a given threshold.
If the label score for a box is missing (represented by NaN), the box is
retained. The boxes that don't pass the threshold will not appear in the
returned tensor.
Args:
boxes: float32 tensor of shape [num_instance, 4] representing boxes
location in normalized coordinates.
labels: rank 1 int32 tensor of shape [num_instance] containing the object
classes.
label_scores: float32 tensor of shape [num_instance] representing the
score for each box.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks are of
the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
threshold: scalar python float.
Returns:
retained_boxes: [num_retained_instance, 4]
retained_labels: [num_retained_instance]
retained_label_scores: [num_retained_instance]
If masks, or keypoints are not None, the function also returns:
retained_masks: [num_retained_instance, height, width]
retained_keypoints: [num_retained_instance, num_keypoints, 2]
"""
with tf.name_scope('RetainBoxesAboveThreshold',
values=[boxes, labels, label_scores]):
indices = tf.where(
tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
indices = tf.squeeze(indices, axis=1)
retained_boxes = tf.gather(boxes, indices)
retained_labels = tf.gather(labels, indices)
retained_label_scores = tf.gather(label_scores, indices)
result = [retained_boxes, retained_labels, retained_label_scores]
if masks is not None:
retained_masks = tf.gather(masks, indices)
result.append(retained_masks)
if keypoints is not None:
retained_keypoints = tf.gather(keypoints, indices)
result.append(retained_keypoints)
return result
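# --- Illustrative usage (editorial addition). Made-up boxes and scores
# showing that NaN-scored boxes are kept while low-scored ones are dropped.
def _example_retain_boxes_above_threshold():
  boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                       [0.2, 0.2, 0.9, 0.9],
                       [0.1, 0.1, 0.4, 0.4]], dtype=tf.float32)
  labels = tf.constant([1, 2, 3], dtype=tf.int32)
  label_scores = tf.constant([0.9, float('nan'), 0.2], dtype=tf.float32)
  # With threshold=0.5 the first box passes, the NaN-scored box is retained
  # by convention, and the third box (score 0.2) is filtered out.
  return retain_boxes_above_threshold(boxes, labels, label_scores,
                                      threshold=0.5)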
def _flip_masks(masks):
"""Left-right flips masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
"""
return masks[:, :, ::-1]
def random_horizontal_flip(
image,
boxes=None,
masks=None,
keypoints=None,
keypoint_flip_permutation=None,
seed=None):
"""Randomly decides whether to mirror the image and detections or not.
The probability of flipping the image is 50%.
Args:
image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4]
containing the bounding boxes.
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
permutation.
seed: random seed
Returns:
image: image which is the same shape as input image.
If boxes, masks, keypoints, and keypoint_flip_permutation is not None,
the function also returns the following tensors.
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
Raises:
ValueError: if keypoints are provided but keypoint_flip_permutation is not.
"""
def _flip_image(image):
# flip image
image_flipped = tf.image.flip_left_right(image)
return image_flipped
if keypoints is not None and keypoint_flip_permutation is None:
raise ValueError(
'keypoints are provided but keypoint_flip_permutation is not provided')
with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
result = []
# random variable defining whether to do flip or not
do_a_flip_random = tf.random_uniform([], seed=seed)
# flip only if there are bounding boxes in image!
do_a_flip_random = tf.logical_and(
tf.greater(tf.size(boxes), 0), tf.greater(do_a_flip_random, 0.5))
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
result.append(image)
# flip boxes
if boxes is not None:
boxes = tf.cond(
do_a_flip_random, lambda: flip_boxes(boxes), lambda: boxes)
result.append(boxes)
# flip masks
if masks is not None:
masks = tf.cond(
do_a_flip_random, lambda: _flip_masks(masks), lambda: masks)
result.append(masks)
# flip keypoints
if keypoints is not None and keypoint_flip_permutation is not None:
permutation = keypoint_flip_permutation
keypoints = tf.cond(
do_a_flip_random,
lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
lambda: keypoints)
result.append(keypoints)
return tuple(result)
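# --- Illustrative usage (editorial addition). A sketch with a hypothetical
# 3-keypoint skeleton in which keypoints 0 and 1 (say, left/right eye) swap
# under a horizontal flip and keypoint 2 maps to itself; shapes are made up.
def _example_random_horizontal_flip():
  image = tf.random_uniform([100, 200, 3])
  boxes = tf.constant([[0.0, 0.0, 0.5, 0.25]], dtype=tf.float32)
  keypoints = tf.random_uniform([1, 3, 2])
  flip_permutation = tf.constant([1, 0, 2], dtype=tf.int32)
  return random_horizontal_flip(
      image, boxes=boxes, keypoints=keypoints,
      keypoint_flip_permutation=flip_permutation, seed=0)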
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
"""Scales each value in the pixels of the image.
This function scales each pixel independently of the others.
For each value in the image tensor, it draws a random number between
minval and maxval and multiplies the value by it.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
seed: random seed.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomPixelValueScale', values=[image]):
color_coef = tf.random_uniform(
tf.shape(image),
minval=minval,
maxval=maxval,
dtype=tf.float32,
seed=seed)
image = tf.multiply(image, color_coef)
image = tf.clip_by_value(image, 0.0, 1.0)
return image
def random_image_scale(image,
masks=None,
min_scale_ratio=0.5,
max_scale_ratio=2.0,
seed=None):
"""Scales the image size.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
masks: (optional) rank 3 float32 tensor containing masks with
size [height, width, num_masks]. The value is set to None if there are no
masks.
min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio.
seed: random seed.
Returns:
image: image which is the same rank as input image.
masks: If masks is not None, resized masks which are the same rank as input
masks will be returned.
"""
with tf.name_scope('RandomImageScale', values=[image]):
result = []
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
size_coef = tf.random_uniform([],
minval=min_scale_ratio,
maxval=max_scale_ratio,
dtype=tf.float32, seed=seed)
image_newysize = tf.to_int32(
tf.multiply(tf.to_float(image_height), size_coef))
image_newxsize = tf.to_int32(
tf.multiply(tf.to_float(image_width), size_coef))
image = tf.image.resize_images(
image, [image_newysize, image_newxsize], align_corners=True)
result.append(image)
if masks is not None:
masks = tf.image.resize_nearest_neighbor(
masks, [image_newysize, image_newxsize], align_corners=True)
result.append(masks)
return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None):
"""Changes the image from RGB to Grayscale with the given probability.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1].
seed: random seed.
Returns:
image: image which is the same shape as input image.
"""
def _image_to_gray(image):
image_gray1 = tf.image.rgb_to_grayscale(image)
image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
return image_gray3
with tf.name_scope('RandomRGBtoGray', values=[image]):
# random variable defining whether to convert to grayscale or not
do_gray_random = tf.random_uniform([], seed=seed)
image = tf.cond(
tf.greater(do_gray_random, probability), lambda: image,
lambda: _image_to_gray(image))
return image
def random_adjust_brightness(image, max_delta=0.2):
"""Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1).
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustBrightness', values=[image]):
image = tf.image.random_brightness(image, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
"""Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
min_delta: see max_delta.
max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustContrast', values=[image]):
image = tf.image.random_contrast(image, min_delta, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_hue(image, max_delta=0.02):
"""Randomly adjusts hue.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustHue', values=[image]):
image = tf.image.random_hue(image, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
"""Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
min_delta: see max_delta.
max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustSaturation', values=[image]):
image = tf.image.random_saturation(image, min_delta, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_distort_color(image, color_ordering=0):
"""Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast
and saturation changes. Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1).
Returns:
image: image which is the same shape as input image.
Raises:
ValueError: if color_ordering is not in {0, 1}.
"""
with tf.name_scope('RandomDistortColor', values=[image]):
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
elif color_ordering == 1:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
else:
raise ValueError('color_ordering must be in {0, 1}')
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image
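# --- Illustrative usage (editorial addition). Shows how random_distort_color
# can be combined with _apply_with_random_selector so the ordering of the
# color ops is itself sampled at graph-execution time; shapes are made up.
def _example_random_distort_color():
  image = tf.random_uniform([64, 64, 3])
  return _apply_with_random_selector(
      image,
      lambda x, ordering: random_distort_color(x, color_ordering=ordering),
      num_cases=2)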
def random_jitter_boxes(boxes, ratio=0.05, seed=None):
"""Randomly jitter boxes in image.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
ratio: The ratio of the box width and height that the corners can jitter.
For example if the width is 100 pixels and ratio is 0.05,
the corners can jitter up to 5 pixels in the x direction.
seed: random seed.
Returns:
boxes: boxes which is the same shape as input boxes.
"""
def random_jitter_box(box, ratio, seed):
"""Randomly jitter box.
Args:
box: bounding box [1, 1, 4].
ratio: max ratio between jittered box and original box,
a number between [0, 0.5].
seed: random seed.
Returns:
jittered_box: jittered box.
"""
rand_numbers = tf.random_uniform(
[1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
jittered_box = tf.add(box, hw_rand_coefs)
jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
return jittered_box
with tf.name_scope('RandomJitterBoxes', values=[boxes]):
# boxes are [N, 4]. Let's first make them [N, 1, 1, 4].
boxes_shape = tf.shape(boxes)
boxes = tf.expand_dims(boxes, 1)
boxes = tf.expand_dims(boxes, 2)
distorted_boxes = tf.map_fn(
lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
return distorted_boxes
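# --- Illustrative usage (editorial addition). With ratio=0.05 each corner of
# this made-up 0.5 x 0.5 box can move by at most 0.025 in normalized
# coordinates (5% of the box height/width).
def _example_random_jitter_boxes():
  boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
  return random_jitter_boxes(boxes, ratio=0.05, seed=42)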
def _strict_random_crop_image(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3):
"""Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
outside the crop will be set to NaN, which is consistent with the original
keypoint encoding for non-existing keypoints. This function always crops
the image and is intended to be used by the `random_crop_image` function,
which sometimes returns the image unchanged.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes with shape
[num_instances, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
with tf.name_scope('RandomCropImage', values=[image, boxes]):
image_shape = tf.shape(image)
# boxes are [N, 4]. Let's first make them [N, 1, 4].
boxes_expanded = tf.expand_dims(
tf.clip_by_value(
boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
image_shape,
bounding_boxes=boxes_expanded,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=100,
use_image_if_no_bounding_boxes=True)
im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
new_image = tf.slice(image, im_box_begin, im_box_size)
new_image.set_shape([None, None, image.get_shape()[2]])
# [1, 4]
im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
# [4]
im_box_rank1 = tf.squeeze(im_box)
boxlist = box_list.BoxList(boxes)
boxlist.add_field('labels', labels)
im_boxlist = box_list.BoxList(im_box_rank2)
# remove boxes that are outside cropped image
boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
boxlist, im_box_rank1)
# remove boxes that are outside image
overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
boxlist, im_boxlist, overlap_thresh)
# change the coordinate of the remaining boxes
new_labels = overlapping_boxlist.get_field('labels')
new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
im_box_rank1)
new_boxes = new_boxlist.get()
new_boxes = tf.clip_by_value(
new_boxes, clip_value_min=0.0, clip_value_max=1.0)
result = [new_image, new_boxes, new_labels]
if masks is not None:
masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
masks_of_boxes_completely_inside_window = tf.gather(
masks_of_boxes_inside_window, keep_ids)
masks_box_begin = [im_box_begin[2], im_box_begin[0], im_box_begin[1]]
masks_box_size = [im_box_size[2], im_box_size[0], im_box_size[1]]
new_masks = tf.slice(
masks_of_boxes_completely_inside_window,
masks_box_begin, masks_box_size)
result.append(new_masks)
if keypoints is not None:
keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
keypoints_of_boxes_completely_inside_window = tf.gather(
keypoints_of_boxes_inside_window, keep_ids)
new_keypoints = keypoint_ops.change_coordinate_frame(
keypoints_of_boxes_completely_inside_window, im_box_rank1)
new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
[0.0, 0.0, 1.0, 1.0])
result.append(new_keypoints)
return tuple(result)
def random_crop_image(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3,
random_coef=0.0,
seed=None):
"""Randomly crops the image.
Given the input image and its bounding boxes, this op randomly
crops a subimage. Given a user-provided set of input constraints,
the crop window is resampled until it satisfies these constraints.
If within 100 trials it is unable to find a valid crop, the original
image is returned. See the Args section for a description of the input
constraints. Both input boxes and returned boxes are in normalized
form (e.g., lie in the unit square [0, 1]).
This function will return the original image with probability random_coef.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
outside the crop will be set to NaN, which is consistent with the original
keypoint encoding for non-existing keypoints.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes with shape
[num_instances, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
labels: new labels.
If masks, or keypoints are not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
def strict_random_crop_image_fn():
return _strict_random_crop_image(
image,
boxes,
labels,
masks=masks,
keypoints=keypoints,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
overlap_thresh=overlap_thresh)
# Avoids tf.cond to speed up Faster R-CNN training on borg. See b/140057645.
if random_coef < sys.float_info.min:
result = strict_random_crop_image_fn()
else:
do_a_crop_random = tf.random_uniform([], seed=seed)
do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
outputs = [image, boxes, labels]
if masks is not None:
outputs.append(masks)
if keypoints is not None:
outputs.append(keypoints)
result = tf.cond(do_a_crop_random,
strict_random_crop_image_fn,
lambda: tuple(outputs))
return result
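# --- Illustrative usage (editorial addition). All shapes and values below are
# made up; with random_coef=0.5 the original inputs come back roughly half the
# time, otherwise a crop covering at least half of some box is taken.
def _example_random_crop_image():
  image = tf.random_uniform([480, 640, 3])
  boxes = tf.constant([[0.1, 0.1, 0.6, 0.5]], dtype=tf.float32)
  labels = tf.constant([1], dtype=tf.int32)
  return random_crop_image(
      image, boxes, labels, min_object_covered=0.5, random_coef=0.5, seed=7)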
def random_pad_image(image,
boxes,
min_image_size=None,
max_image_size=None,
pad_color=None,
seed=None):
"""Randomly pads the image.
This function randomly pads the image with zeros. The final size of the
padded image will be between min_image_size and max_image_size.
if min_image_size is smaller than the input image size, min_image_size will
be set to the input image size. The same for max_image_size. The input image
will be located at a uniformly random location inside the padded image.
The relative location of the boxes to the original image will remain the same.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
min_image_size: a tensor of size [min_height, min_width], type tf.int32.
If passed as None, will be set to image size
[height, width].
max_image_size: a tensor of size [max_height, max_width], type tf.int32.
If passed as None, will be set to twice the
image [height * 2, width * 2].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input
image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
"""
if pad_color is None:
pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
if max_image_size is None:
max_image_size = tf.stack([image_height * 2, image_width * 2])
max_image_size = tf.maximum(max_image_size,
tf.stack([image_height, image_width]))
if min_image_size is None:
min_image_size = tf.stack([image_height, image_width])
min_image_size = tf.maximum(min_image_size,
tf.stack([image_height, image_width]))
target_height = tf.cond(
max_image_size[0] > min_image_size[0],
lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
lambda: max_image_size[0])
target_width = tf.cond(
max_image_size[1] > min_image_size[1],
lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
lambda: max_image_size[1])
offset_height = tf.cond(
target_height > image_height,
lambda: _random_integer(0, target_height - image_height, seed),
lambda: tf.constant(0, dtype=tf.int32))
offset_width = tf.cond(
target_width > image_width,
lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32))
new_image = tf.image.pad_to_bounding_box(
image, offset_height=offset_height, offset_width=offset_width,
target_height=target_height, target_width=target_width)
# Setting color of the padded pixels
image_ones = tf.ones_like(image)
image_ones_padded = tf.image.pad_to_bounding_box(
image_ones, offset_height=offset_height, offset_width=offset_width,
target_height=target_height, target_width=target_width)
image_color_padded = (1.0 - image_ones_padded) * pad_color
new_image += image_color_padded
# setting boxes
new_window = tf.to_float(
tf.stack([
-offset_height, -offset_width, target_height - offset_height,
target_width - offset_width
]))
new_window /= tf.to_float(
tf.stack([image_height, image_width, image_height, image_width]))
boxlist = box_list.BoxList(boxes)
new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
new_boxes = new_boxlist.get()
return new_image, new_boxes
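# --- Illustrative usage (editorial addition). With the default None sizes the
# padded image lands between the original [200, 300] and twice that, and the
# boxes are re-expressed relative to the padded frame; values are made up.
def _example_random_pad_image():
  image = tf.random_uniform([200, 300, 3])
  boxes = tf.constant([[0.2, 0.2, 0.8, 0.8]], dtype=tf.float32)
  return random_pad_image(image, boxes, seed=3)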
def random_crop_pad_image(image,
boxes,
labels,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3,
random_coef=0.0,
min_padded_size_ratio=None,
max_padded_size_ratio=None,
pad_color=None,
seed=None):
"""Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops
the image and then randomly pads the image with background values. Parameters
min_padded_size_ratio and max_padded_size_ratio determine the range of the
final output image size. Specifically, the final image size will have a size
in the range of min_padded_size_ratio * tf.shape(image) and
max_padded_size_ratio * tf.shape(image). Note that these ratios are with
respect to the size of the original image, so we can't capture the same
effect easily by independently applying RandomCropImage
followed by RandomPadImage.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
Returns:
padded_image: padded image.
padded_boxes: boxes which is the same rank as input boxes. Boxes are in
normalized form.
cropped_labels: cropped labels.
"""
image_size = tf.shape(image)
image_height = image_size[0]
image_width = image_size[1]
if min_padded_size_ratio is None:
min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
if max_padded_size_ratio is None:
max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)
cropped_image, cropped_boxes, cropped_labels = random_crop_image(
image=image,
boxes=boxes,
labels=labels,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
overlap_thresh=overlap_thresh,
random_coef=random_coef,
seed=seed)
min_image_size = tf.to_int32(
tf.to_float(tf.stack([image_height, image_width])) *
min_padded_size_ratio)
max_image_size = tf.to_int32(
tf.to_float(tf.stack([image_height, image_width])) *
max_padded_size_ratio)
padded_image, padded_boxes = random_pad_image(
cropped_image,
cropped_boxes,
min_image_size=min_image_size,
max_image_size=max_image_size,
pad_color=pad_color,
seed=seed)
return padded_image, padded_boxes, cropped_labels
def random_crop_to_aspect_ratio(image,
boxes,
labels,
masks=None,
keypoints=None,
aspect_ratio=1.0,
overlap_thresh=0.3,
seed=None):
"""Randomly crops an image to the specified aspect ratio.
Randomly crops a portion of the image such that the crop is of the
specified aspect ratio, and the crop is as large as possible. If the specified
aspect ratio is larger than the aspect ratio of the image, this op will
randomly remove rows from the top and bottom of the image. If the specified
aspect ratio is less than the aspect ratio of the image, this op will randomly
remove cols from the left and right of the image. If the specified aspect
ratio is the same as the aspect ratio of the image, this op will return the
image.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
aspect_ratio: the aspect ratio of cropped image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
Raises:
ValueError: If image is not a 3D tensor.
"""
if len(image.get_shape()) != 3:
raise ValueError('Image should be 3D tensor')
with tf.name_scope('RandomCropToAspectRatio', values=[image]):
image_shape = tf.shape(image)
orig_height = image_shape[0]
orig_width = image_shape[1]
orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
def target_height_fn():
return tf.to_int32(
tf.round(
tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))
target_height = tf.cond(
orig_aspect_ratio >= new_aspect_ratio,
lambda: orig_height,
target_height_fn)
def target_width_fn():
return tf.to_int32(
tf.round(
tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))
target_width = tf.cond(
orig_aspect_ratio <= new_aspect_ratio,
lambda: orig_width,
target_width_fn)
# Either offset_height = 0 and offset_width is randomly chosen from
# [0, orig_width - target_width], or else offset_width = 0 and
# offset_height is randomly chosen from [0, orig_height - target_height].
offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed)
new_image = tf.image.crop_to_bounding_box(
image, offset_height, offset_width, target_height, target_width)
im_box = tf.stack([
tf.to_float(offset_height) / tf.to_float(orig_height),
tf.to_float(offset_width) / tf.to_float(orig_width),
tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
])
boxlist = box_list.BoxList(boxes)
boxlist.add_field('labels', labels)
im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
# remove boxes whose overlap with the image is less than overlap_thresh
overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
boxlist, im_boxlist, overlap_thresh)
# change the coordinate of the remaining boxes
new_labels = overlapping_boxlist.get_field('labels')
new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
im_box)
new_boxlist = box_list_ops.clip_to_window(new_boxlist,
tf.constant(
[0.0, 0.0, 1.0, 1.0],
tf.float32))
new_boxes = new_boxlist.get()
result = [new_image, new_boxes, new_labels]
if masks is not None:
masks_inside_window = tf.gather(masks, keep_ids)
masks_box_begin = tf.stack([0, offset_height, offset_width])
masks_box_size = tf.stack([-1, target_height, target_width])
new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
result.append(new_masks)
if keypoints is not None:
keypoints_inside_window = tf.gather(keypoints, keep_ids)
new_keypoints = keypoint_ops.change_coordinate_frame(
keypoints_inside_window, im_box)
new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
[0.0, 0.0, 1.0, 1.0])
result.append(new_keypoints)
return tuple(result)
def random_black_patches(image,
max_black_patches=10,
probability=0.5,
size_to_image_ratio=0.1,
random_seed=None):
"""Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size
to the image where size is specified via the size_to_image_ratio parameter.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_black_patches: number of times that the function tries to add a
black box to the image.
probability: at each try, what is the chance of adding a box.
size_to_image_ratio: Determines the ratio of the size of the black patches
to the size of the image.
box_size = size_to_image_ratio *
min(image_width, image_height)
random_seed: random seed.
Returns:
image
"""
def add_black_patch_to_image(image):
"""Function for adding one patch to the image.
Args:
image: image
Returns:
image with a randomly added black box
"""
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
box_size = tf.to_int32(
tf.multiply(
tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
size_to_image_ratio))
normalized_y_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
normalized_x_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
image_height, image_width)
image = tf.multiply(image, mask)
return image
with tf.name_scope('RandomBlackPatchInImage', values=[image]):
for _ in range(max_black_patches):
random_prob = tf.random_uniform([], minval=0.0, maxval=1.0,
dtype=tf.float32, seed=random_seed)
image = tf.cond(
tf.greater(random_prob, probability), lambda: image,
lambda: add_black_patch_to_image(image))
return image
def image_to_float(image):
"""Used in Faster R-CNN. Casts image pixel values to float.
Args:
image: input image, which might be in tf.uint8 or some other format.
Returns:
image: image in tf.float32 format.
"""
with tf.name_scope('ImageToFloat', values=[image]):
image = tf.to_float(image)
return image
def random_resize_method(image, target_size):
"""Uses a random resize method to resize the image to target size.
Args:
image: a rank 3 tensor.
target_size: a list of [target_height, target_width]
Returns:
resized image.
"""
resized_image = _apply_with_random_selector(
image,
lambda x, method: tf.image.resize_images(x, target_size, method),
num_cases=4)
return resized_image
def resize_to_range(image,
masks=None,
min_dimension=None,
max_dimension=None,
align_corners=False):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
1. If the image can be rescaled so its minimum dimension is equal to the
provided value without the other dimension exceeding max_dimension,
then do so.
2. Otherwise, resize so the largest dimension is equal to max_dimension.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
min_dimension: (optional) (scalar) desired size of the smaller image
dimension.
max_dimension: (optional) (scalar) maximum allowed size
of the larger image dimension.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
If masks is not None, also outputs masks:
A 3D tensor of shape [num_instances, new_height, new_width]
Raises:
ValueError: if the image is not a 3D tensor.
"""
if len(image.get_shape()) != 3:
raise ValueError('Image should be 3D tensor')
with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1])
orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32)
large_scale_factor = min_dimension / orig_min_dim
# Scaling orig_(height|width) by large_scale_factor will make the smaller
# dimension equal to min_dimension, save for floating point rounding errors.
# For reasonably-sized images, taking the nearest integer will reliably
# eliminate this error.
large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
large_size = tf.stack([large_height, large_width])
if max_dimension:
# Calculates the smaller of the possible sizes, use that if the larger
# is too big.
orig_max_dim = tf.maximum(orig_height, orig_width)
max_dimension = tf.constant(max_dimension, dtype=tf.float32)
small_scale_factor = max_dimension / orig_max_dim
# Scaling orig_(height|width) by small_scale_factor will make the larger
# dimension equal to max_dimension, save for floating point rounding
# errors. For reasonably-sized images, taking the nearest integer will
# reliably eliminate this error.
small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
small_size = tf.stack([small_height, small_width])
new_size = tf.cond(
tf.to_float(tf.reduce_max(large_size)) > max_dimension,
lambda: small_size, lambda: large_size)
else:
new_size = large_size
new_image = tf.image.resize_images(image, new_size,
align_corners=align_corners)
result = new_image
if masks is not None:
num_instances = tf.shape(masks)[0]
def resize_masks_branch():
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.squeeze(new_masks, axis=3)
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0,
resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
return result
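# --- Illustrative usage (editorial addition). A worked example of the two
# cases: a 480x640 image with min_dimension=600 / max_dimension=1024 scales by
# 600/480 = 1.25 to 600x800 (within the max), whereas a 400x1200 image would
# become 600x1800, so it is instead scaled by 1024/1200 to roughly 341x1024.
def _example_resize_to_range():
  image = tf.random_uniform([480, 640, 3])
  return resize_to_range(image, min_dimension=600, max_dimension=1024)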
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
"""Scales boxes from normalized to pixel coordinates.
Args:
image: A 3D float32 tensor of shape [height, width, channels].
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes in normalized coordinates. Each row is of the form
[ymin, xmin, ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
Returns:
image: unchanged input image.
scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
bounding boxes in pixel coordinates.
scaled_keypoints: a 3D float32 tensor with shape
[num_instances, num_keypoints, 2] containing the keypoints in pixel
coordinates.
"""
boxlist = box_list.BoxList(boxes)
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
result = [image, scaled_boxes]
if keypoints is not None:
scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
result.append(scaled_keypoints)
return tuple(result)
# pylint: disable=g-doc-return-or-yield
def resize_image(image,
masks=None,
new_height=600,
new_width=1024,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False):
"""See `tf.image.resize_images` for detailed doc."""
with tf.name_scope(
'ResizeImage',
values=[image, new_height, new_width, method, align_corners]):
new_image = tf.image.resize_images(image, [new_height, new_width],
method=method,
align_corners=align_corners)
result = new_image
if masks is not None:
num_instances = tf.shape(masks)[0]
new_size = tf.constant([new_height, new_width], dtype=tf.int32)
def resize_masks_branch():
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.squeeze(new_masks, axis=3)
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0,
resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
return result
def subtract_channel_mean(image, means=None):
"""Normalizes an image by subtracting a mean from each channel.
Args:
image: A 3D tensor of shape [height, width, channels]
means: float list containing a mean for each channel
Returns:
normalized_images: a tensor of shape [height, width, channels]
Raises:
ValueError: if image is not a 3D tensor or if the number of means is not
equal to the number of channels.
"""
with tf.name_scope('SubtractChannelMean', values=[image, means]):
if len(image.get_shape()) != 3:
raise ValueError('Input must be of size [height, width, channels]')
if len(means) != image.get_shape()[-1]:
raise ValueError('len(means) must match the number of channels')
return image - [[means]]
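# --- Illustrative usage (editorial addition). The per-channel means below are
# the commonly used ImageNet RGB means, given only as an example; this module
# does not mandate any particular values.
def _example_subtract_channel_mean():
  image = tf.random_uniform([32, 32, 3]) * 255.0
  return subtract_channel_mean(image, means=[123.68, 116.779, 103.939])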
def one_hot_encoding(labels, num_classes=None):
"""One-hot encodes the multiclass labels.
Example usage:
labels = tf.constant([1, 4], dtype=tf.int32)
one_hot = one_hot_encoding(labels, num_classes=5)
one_hot.eval() # evaluates to [0, 1, 0, 0, 1]
Args:
labels: A tensor of shape [None] corresponding to the labels.
num_classes: Number of classes in the dataset.
Returns:
onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
encoding of the labels.
Raises:
ValueError: if num_classes is not specified.
"""
with tf.name_scope('OneHotEncoding', values=[labels]):
if num_classes is None:
raise ValueError('num_classes must be specified')
labels = tf.one_hot(labels, num_classes, 1, 0)
return tf.reduce_max(labels, 0)
def rgb_to_gray(image):
"""Converts a 3 channel RGB image to a 1 channel grayscale image.
Args:
image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
with pixel values varying between [0, 1].
Returns:
image: A single channel grayscale image -> [height, width, 1].
"""
return tf.image.rgb_to_grayscale(image)
def ssd_random_crop(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio_range=((0.5, 2.0),) * 7,
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
def random_crop_selector(selected_result, index):
"""Applies random_crop_image to selected result.
Args:
selected_result: A tuple containing image, boxes, labels, keypoints (if
not None), and masks (if not None).
index: The index that was randomly selected.
Returns: A tuple containing image, boxes, labels, keypoints (if not None),
and masks (if not None).
"""
i = 3
image, boxes, labels = selected_result[:i]
selected_masks = None
selected_keypoints = None
if masks is not None:
selected_masks = selected_result[i]
i += 1
if keypoints is not None:
selected_keypoints = selected_result[i]
return random_crop_image(
image=image,
boxes=boxes,
labels=labels,
masks=selected_masks,
keypoints=selected_keypoints,
min_object_covered=min_object_covered[index],
aspect_ratio_range=aspect_ratio_range[index],
area_range=area_range[index],
overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index],
seed=seed)
result = _apply_with_random_selector_tuples(
tuple(
t for t in (image, boxes, labels, masks, keypoints) if t is not None),
random_crop_selector,
num_cases=len(min_object_covered))
return result
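# --- Illustrative usage (editorial addition). Each call samples one of the
# seven SSD-style crop settings defined by the tuple-valued defaults above;
# the image, boxes and labels here are made-up placeholders.
def _example_ssd_random_crop():
  image = tf.random_uniform([300, 300, 3])
  boxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)
  labels = tf.constant([1], dtype=tf.int32)
  return ssd_random_crop(image, boxes, labels, seed=11)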
def ssd_random_crop_pad(image,
boxes,
labels,
min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio_range=((0.5, 2.0),) * 6,
area_range=((0.1, 1.0),) * 6,
overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 6,
min_padded_size_ratio=(None,) * 6,
max_padded_size_ratio=(None,) * 6,
pad_color=(None,) * 6,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
new_labels: new labels.
"""
def random_crop_pad_selector(image_boxes_labels, index):
image, boxes, labels = image_boxes_labels
return random_crop_pad_image(
image,
boxes,
labels,
min_object_covered=min_object_covered[index],
aspect_ratio_range=aspect_ratio_range[index],
area_range=area_range[index],
overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index],
min_padded_size_ratio=min_padded_size_ratio[index],
max_padded_size_ratio=max_padded_size_ratio[index],
pad_color=pad_color[index],
seed=seed)
new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
(image, boxes, labels),
random_crop_pad_selector,
num_cases=len(min_object_covered))
return new_image, new_boxes, new_labels
def ssd_random_crop_fixed_aspect_ratio(
image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio=1.0,
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
The only difference is that the aspect ratio of the crops is fixed.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio: aspect ratio of the cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
    overlap_thresh: minimum overlap threshold a box must have with the newly
                    cropped image in order to be kept.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
    image: image of the same rank as the input image.
    boxes: boxes of the same rank as the input boxes.
           Boxes are in normalized form.
    labels: new labels.
  If masks or keypoints are not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
min_object_covered, aspect_ratio_range,
area_range, overlap_thresh, random_coef, seed)
i = 3
new_image, new_boxes, new_labels = crop_result[:i]
new_masks = None
new_keypoints = None
if masks is not None:
new_masks = crop_result[i]
i += 1
if keypoints is not None:
new_keypoints = crop_result[i]
result = random_crop_to_aspect_ratio(
new_image,
new_boxes,
new_labels,
new_masks,
new_keypoints,
aspect_ratio=aspect_ratio,
seed=seed)
return result
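# Editor's note (illustrative sketch, not part of the original module): this
# wrapper first applies ssd_random_crop and then forces the requested aspect
# ratio via random_crop_to_aspect_ratio. A square-crop call could look like
# the following (the tensor names are assumptions):
#
#   new_image, new_boxes, new_labels = ssd_random_crop_fixed_aspect_ratio(
#       image, boxes, labels, aspect_ratio=1.0)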
def get_default_func_arg_map(include_instance_masks=False,
include_keypoints=False):
"""Returns the default mapping from a preprocessor function to its args.
Args:
include_instance_masks: If True, preprocessing functions will modify the
instance masks, too.
include_keypoints: If True, preprocessing functions will modify the
keypoints, too.
Returns:
A map from preprocessing functions to the arguments they receive.
"""
groundtruth_instance_masks = None
if include_instance_masks:
groundtruth_instance_masks = (
fields.InputDataFields.groundtruth_instance_masks)
groundtruth_keypoints = None
if include_keypoints:
groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
prep_func_arg_map = {
normalize_image: (fields.InputDataFields.image,),
random_horizontal_flip: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_pixel_value_scale: (fields.InputDataFields.image,),
random_image_scale: (fields.InputDataFields.image,
groundtruth_instance_masks,),
random_rgb_to_gray: (fields.InputDataFields.image,),
random_adjust_brightness: (fields.InputDataFields.image,),
random_adjust_contrast: (fields.InputDataFields.image,),
random_adjust_hue: (fields.InputDataFields.image,),
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes),
random_crop_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes),
random_crop_to_aspect_ratio: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_black_patches: (fields.InputDataFields.image,),
retain_boxes_above_threshold: (
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_label_scores,
groundtruth_instance_masks,
groundtruth_keypoints,),
image_to_float: (fields.InputDataFields.image,),
random_resize_method: (fields.InputDataFields.image,),
resize_to_range: (fields.InputDataFields.image,
groundtruth_instance_masks,),
scale_boxes_to_pixel_coordinates: (
fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
groundtruth_keypoints,),
flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
resize_image: (fields.InputDataFields.image,
groundtruth_instance_masks,),
subtract_channel_mean: (fields.InputDataFields.image,),
one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
rgb_to_gray: (fields.InputDataFields.image,),
ssd_random_crop: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
ssd_random_crop_pad: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes),
ssd_random_crop_fixed_aspect_ratio: (
fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
}
return prep_func_arg_map
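# Editor's illustrative sketch (not part of the original API): preprocess()
# below looks each preprocessing function up in this map to decide which
# tensor_dict fields to pass in and write back, so a custom op can be
# registered by extending the returned dict. The helper and its op are
# hypothetical examples.
def _example_custom_func_arg_map():
  """Returns a func_arg_map extended with a hypothetical image-only op."""
  def _identity_image_op(image):
    # Hypothetical custom op: consumes and returns a single image tensor.
    return tf.identity(image)
  func_arg_map = get_default_func_arg_map()
  func_arg_map[_identity_image_op] = (fields.InputDataFields.image,)
  return func_arg_map, _identity_image_op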
def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
"""Preprocess images and bounding boxes.
  Various types of preprocessing are applied based on the preprocess_options
  list, e.g. "crop image" (affects the image and possibly the boxes) or
  "white balance image" (affects only the image). If preprocess_options is
  empty, no preprocessing is done.
Args:
tensor_dict: dictionary that contains images, boxes, and can contain other
things as well.
                 images-> rank 4 float32 tensor containing
                          1 image -> [1, height, width, 3]
                          with pixel values varying between [0, 1].
boxes-> rank 2 float32 tensor containing
the bounding boxes -> [N, 4].
Boxes are in normalized form meaning
their coordinates vary between [0, 1].
Each row is in the form
of [ymin, xmin, ymax, xmax].
    preprocess_options: a list of tuples, where each tuple contains a
                        function and a dictionary of keyword arguments to
                        pass to that function.
func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return.
Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
Raises:
ValueError: (a) If the functions passed to Preprocess
are not in func_arg_map.
(b) If the arguments that a function needs
do not exist in tensor_dict.
(c) If image in tensor_dict is not rank 4
"""
if func_arg_map is None:
func_arg_map = get_default_func_arg_map()
  # Changes images to image (rank 4 to rank 3) since the preprocessing
  # functions receive a rank 3 tensor for the image.
if fields.InputDataFields.image in tensor_dict:
images = tensor_dict[fields.InputDataFields.image]
if len(images.get_shape()) != 4:
raise ValueError('images in tensor_dict should be rank 4')
image = tf.squeeze(images, squeeze_dims=[0])
tensor_dict[fields.InputDataFields.image] = image
# Preprocess inputs based on preprocess_options
for option in preprocess_options:
func, params = option
if func not in func_arg_map:
raise ValueError('The function %s does not exist in func_arg_map' %
(func.__name__))
arg_names = func_arg_map[func]
for a in arg_names:
if a is not None and a not in tensor_dict:
raise ValueError('The function %s requires argument %s' %
(func.__name__, a))
def get_arg(key):
return tensor_dict[key] if key is not None else None
args = [get_arg(a) for a in arg_names]
results = func(*args, **params)
if not isinstance(results, (list, tuple)):
results = (results,)
# Removes None args since the return values will not contain those.
arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
for res, arg_name in zip(results, arg_names):
tensor_dict[arg_name] = res
  # Changes image back to images (rank 3 to rank 4) to be compatible with
  # what was received in the first place.
if fields.InputDataFields.image in tensor_dict:
image = tensor_dict[fields.InputDataFields.image]
images = tf.expand_dims(image, 0)
tensor_dict[fields.InputDataFields.image] = images
return tensor_dict
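# Editor's illustrative sketch (not part of the original API): a typical call
# into preprocess(). The tensor_dict is keyed by standard_fields names; the
# image shape and the chosen options below are assumptions.
def _example_preprocess_usage():
  """Builds a toy tensor_dict and runs two preprocessing options over it."""
  image = tf.random_uniform([1, 480, 640, 3])  # rank 4, values in [0, 1)
  boxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
  }
  options = [(random_horizontal_flip, {}),
             (random_pixel_value_scale, {})]
  return preprocess(tensor_dict, options)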
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.preprocessor."""
import mock
import numpy as np
import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
class PreprocessorTest(tf.test.TestCase):
def createColorfulTestImage(self):
ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
imr = tf.concat([ch255, ch0, ch0], 3)
img = tf.concat([ch255, ch255, ch0], 3)
imb = tf.concat([ch255, ch0, ch255], 3)
imw = tf.concat([ch128, ch128, ch128], 3)
imu = tf.concat([imr, img], 2)
imd = tf.concat([imb, imw], 2)
im = tf.concat([imu, imd], 1)
return im
def createTestImages(self):
images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
[0, 128, 128, 128], [192, 192, 128, 128]]],
dtype=tf.uint8)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
[0, 128, 192, 192], [192, 192, 128, 192]]],
dtype=tf.uint8)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
[0, 128, 128, 0], [192, 192, 192, 128]]],
dtype=tf.uint8)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def createTestBoxes(self):
boxes = tf.constant(
[[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
return boxes
def createTestLabelScores(self):
return tf.constant([1.0, 0.5], dtype=tf.float32)
def createTestLabelScoresWithMissingScore(self):
return tf.constant([0.5, np.nan], dtype=tf.float32)
def createTestMasks(self):
mask = np.array([
[[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0]],
[[255.0, 255.0, 0.0],
[255.0, 255.0, 0.0],
[255.0, 255.0, 0.0]]])
return tf.constant(mask, dtype=tf.float32)
def createTestKeypoints(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createTestKeypointsInsideCrop(self):
keypoints = np.array([
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createTestKeypointsOutsideCrop(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createKeypointFlipPermutation(self):
return np.array([0, 2, 1], dtype=np.int32)
def createTestLabels(self):
labels = tf.constant([1, 2], dtype=tf.int32)
return labels
def createTestBoxesOutOfImage(self):
boxes = tf.constant(
[[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
return boxes
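  # Editor's note: expectedImagesAfterNormalization below assumes
  # normalize_image mapping the original range [0, 256] linearly onto [-1, 1],
  # i.e. x -> x / 128 - 1, so 0 -> -1, 128 -> 0 and 192 -> 0.5 (see
  # testNormalizeImage further down).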
def expectedImagesAfterNormalization(self):
images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
[-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedMaxImageAfterColorScale(self):
images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
[-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
[-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
[-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedMinImageAfterColorScale(self):
images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
[-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
[-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
[-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedImagesAfterMirroring(self):
images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
[0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
[0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
[-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedBoxesAfterMirroring(self):
boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
dtype=tf.float32)
return boxes
def expectedBoxesAfterXY(self):
boxes = tf.constant([[0.25, 0.0, 1.0, 0.75], [0.5, 0.25, 1, 0.75]],
dtype=tf.float32)
return boxes
def expectedMasksAfterMirroring(self):
mask = np.array([
[[0.0, 0.0, 255.0],
[0.0, 0.0, 255.0],
[0.0, 0.0, 255.0]],
[[0.0, 255.0, 255.0],
[0.0, 255.0, 255.0],
[0.0, 255.0, 255.0]]])
return tf.constant(mask, dtype=tf.float32)
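  # Editor's note: the *AfterThresholding helpers below assume
  # retain_boxes_above_threshold with threshold=0.6 applied to the test label
  # scores (1.0, 0.5); only the first box, label, mask and keypoint set
  # survives the cut.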
def expectedLabelScoresAfterThresholding(self):
return tf.constant([1.0], dtype=tf.float32)
def expectedBoxesAfterThresholding(self):
return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
def expectedLabelsAfterThresholding(self):
return tf.constant([1], dtype=tf.float32)
def expectedMasksAfterThresholding(self):
mask = np.array([
[[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0]]])
return tf.constant(mask, dtype=tf.float32)
def expectedKeypointsAfterThresholding(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
])
return tf.constant(keypoints, dtype=tf.float32)
def expectedLabelScoresAfterThresholdingWithMissingScore(self):
return tf.constant([np.nan], dtype=tf.float32)
def expectedBoxesAfterThresholdingWithMissingScore(self):
return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
def expectedLabelsAfterThresholdingWithMissingScore(self):
return tf.constant([2], dtype=tf.float32)
def testNormalizeImage(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 256,
'target_minval': -1,
'target_maxval': 1
})]
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images = tensor_dict[fields.InputDataFields.image]
images_expected = self.expectedImagesAfterNormalization()
with self.test_session() as sess:
(images_, images_expected_) = sess.run(
[images, images_expected])
images_shape_ = images_.shape
images_expected_shape_ = images_expected_.shape
expected_shape = [1, 4, 4, 3]
self.assertAllEqual(images_expected_shape_, images_shape_)
self.assertAllEqual(images_shape_, expected_shape)
self.assertAllClose(images_, images_expected_)
def testRetainBoxesAboveThreshold(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
(retained_boxes, retained_labels,
retained_label_scores) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, threshold=0.6)
with self.test_session() as sess:
(retained_boxes_, retained_labels_, retained_label_scores_,
expected_retained_boxes_, expected_retained_labels_,
expected_retained_label_scores_) = sess.run([
retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholding(),
self.expectedLabelsAfterThresholding(),
self.expectedLabelScoresAfterThresholding()])
self.assertAllClose(
retained_boxes_, expected_retained_boxes_)
self.assertAllClose(
retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
masks = self.createTestMasks()
_, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, masks, threshold=0.6)
with self.test_session() as sess:
retained_masks_, expected_retained_masks_ = sess.run([
retained_masks, self.expectedMasksAfterThresholding()])
self.assertAllClose(
retained_masks_, expected_retained_masks_)
def testRetainBoxesAboveThresholdWithKeypoints(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
keypoints = self.createTestKeypoints()
(_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
with self.test_session() as sess:
(retained_keypoints_,
expected_retained_keypoints_) = sess.run([
retained_keypoints,
self.expectedKeypointsAfterThresholding()])
self.assertAllClose(
retained_keypoints_, expected_retained_keypoints_)
def testRetainBoxesAboveThresholdWithMissingScore(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScoresWithMissingScore()
(retained_boxes, retained_labels,
retained_label_scores) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, threshold=0.6)
with self.test_session() as sess:
(retained_boxes_, retained_labels_, retained_label_scores_,
expected_retained_boxes_, expected_retained_labels_,
expected_retained_label_scores_) = sess.run([
retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholdingWithMissingScore(),
self.expectedLabelsAfterThresholdingWithMissingScore(),
self.expectedLabelScoresAfterThresholdingWithMissingScore()])
self.assertAllClose(
retained_boxes_, expected_retained_boxes_)
self.assertAllClose(
retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRandomFlipBoxes(self):
boxes = self.createTestBoxes()
# Case where the boxes are flipped.
boxes_expected1 = self.expectedBoxesAfterMirroring()
# Case where the boxes are not flipped.
boxes_expected2 = boxes
# After elementwise multiplication, the result should be all-zero since one
# of them is all-zero.
boxes_diff = tf.multiply(
tf.squared_difference(boxes, boxes_expected1),
tf.squared_difference(boxes, boxes_expected2))
expected_result = tf.zeros_like(boxes_diff)
with self.test_session() as sess:
(boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
self.assertAllEqual(boxes_diff, expected_result)
def testFlipMasks(self):
test_mask = self.createTestMasks()
flipped_mask = preprocessor._flip_masks(test_mask)
expected_mask = self.expectedMasksAfterMirroring()
with self.test_session() as sess:
flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
def testRandomHorizontalFlip(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
images = self.expectedImagesAfterNormalization()
boxes = self.createTestBoxes()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes}
images_expected1 = self.expectedImagesAfterMirroring()
boxes_expected1 = self.expectedBoxesAfterMirroring()
images_expected2 = images
boxes_expected2 = boxes
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images = tensor_dict[fields.InputDataFields.image]
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
boxes_diff_expected = tf.zeros_like(boxes_diff)
images_diff1 = tf.squared_difference(images, images_expected1)
images_diff2 = tf.squared_difference(images, images_expected2)
images_diff = tf.multiply(images_diff1, images_diff2)
images_diff_expected = tf.zeros_like(images_diff)
with self.test_session() as sess:
(images_diff_, images_diff_expected_, boxes_diff_,
boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
boxes_diff, boxes_diff_expected])
self.assertAllClose(boxes_diff_, boxes_diff_expected_)
self.assertAllClose(images_diff_, images_diff_expected_)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints = self.createTestKeypoints()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocess_options = [
(preprocessor.random_horizontal_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True, include_keypoints=True)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
self.assertTrue(boxes is not None)
self.assertTrue(masks is not None)
self.assertTrue(keypoints is not None)
def testRandomPixelValueScale(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_min = tf.to_float(images) * 0.9 / 255.0
images_max = tf.to_float(images) * 1.1 / 255.0
images = tensor_dict[fields.InputDataFields.image]
values_greater = tf.greater_equal(images, images_min)
values_less = tf.less_equal(images, images_max)
values_true = tf.fill([1, 4, 4, 3], True)
with self.test_session() as sess:
(values_greater_, values_less_, values_true_) = sess.run(
[values_greater, values_less, values_true])
self.assertAllClose(values_greater_, values_true_)
self.assertAllClose(values_less_, values_true_)
def testRandomImageScale(self):
preprocess_options = [(preprocessor.random_image_scale, {})]
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images_scaled = tensor_dict[fields.InputDataFields.image]
images_original_shape = tf.shape(images_original)
images_scaled_shape = tf.shape(images_scaled)
with self.test_session() as sess:
(images_original_shape_, images_scaled_shape_) = sess.run(
[images_original_shape, images_scaled_shape])
self.assertTrue(
images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
self.assertTrue(
images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
self.assertTrue(
images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
self.assertTrue(
images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
def testRandomRGBtoGray(self):
preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images_gray = tensor_dict[fields.InputDataFields.image]
images_gray_r, images_gray_g, images_gray_b = tf.split(
value=images_gray, num_or_size_splits=3, axis=3)
images_r, images_g, images_b = tf.split(
value=images_original, num_or_size_splits=3, axis=3)
images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
tf.to_float(images_gray_r))
images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
tf.to_float(images_gray_g))
images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
tf.to_float(images_gray_g))
images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
tf.to_float(images_gray_b))
images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
tf.to_float(images_gray_b))
images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
tf.to_float(images_gray_r))
images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
with self.test_session() as sess:
(images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
[images_r_diff, images_g_diff, images_b_diff, image_zero1])
self.assertAllClose(images_r_diff_, image_zero1_)
self.assertAllClose(images_g_diff_, image_zero1_)
self.assertAllClose(images_b_diff_, image_zero1_)
def testRandomAdjustBrightness(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_bright = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_bright_shape = tf.shape(images_bright)
with self.test_session() as sess:
(image_original_shape_, image_bright_shape_) = sess.run(
[image_original_shape, image_bright_shape])
self.assertAllEqual(image_original_shape_, image_bright_shape_)
def testRandomAdjustContrast(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_contrast = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_contrast_shape = tf.shape(images_contrast)
with self.test_session() as sess:
(image_original_shape_, image_contrast_shape_) = sess.run(
[image_original_shape, image_contrast_shape])
self.assertAllEqual(image_original_shape_, image_contrast_shape_)
def testRandomAdjustHue(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_hue, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_hue = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_hue_shape = tf.shape(images_hue)
with self.test_session() as sess:
(image_original_shape_, image_hue_shape_) = sess.run(
[image_original_shape, image_hue_shape])
self.assertAllEqual(image_original_shape_, image_hue_shape_)
def testRandomDistortColor(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_distort_color, {}))
images_original = self.createTestImages()
images_original_shape = tf.shape(images_original)
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_distorted_color = tensor_dict[fields.InputDataFields.image]
images_distorted_color_shape = tf.shape(images_distorted_color)
with self.test_session() as sess:
(images_original_shape_, images_distorted_color_shape_) = sess.run(
[images_original_shape, images_distorted_color_shape])
self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
def testRandomJitterBoxes(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
boxes = self.createTestBoxes()
boxes_shape = tf.shape(boxes)
tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
distorted_boxes_shape = tf.shape(distorted_boxes)
with self.test_session() as sess:
(boxes_shape_, distorted_boxes_shape_) = sess.run(
[boxes_shape, distorted_boxes_shape])
self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
def testRandomCropImage(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_crop_image, {}))
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
self.assertEqual(3, distorted_images.get_shape()[3])
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageGrayscale(self):
preprocessing_options = [(preprocessor.rgb_to_gray, {}),
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,
}),
(preprocessor.random_crop_image, {})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
self.assertEqual(1, distorted_images.get_shape()[3])
with self.test_session() as sess:
session_results = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = session_results
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithBoxOutOfImage(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_crop_image, {}))
images = self.createTestImages()
boxes = self.createTestBoxesOutOfImage()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_image, {
'random_coef': 1.0
})]
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
boxes_shape = tf.shape(boxes)
distorted_boxes_shape = tf.shape(distorted_boxes)
images_shape = tf.shape(images)
distorted_images_shape = tf.shape(distorted_images)
with self.test_session() as sess:
(boxes_shape_, distorted_boxes_shape_, images_shape_,
distorted_images_shape_, images_, distorted_images_,
boxes_, distorted_boxes_, labels_, distorted_labels_) = sess.run(
[boxes_shape, distorted_boxes_shape, images_shape,
distorted_images_shape, images, distorted_images,
boxes, distorted_boxes, labels, distorted_labels])
self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
self.assertAllEqual(images_shape_, distorted_images_shape_)
self.assertAllClose(images_, distorted_images_)
self.assertAllClose(boxes_, distorted_boxes_)
self.assertAllEqual(labels_, distorted_labels_)
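  # Editor's note on the mocked crops used by the tests below: the patched
  # tf.image.sample_distorted_bounding_box always returns offset [6, 143, 0]
  # and size [190, 237, -1] for the 200x400 colorful test image, i.e. the
  # normalized crop window [0.03, 0.3575, 0.98, 0.95]. The expected boxes are
  # the original boxes re-expressed in that window's coordinate frame and
  # clipped to [0, 1]; boxes falling outside the window are dropped and
  # keypoints outside it become NaN.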
def testRandomCropWithMockSampleDistortedBoundingBox(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createColorfulTestImage()
boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
[0.2, 0.4, 0.75, 0.75],
[0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
labels = tf.constant([1, 7, 11], dtype=tf.int32)
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (tf.constant(
[6, 143, 0], dtype=tf.int32), tf.constant(
[190, 237, -1], dtype=tf.int32), tf.constant(
[[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
[0.28421, 0.0, 0.38947365, 0.57805908]],
dtype=tf.float32)
expected_labels = tf.constant([7, 11], dtype=tf.int32)
with self.test_session() as sess:
(distorted_boxes_, distorted_labels_,
expected_boxes_, expected_labels_) = sess.run(
[distorted_boxes, distorted_labels,
expected_boxes, expected_labels])
self.assertAllClose(distorted_boxes_, expected_boxes_)
self.assertAllEqual(distorted_labels_, expected_labels_)
def testStrictRandomCropImageWithMasks(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
(new_image, new_boxes, new_labels,
new_masks) = preprocessor._strict_random_crop_image(
image, boxes, labels, masks=masks)
with self.test_session() as sess:
new_image, new_boxes, new_labels, new_masks = sess.run([
new_image, new_boxes, new_labels, new_masks])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
self.assertAllEqual(new_image.shape, [190, 237, 3])
self.assertAllEqual(new_masks.shape, [2, 190, 237])
self.assertAllClose(
new_boxes.flatten(), expected_boxes.flatten())
def testStrictRandomCropImageWithKeypoints(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypoints()
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
(new_image, new_boxes, new_labels,
new_keypoints) = preprocessor._strict_random_crop_image(
image, boxes, labels, keypoints=keypoints)
with self.test_session() as sess:
new_image, new_boxes, new_labels, new_keypoints = sess.run([
new_image, new_boxes, new_labels, new_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]]
], dtype=np.float32)
self.assertAllEqual(new_image.shape, [190, 237, 3])
self.assertAllClose(
new_boxes.flatten(), expected_boxes.flatten())
self.assertAllClose(
new_keypoints.flatten(), expected_keypoints.flatten())
def testRunRandomCropImageWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_masks = distorted_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_masks_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_masks])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
def testRunRandomCropImageWithKeypointsInsideCrop(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypointsInsideCrop()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]],
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]]
])
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
self.assertAllClose(
distorted_keypoints_.flatten(), expected_keypoints.flatten())
def testRunRandomCropImageWithKeypointsOutsideCrop(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypointsOutsideCrop()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
])
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
self.assertAllClose(
distorted_keypoints_.flatten(), expected_keypoints.flatten())
def testRunRetainBoxesAboveThreshold(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores
}
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options)
retained_boxes = retained_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
retained_labels = retained_tensor_dict[
fields.InputDataFields.groundtruth_classes]
retained_label_scores = retained_tensor_dict[
fields.InputDataFields.groundtruth_label_scores]
with self.test_session() as sess:
(retained_boxes_, retained_labels_,
retained_label_scores_, expected_retained_boxes_,
expected_retained_labels_, expected_retained_label_scores_) = sess.run(
[retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholding(),
self.expectedLabelsAfterThresholding(),
self.expectedLabelScoresAfterThresholding()])
self.assertAllClose(retained_boxes_, expected_retained_boxes_)
self.assertAllClose(retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRunRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
masks = self.createTestMasks()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores,
fields.InputDataFields.groundtruth_instance_masks: masks
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
retained_masks = retained_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(retained_masks_, expected_masks_) = sess.run(
[retained_masks,
self.expectedMasksAfterThresholding()])
self.assertAllClose(retained_masks_, expected_masks_)
def testRunRetainBoxesAboveThresholdWithKeypoints(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
keypoints = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
retained_keypoints = retained_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(retained_keypoints_, expected_keypoints_) = sess.run(
[retained_keypoints,
self.expectedKeypointsAfterThresholding()])
self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRunRandomCropToAspectRatioWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
with mock.patch.object(preprocessor,
'_random_integer') as mock_random_integer:
mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_masks = distorted_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_masks_) = sess.run([
distorted_image, distorted_boxes, distorted_labels, distorted_masks
])
expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
self.assertAllEqual(distorted_labels_, [1])
self.assertAllClose(distorted_boxes_.flatten(),
expected_boxes.flatten())
self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
def testRunRandomCropToAspectRatioWithKeypoints(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
with mock.patch.object(preprocessor,
'_random_integer') as mock_random_integer:
mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run([
distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints
])
expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
expected_keypoints = np.array(
[[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
self.assertAllEqual(distorted_labels_, [1])
self.assertAllClose(distorted_boxes_.flatten(),
expected_boxes.flatten())
self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten())
def testRandomPadImage(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_pad_image, {})]
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
with self.test_session() as sess:
(boxes_shape_, padded_boxes_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_) = sess.run(
[boxes_shape, padded_boxes_shape, images_shape,
padded_images_shape, boxes, padded_boxes])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_pad_image, {
'random_coef': 1.0
})]
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
with self.test_session() as sess:
(boxes_shape_, padded_boxes_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_) = sess.run(
[boxes_shape, padded_boxes_shape, images_shape,
padded_images_shape, boxes, padded_boxes])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropToAspectRatio(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
'aspect_ratio': 2.0
})]
cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
cropped_boxes = cropped_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
cropped_boxes_shape = tf.shape(cropped_boxes)
images_shape = tf.shape(images)
cropped_images_shape = tf.shape(cropped_images)
with self.test_session() as sess:
(boxes_shape_, cropped_boxes_shape_, images_shape_,
cropped_images_shape_) = sess.run([
boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
])
self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
self.assertEqual(images_shape_[2], cropped_images_shape_[2])
def testRandomBlackPatches(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_black_patches, {
'size_to_image_ratio': 0.5
}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
images_shape = tf.shape(images)
blacked_images_shape = tf.shape(blacked_images)
with self.test_session() as sess:
(images_shape_, blacked_images_shape_) = sess.run(
[images_shape, blacked_images_shape])
self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethod(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_resize_method, {
'target_size': (75, 150)
}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
resized_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
resized_images = resized_tensor_dict[fields.InputDataFields.image]
resized_images_shape = tf.shape(resized_images)
expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
with self.test_session() as sess:
(expected_images_shape_, resized_images_shape_) = sess.run(
[expected_images_shape, resized_images_shape])
self.assertAllEqual(expected_images_shape_,
resized_images_shape_)
def testResizeToRange(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
max_dim = 100
expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(out_image_shape)
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithMasks(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
min_dim = 50
max_dim = 100
expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeToRangeWithNoInstanceMask(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
min_dim = 50
max_dim = 100
expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithMasks(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
height = 50
width = 100
expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithNoInstanceMask(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
height = 50
width = 100
expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeToRange4DImageTensor(self):
image = tf.random_uniform([1, 200, 300, 3])
with self.assertRaises(ValueError):
preprocessor.resize_to_range(image, 500, 600)
def testResizeToRangeSameMinMax(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[312, 312, 3], [299, 299, 3]]
min_dim = 320
max_dim = 320
expected_shape_list = [[320, 320, 3], [320, 320, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(out_image_shape)
self.assertAllEqual(out_image_shape, expected_shape)
def testScaleBoxesToPixelCoordinates(self):
"""Tests box scaling, checking scaled values."""
in_shape = [60, 40, 3]
in_boxes = [[0.1, 0.2, 0.4, 0.6],
[0.5, 0.3, 0.9, 0.7]]
expected_boxes = [[6., 8., 24., 24.],
[30., 12., 54., 28.]]
in_image = tf.random_uniform(in_shape)
in_boxes = tf.constant(in_boxes)
_, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
in_image, boxes=in_boxes)
with self.test_session() as sess:
out_boxes = sess.run(out_boxes)
self.assertAllClose(out_boxes, expected_boxes)
def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
"""Tests box and keypoint scaling, checking scaled values."""
in_shape = [60, 40, 3]
in_boxes = self.createTestBoxes()
in_keypoints = self.createTestKeypoints()
expected_boxes = [[0., 10., 45., 40.],
[15., 20., 45., 40.]]
expected_keypoints = [
[[6., 4.], [12., 8.], [18., 12.]],
[[24., 16.], [30., 20.], [36., 24.]],
]
in_image = tf.random_uniform(in_shape)
_, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
in_image, boxes=in_boxes, keypoints=in_keypoints)
with self.test_session() as sess:
out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
self.assertAllClose(out_boxes_, expected_boxes)
self.assertAllClose(out_keypoints_, expected_keypoints)
def testSubtractChannelMean(self):
"""Tests whether channel means have been subtracted."""
with self.test_session():
image = tf.zeros((240, 320, 3))
means = [1, 2, 3]
actual = preprocessor.subtract_channel_mean(image, means=means)
actual = actual.eval()
self.assertTrue((actual[:, :, 0] == -1).all())
self.assertTrue((actual[:, :, 1] == -2).all())
self.assertTrue((actual[:, :, 2] == -3).all())
def testOneHotEncoding(self):
"""Tests one hot encoding of multiclass labels."""
with self.test_session():
labels = tf.constant([1, 4, 2], dtype=tf.int32)
one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
one_hot = one_hot.eval()
self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
def testSSDRandomCrop(self):
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop, {})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropPad(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_pad, {})]
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatio(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = self.createTestMasks()
keypoints = self.createTestKeypoints()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints,
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True, include_keypoints=True)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
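# Editor's note: the helper below is an added, hedged usage sketch, not one of
# the original tests. It relies on this module's existing imports (tf, fields,
# preprocessor) and only illustrates the preprocess() call pattern exercised by
# the tests above; shapes, boxes and options are illustrative.
def _preprocess_usage_sketch():
  """Builds a normalize + ssd_random_crop pipeline for a toy tensor_dict."""
  images = tf.random_uniform([1, 100, 200, 3], maxval=255)
  boxes = tf.constant([[0.1, 0.1, 0.8, 0.9]])
  labels = tf.constant([1], dtype=tf.int32)
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
  }
  preprocessing_options = [
      (preprocessor.normalize_image, {
          'original_minval': 0,
          'original_maxval': 255,
          'target_minval': 0,
          'target_maxval': 1
      }),
      (preprocessor.ssd_random_crop, {}),
  ]
  # preprocess returns a new tensor_dict holding the distorted image and the
  # correspondingly adjusted groundtruth boxes and classes.
  return preprocessor.preprocess(tensor_dict, preprocessing_options)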
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compute a pairwise measure of similarity
between the boxes in two BoxLists.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.core import box_list_ops
class RegionSimilarityCalculator(object):
"""Abstract base class for region similarity calculator."""
__metaclass__ = ABCMeta
def compare(self, boxlist1, boxlist2, scope=None):
"""Computes matrix of pairwise similarity between BoxLists.
    This op (to be overridden) computes a measure of pairwise similarity between
the boxes in the given BoxLists. Higher values indicate more similarity.
Note that this method simply measures similarity and does not explicitly
perform a matching.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
scope: Op scope name. Defaults to 'Compare' if None.
Returns:
a (float32) tensor of shape [N, M] with pairwise similarity score.
"""
with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
return self._compare(boxlist1, boxlist2)
@abstractmethod
def _compare(self, boxlist1, boxlist2):
pass
class IouSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on Intersection over Union (IOU) metric.
This class computes pairwise similarity between two BoxLists based on IOU.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise IOU similarity between the two BoxLists.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing pairwise iou scores.
"""
return box_list_ops.iou(boxlist1, boxlist2)
class NegSqDistSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on the squared distance metric.
This class computes pairwise similarity between two BoxLists based on the
negative squared distance metric.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute matrix of (negated) sq distances.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing negated pairwise squared distance.
"""
return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
class IoaSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on Intersection over Area (IOA) metric.
  This class computes pairwise similarity between two BoxLists based on their
  pairwise intersections divided by the areas of the boxes in the second
  BoxList.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise IOA similarity between the two BoxLists.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing pairwise IOA scores.
"""
return box_list_ops.ioa(boxlist1, boxlist2)
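# Editor's note: the block below is an added, hedged usage sketch, not part of
# the original module. It assumes the object_detection package is importable
# and a TF1-style Session is available, and simply compares two small BoxLists
# with IouSimilarity; the box corners are illustrative.
if __name__ == '__main__':
  from object_detection.core import box_list  # imported only for this demo
  corners_a = tf.constant([[0.0, 0.0, 1.0, 1.0]])
  corners_b = tf.constant([[0.0, 0.0, 0.5, 1.0],
                           [0.5, 0.5, 1.5, 1.5]])
  similarity = IouSimilarity().compare(box_list.BoxList(corners_a),
                                       box_list.BoxList(corners_b))
  with tf.Session() as sess:
    # Expect roughly [[0.5, 0.14]]: intersection over union for each pair.
    print(sess.run(similarity))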
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator."""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
class RegionSimilarityCalculatorTest(tf.test.TestCase):
def test_get_correct_pairwise_similarity_based_on_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou_similarity)
self.assertAllClose(iou_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
[1.0, 1.0, 0.0, 2.0]])
corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
[-4.0, 0.0, 0.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
exp_output = [[-26, -25, 0], [-18, -27, -6]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
with self.test_session() as sess:
dist_output = sess.run(dist_similarity)
self.assertAllClose(dist_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_ioa(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
[1.0 / 12.0, 0.0, 5.0 / 400.0]]
exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
[0, 0],
[6.0 / 6.0, 5.0 / 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
with self.test_session() as sess:
iou_output_1, iou_output_2 = sess.run(
[ioa_similarity_1, ioa_similarity_2])
self.assertAllClose(iou_output_1, exp_output_1)
self.assertAllClose(iou_output_2, exp_output_2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
BoxListFields: standard field used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
"""Names for the input tensors.
  Holds the standard data field names to use for identifying input tensors.
  These names should be used by the decoder to identify keys for the returned
  tensor_dict containing input tensors, and by the model to identify the
  tensors it needs.
Attributes:
image: image.
original_image: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
filename: original filename of the dataset (without common path).
groundtruth_image_classes: image-level class labels.
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_is_crowd: is the groundtruth a single object or a crowd.
groundtruth_area: area of a groundtruth segment.
groundtruth_difficult: is a `difficult` object
proposal_boxes: coordinates of object proposal boxes.
proposal_objectness: objectness score of each proposal.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
"""
image = 'image'
original_image = 'original_image'
key = 'key'
source_id = 'source_id'
filename = 'filename'
groundtruth_image_classes = 'groundtruth_image_classes'
groundtruth_boxes = 'groundtruth_boxes'
groundtruth_classes = 'groundtruth_classes'
groundtruth_label_types = 'groundtruth_label_types'
groundtruth_is_crowd = 'groundtruth_is_crowd'
groundtruth_area = 'groundtruth_area'
groundtruth_difficult = 'groundtruth_difficult'
proposal_boxes = 'proposal_boxes'
proposal_objectness = 'proposal_objectness'
groundtruth_instance_masks = 'groundtruth_instance_masks'
groundtruth_instance_classes = 'groundtruth_instance_classes'
groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores'
class BoxListFields(object):
"""Naming conventions for BoxLists.
Attributes:
boxes: bounding box coordinates.
classes: classes per bounding box.
scores: scores per bounding box.
weights: sample weights per bounding box.
objectness: objectness score per bounding box.
masks: masks per bounding box.
keypoints: keypoints per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
"""
boxes = 'boxes'
classes = 'classes'
scores = 'scores'
weights = 'weights'
objectness = 'objectness'
masks = 'masks'
keypoints = 'keypoints'
keypoint_heatmaps = 'keypoint_heatmaps'
class TfExampleFields(object):
"""TF-example proto feature names for object detection.
Holds the standard feature names to load from an Example proto for object
detection.
Attributes:
image_encoded: JPEG encoded string
image_format: image format, e.g. "JPEG"
filename: filename
channels: number of channels of image
colorspace: colorspace, e.g. "RGB"
height: height of image in pixels, e.g. 462
width: width of image in pixels, e.g. 581
source_id: original source of the image
object_class_text: labels in text format, e.g. ["person", "cat"]
    object_class_label: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
object_view: viewpoint of object, e.g. ["frontal", "left"]
object_truncated: is object truncated, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_is_crowd: is the object a single object or a crowd
object_segment_area: the area of the segment.
instance_masks: instance segmentation masks.
instance_classes: Classes for each instance segmentation mask.
"""
image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is a reserved keyword
filename = 'image/filename'
channels = 'image/channels'
colorspace = 'image/colorspace'
height = 'image/height'
width = 'image/width'
source_id = 'image/source_id'
object_class_text = 'image/object/class/text'
object_class_label = 'image/object/class/label'
object_bbox_ymin = 'image/object/bbox/ymin'
object_bbox_xmin = 'image/object/bbox/xmin'
object_bbox_ymax = 'image/object/bbox/ymax'
object_bbox_xmax = 'image/object/bbox/xmax'
object_view = 'image/object/view'
object_truncated = 'image/object/truncated'
object_occluded = 'image/object/occluded'
object_difficult = 'image/object/difficult'
object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area'
instance_masks = 'image/segmentation/object'
instance_classes = 'image/segmentation/object/class'
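# Editor's note: the block below is an added, hedged usage sketch, not part of
# the original module. The classes above only define string constants meant to
# be used as dictionary keys: InputDataFields keys the tensor_dict consumed by
# the preprocessor (see the preprocessor tests earlier in this gist), and
# TfExampleFields names the features of a tf.train.Example, as illustrated
# here with made-up values.
if __name__ == '__main__':
  import tensorflow as tf  # imported only for this demo
  example = tf.train.Example(features=tf.train.Features(feature={
      TfExampleFields.height: tf.train.Feature(
          int64_list=tf.train.Int64List(value=[462])),
      TfExampleFields.width: tf.train.Feature(
          int64_list=tf.train.Int64List(value=[581])),
      TfExampleFields.object_class_text: tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'person', b'cat'])),
  }))
  print(example)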
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
   provided RegionSimilarityCalculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssigner(object):
"""Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None):
"""Construct Multibox Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: an object_detection.core.Matcher used to match groundtruth to
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be [0] for each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
if not isinstance(matcher, mat.Matcher):
raise ValueError('matcher must be a Matcher')
if not isinstance(box_coder, bcoder.BoxCoder):
raise ValueError('box_coder must be a BoxCoder')
self._similarity_calc = similarity_calc
self._matcher = matcher
self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32)
else:
self._unmatched_cls_target = unmatched_cls_target
@property
def box_coder(self):
return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be a BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be a BoxList')
if groundtruth_labels is None:
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
tf.shape(self._unmatched_cls_target))
with tf.control_dependencies([shape_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
anchors)
match = self._matcher.match(match_quality_matrix, **params)
reg_targets = self._create_regression_targets(anchors,
groundtruth_boxes,
match)
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match)
cls_weights = self._create_classification_weights(
match, self._positive_class_weight, self._negative_class_weight)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match
def _reset_target_shape(self, target, num_anchors):
"""Sets the static shape of the target.
Args:
target: the target tensor. Its first dimension will be overwritten.
num_anchors: the number of anchors, which is used to override the target's
first dimension.
Returns:
A tensor with the shape info filled in.
"""
target_shape = target.get_shape().as_list()
target_shape[0] = num_anchors
target.set_shape(target_shape)
return target
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
"""Returns a regression target for each anchor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth_boxes
match: a matcher.Match object
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_anchors = box_list_ops.gather(anchors,
matched_anchor_indices)
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
matched_gt_indices)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
matched_anchors)
unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(),
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
reg_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_reg_targets, unmatched_ignored_reg_targets])
# TODO: summarize the number of matches on average.
return reg_targets
def _default_regression_target(self):
"""Returns the default target for anchors to regress to.
Default regression targets are set to zero (though in
this implementation what these targets are set to should
not matter as the regression weight of any box set to
regress to the default target is zero).
Returns:
default_target: a float32 tensor with shape [1, box_code_dimension]
"""
return tf.constant([self._box_coder.code_size*[0]], tf.float32)
def _create_classification_targets(self, groundtruth_labels, match):
"""Create classification targets for each anchor.
    Assigns a classification target for each anchor to the matching
    groundtruth label that is provided by match. Anchors that are not matched
    to anything are given the target self._unmatched_cls_target.
Args:
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar labels).
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
method returns a weight of 1 for every positive anchor and 0 for every
negative anchor.
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
"""
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
return reg_weights
def _create_classification_weights(self,
match,
positive_class_weight=1.0,
negative_class_weight=1.0):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
positive_class_weight and negative (unmatched) anchors are associated with
a weight of negative_class_weight. When anchors are ignored, weights are set
to zero. By default, both positive/negative weights are set to 1.0,
but they can be adjusted to handle class imbalance (which is almost always
the case in object detection).
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors
negative_class_weight: weight to be associated to negative anchors
Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing
classification weights.
"""
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32)
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32)
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator
cls_weights = (positive_class_weight * matched_indicator
+ negative_class_weight * unmatched_indicator)
return cls_weights
def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder: BoxCoder object.
"""
return self._box_coder
# TODO: This method pulls all the implementation dependencies into core.
# Therefore it's best to have this factory method outside of core.
def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None):
"""Factory function for creating standard target assigners.
Args:
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the Assign
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be 0 for each anchor.
Returns:
TargetAssigner: desired target assigner.
Raises:
ValueError: if combination reference+stage is invalid.
"""
if reference == 'Multibox' and stage == 'proposal':
similarity_calc = sim_calc.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
elif reference == 'FasterRCNN' and stage == 'proposal':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
unmatched_threshold=0.3,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FasterRCNN' and stage == 'detection':
similarity_calc = sim_calc.IouSimilarity()
# Uses all proposals with IOU < 0.5 as candidate negatives.
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
negatives_lower_than_unmatched=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FastRCNN':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.1,
force_match_for_each_row=False,
negatives_lower_than_unmatched=False)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
else:
raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
def batch_assign_targets(target_assigner,
anchors_batch,
gt_box_batch,
gt_class_targets_batch):
"""Batched assignment of classification and regression targets.
Args:
target_assigner: a target assigner.
anchors_batch: BoxList representing N box anchors or list of BoxList objects
with length batch_size representing anchor sets.
gt_box_batch: a list of BoxList objects with length batch_size
representing groundtruth boxes for each image in the batch
gt_class_targets_batch: a list of tensors with length batch_size, where
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
num_classes],
batch_cls_weights: a tensor with shape [batch_size, num_anchors],
batch_reg_targets: a tensor with shape [batch_size, num_anchors,
box_code_dimension]
batch_reg_weights: a tensor with shape [batch_size, num_anchors],
match_list: a list of matcher.Match objects encoding the match between
anchors and groundtruth boxes for each image of the batch,
with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
    ValueError: if input list lengths are inconsistent, i.e., unless
      batch_size == len(gt_box_batch) == len(gt_class_targets_batch) and,
      when anchors_batch is a list, batch_size == len(anchors_batch).
"""
if not isinstance(anchors_batch, list):
anchors_batch = len(gt_box_batch) * [anchors_batch]
if not all(
isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
if not (len(anchors_batch)
== len(gt_box_batch)
== len(gt_class_targets_batch)):
raise ValueError('batch size incompatible with lengths of anchors_batch, '
'gt_box_batch and gt_class_targets_batch.')
cls_targets_list = []
cls_weights_list = []
reg_targets_list = []
reg_weights_list = []
match_list = []
for anchors, gt_boxes, gt_class_targets in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch):
(cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
reg_weights_list.append(reg_weights)
match_list.append(match)
batch_cls_targets = tf.stack(cls_targets_list)
batch_cls_weights = tf.stack(cls_weights_list)
batch_reg_targets = tf.stack(reg_targets_list)
batch_reg_weights = tf.stack(reg_weights_list)
return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights, match_list)
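# Editor's note: the block below is an added, hedged usage sketch, not part of
# the original module. It wires together the steps described in the module
# docstring for a single image, using the 'FasterRCNN' proposal-stage assigner
# produced by create_target_assigner above; box coordinates are illustrative
# and a TF1-style Session is assumed. batch_assign_targets applies the same
# per-image logic across a batch and stacks the results.
if __name__ == '__main__':
  example_anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
                                                  [0.5, 0.5, 1.0, 1.0]]))
  example_groundtruth = box_list.BoxList(tf.constant([[0.1, 0.1, 0.6, 0.6]]))
  example_assigner = create_target_assigner('FasterRCNN', stage='proposal')
  (cls_targets, cls_weights, reg_targets, reg_weights,
   match) = example_assigner.assign(example_anchors, example_groundtruth)
  with tf.Session() as sess:
    print(sess.run([cls_targets, cls_weights, reg_targets, reg_weights]))
    print(sess.run(match.matched_column_indices()))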
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.target_assigner."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssignerTest(tf.test.TestCase):
def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes, num_valid_rows=2)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
    # Note: this test is very similar to the one above. The third anchor
    # matches a groundtruth box with an IOU of 0.35, which is between the
    # matched and unmatched thresholds. This means that, as above, the expected
    # classification targets are [1, 1, 0]. Unlike above, the third match is
    # ignored, and therefore the expected classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.3)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0.0, 0.5, .9, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_weights = [1, 1, .5, 1]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(_, cls_weights, _, _, _) = result
with self.test_session() as sess:
cls_weights_out = sess.run(cls_weights)
self.assertAllClose(cls_weights_out, exp_cls_weights)
def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], tf.float32)
exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 0], [0, 0]],
[[0, 1], [1, .5]]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
exp_cls_targets = [[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = []
result = target_assigner.assign(priors, boxes, groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEqual(cls_targets_out.dtype, np.float32)
self.assertEqual(cls_weights_out.dtype, np.float32)
self.assertEqual(reg_targets_out.dtype, np.float32)
self.assertEqual(reg_weights_out.dtype, np.float32)
self.assertEqual(matching_anchors_out.dtype, np.int32)
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError):
target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase):
def _get_agnostic_target_assigner(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[1], [0], [0], [0]],
[[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
class_targets2 = tf.constant([[0, 0, 0, 1],
[0, 0, 1, 0]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]],
[[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[[0, 1, 1],
[1, 1, 0]]], tf.float32)
class_targets2 = tf.constant([[[0, 1, 1],
[1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]]],
[[[0., 0., 0.],
[0., 0., 0.]],
[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 1.],
[0., 0., 1.]],
[[0., 0., 0.],
[0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
target_dimensions=(2, 3))
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self):
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]]
exp_match_0 = []
num_classes = 3
pad = 1
gt_class_targets = tf.zeros((0, num_classes + pad))
gt_class_targets_batch = [gt_class_targets]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors,
gt_box_batch, gt_class_targets_batch)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase):
def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners.
TODO: Make this test more general.
"""
corners = [[0.0, 0.0, 1.0, 1.0]]
groundtruth = box_list.BoxList(tf.constant(corners))
priors = box_list.BoxList(tf.constant(corners))
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors.add_field('stddev', prior_stddevs)
multibox_ta = (targetassigner
.create_target_assigner('Multibox', stage='proposal'))
multibox_ta.assign(priors, groundtruth)
# No tests on output, as that may vary arbitrarily as new target assigners
# are added. As long as it is constructed correctly and runs without errors,
# tests on the individual assigners cover correctness of the assignments.
anchors = box_list.BoxList(tf.constant(corners))
faster_rcnn_proposals_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='proposal'))
faster_rcnn_proposals_ta.assign(anchors, groundtruth)
fast_rcnn_ta = (targetassigner
.create_target_assigner('FastRCNN'))
fast_rcnn_ta.assign(anchors, groundtruth)
faster_rcnn_detection_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='detection'))
faster_rcnn_detection_ta.assign(anchors, groundtruth)
with self.assertRaises(ValueError):
targetassigner.create_target_assigner('InvalidDetector',
stage='invalid_stage')
if __name__ == '__main__':
tf.test.main()
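# --- Illustrative sketch (not part of the original test file) ---
# A hedged worked example of where expected values such as the
# [0, 0, -0.5, -0.5] row of exp_reg_targets in the batch tests above come from,
# assuming MeanStddevBoxCoder encodes a matched box as
# (box_corners - prior_means) / prior_stddevs. Here the first prior
# [0, 0, .25, .25] (stddev .1) is matched to the groundtruth box
# [0., 0., 0.2, 0.2] from box_list1:
def _mean_stddev_encoding_sketch():
  import numpy as np
  prior_mean = np.array([0.0, 0.0, 0.25, 0.25])
  prior_stddev = np.array([0.1, 0.1, 0.1, 0.1])
  gt_box = np.array([0.0, 0.0, 0.2, 0.2])
  # (0.2 - 0.25) / 0.1 == -0.5 for the max corners, 0 elsewhere.
  return (gt_box - prior_mean) / prior_stddev  # -> [0., 0., -0.5, -0.5]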
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.
Example usage:
./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
--year=VOC2012 \
--output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import logging
import os
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
'(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
'difficult instances')
FLAGS = flags.FLAGS
SETS = ['train', 'val', 'trainval', 'test']
YEARS = ['VOC2007', 'VOC2012', 'merged']
def dict_to_tf_example(data,
dataset_directory,
label_map_dict,
ignore_difficult_instances=False,
image_subdirectory='JPEGImages'):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
dataset_directory: Path to root directory holding PASCAL dataset
label_map_dict: A map from string label names to integers ids.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
image_subdirectory: String specifying subdirectory within the
PASCAL dataset directory holding the actual image data.
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
classes_text.append(obj['name'])
classes.append(label_map_dict[obj['name']])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'])
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(data['filename']),
'image/source_id': dataset_util.bytes_feature(data['filename']),
'image/key/sha256': dataset_util.bytes_feature(key),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
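# Illustrative note (not in the original source): the normalization above maps
# absolute pixel coordinates into [0, 1] by dividing by the image dimensions.
# For example, with width == height == 256 and a bndbox of xmin=64, ymin=64,
# xmax=192, ymax=192, the encoded features are xmin=0.25, ymin=0.25,
# xmax=0.75, ymax=0.75 -- the values checked in create_pascal_tf_record_test.py.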
def main(_):
if FLAGS.set not in SETS:
raise ValueError('set must be in : {}'.format(SETS))
if FLAGS.year not in YEARS:
raise ValueError('year must be in : {}'.format(YEARS))
data_dir = FLAGS.data_dir
years = ['VOC2007', 'VOC2012']
if FLAGS.year != 'merged':
years = [FLAGS.year]
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
for year in years:
logging.info('Reading from PASCAL %s dataset.', year)
examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
'aeroplane_' + FLAGS.set + '.txt')
annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
examples_list = dataset_util.read_examples_list(examples_path)
for idx, example in enumerate(examples_list):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples_list))
path = os.path.join(annotations_dir, example + '.xml')
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
FLAGS.ignore_difficult_instances)
writer.write(tf_example.SerializeToString())
writer.close()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_pascal_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
Args:
proto_field: The protobuf field to compare.
expectation: The expected value of the protobuf field.
"""
proto_list = [p for p in proto_field]
self.assertListEqual(proto_list, expectation)
def test_dict_to_tf_example(self):
image_file_name = 'tmp_image.jpg'
image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
save_path = os.path.join(self.get_temp_dir(), image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
data = {
'folder': '',
'filename': image_file_name,
'size': {
'height': 256,
'width': 256,
},
'object': [
{
'difficult': 1,
'bndbox': {
'xmin': 64,
'ymin': 64,
'xmax': 192,
'ymax': 192,
},
'name': 'person',
'truncated': 0,
'pose': '',
},
],
}
label_map_dict = {
'background': 0,
'person': 1,
'notperson': 2,
}
example = create_pascal_tf_record.dict_to_tf_example(
data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value, ['jpeg'])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['person'])
self._assertProtoEqual(
example.features.feature['image/object/class/label'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/difficult'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/truncated'].int64_list.value,
[0])
self._assertProtoEqual(
example.features.feature['image/object/view'].bytes_list.value, [''])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert the Oxford pet dataset to TFRecord for object_detection.
See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
Cats and Dogs
IEEE Conference on Computer Vision and Pattern Recognition, 2012
http://www.robots.ox.ac.uk/~vgg/data/pets/
Example usage:
./create_pet_tf_record --data_dir=/home/user/pet \
--output_dir=/home/user/pet/output
"""
import hashlib
import io
import logging
import os
import random
import re
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
'Path to label map proto')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
"""Gets the class name from a file.
Args:
file_name: The file name to get the class name from.
e.g. "american_pit_bull_terrier_105.jpg"
Returns:
The class name parsed from the file name, e.g. "american_pit_bull_terrier".
"""
match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
return match.groups()[0]
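# Illustrative note (not in the original source): the regex above strips the
# trailing index and extension, so for example:
#   get_class_name_from_filename('american_pit_bull_terrier_105.jpg')
#   -> 'american_pit_bull_terrier'
#   get_class_name_from_filename('Abyssinian_12.jpg') -> 'Abyssinian'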
def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
label_map_dict: A map from string label names to integers ids.
image_subdirectory: String specifying subdirectory within the
pet dataset directory holding the actual image data.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(image_subdirectory, data['filename'])
with tf.gfile.GFile(img_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name)
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'])
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(data['filename']),
'image/source_id': dataset_util.bytes_feature(data['filename']),
'image/key/sha256': dataset_util.bytes_feature(key),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
def create_tf_record(output_filename,
label_map_dict,
annotations_dir,
image_dir,
examples):
"""Creates a TFRecord file from examples.
Args:
output_filename: Path to where output file is saved.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
examples: Examples to parse and save to tf record.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
path = os.path.join(annotations_dir, 'xmls', example + '.xml')
if not os.path.exists(path):
logging.warning('Could not find %s, ignoring example.', path)
continue
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
writer.write(tf_example.SerializeToString())
writer.close()
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
logging.info('Reading from Pet dataset.')
image_dir = os.path.join(data_dir, 'images')
annotations_dir = os.path.join(data_dir, 'annotations')
examples_path = os.path.join(annotations_dir, 'trainval.txt')
examples_list = dataset_util.read_examples_list(examples_path)
# Test images are not included in the downloaded data set, so we shall perform
# our own split.
random.seed(42)
random.shuffle(examples_list)
num_examples = len(examples_list)
num_train = int(0.7 * num_examples)
train_examples = examples_list[:num_train]
val_examples = examples_list[num_train:]
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples)
if __name__ == '__main__':
tf.app.run()
# Tensorflow Object Detection API: data decoders.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "tf_example_decoder",
srcs = ["tf_example_decoder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "tf_example_decoder_test",
srcs = ["tf_example_decoder_test.py"],
deps = [
":tf_example_decoder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self):
"""Constructor sets keys_to_features and items_to_handlers."""
self.keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
'image/height': tf.FixedLenFeature((), tf.int64, 1),
'image/width': tf.FixedLenFeature((), tf.int64, 1),
# Object boxes and classes.
'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
'image/object/class/label': tf.VarLenFeature(tf.int64),
'image/object/area': tf.VarLenFeature(tf.float32),
'image/object/is_crowd': tf.VarLenFeature(tf.int64),
'image/object/difficult': tf.VarLenFeature(tf.int64),
# Instance masks and classes.
'image/segmentation/object': tf.VarLenFeature(tf.int64),
'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
}
self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
fields.InputDataFields.source_id: (
slim_example_decoder.Tensor('image/source_id')),
fields.InputDataFields.key: (
slim_example_decoder.Tensor('image/key/sha256')),
fields.InputDataFields.filename: (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
fields.InputDataFields.groundtruth_boxes: (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
fields.InputDataFields.groundtruth_classes: (
slim_example_decoder.Tensor('image/object/class/label')),
fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
'image/object/area'),
fields.InputDataFields.groundtruth_is_crowd: (
slim_example_decoder.Tensor('image/object/is_crowd')),
fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')),
# Instance masks and classes.
fields.InputDataFields.groundtruth_instance_masks: (
slim_example_decoder.ItemHandlerCallback(
['image/segmentation/object', 'image/height', 'image/width'],
self._reshape_instance_masks)),
fields.InputDataFields.groundtruth_instance_classes: (
slim_example_decoder.Tensor('image/segmentation/object/class')),
}
def Decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
Args:
tf_example_string_tensor: a string tensor holding a serialized tensorflow
example proto.
Returns:
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
fields.InputDataFields.filename - string tensor with original dataset
filename.
fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing object mask area in pixels squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D boolean tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_instance_classes - 1D int64 tensor
of shape [None] containing classes for the instance masks.
"""
serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
self.items_to_handlers)
keys = decoder.list_items()
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D boolean tensor of shape [num_instances, height, width].
"""
masks = keys_to_tensors['image/segmentation/object']
if isinstance(masks, tf.SparseTensor):
masks = tf.sparse_tensor_to_dense(masks)
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
return tf.cast(tf.reshape(masks, to_shape), tf.bool)
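# --- Illustrative usage sketch (not part of the original module) ---
# A minimal, hedged example of how the decoder above is typically wired up,
# assuming `serialized_example_string` holds one serialized tf.Example (e.g. a
# single record read from a TFRecord file). `_decode_example_sketch` is a
# hypothetical helper, not an API of this module:
def _decode_example_sketch(serialized_example_string):
  """Decodes one serialized tf.Example and returns its numpy values."""
  decoder = TfExampleDecoder()
  tensor_dict = decoder.Decode(tf.constant(serialized_example_string))
  with tf.Session() as sess:
    # The result is a dict keyed by fields.InputDataFields.* entries.
    return sess.run(tensor_dict)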
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
class TfExampleDecoderTest(tf.test.TestCase):
def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
elif encoding_type == 'png':
image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_encoded
def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
elif encoding_type == 'png':
image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_decoded
def _Int64Feature(self, value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _FloatFeature(self, value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _BytesFeature(self, value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/source_id': self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/key/sha256': self._BytesFeature('abc'),
'image/filename': self._BytesFeature('filename')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_png),
'image/format': self._BytesFeature('png'),
'image/source_id': self._BytesFeature('image_id')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/area': self._FloatFeature(object_area),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/is_crowd': self._Int64Feature(object_is_crowd),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_is_crowd],
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/difficult': self._Int64Feature(object_difficult),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_difficult],
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_segmentation = (
np.random.randint(2, size=(num_instances,
image_height,
image_width)).astype(np.int64))
# Randomly generate class labels for each instance.
instance_segmentation_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/segmentation/object': self._Int64Feature(
instance_segmentation.flatten()),
'image/segmentation/object/class': self._Int64Feature(
instance_segmentation_classes)})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.Decode(tf.convert_to_tensor(example))
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_classes].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
instance_segmentation.astype(np.bool),
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
self.assertAllEqual(
instance_segmentation_classes,
tensor_dict[fields.InputDataFields.groundtruth_instance_classes])
if __name__ == '__main__':
tf.test.main()
item {
name: "/m/01g317"
id: 1
display_name: "person"
}
item {
name: "/m/0199g"
id: 2
display_name: "bicycle"
}
item {
name: "/m/0k4j"
id: 3
display_name: "car"
}
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
item {
name: "/m/01bjv"
id: 6
display_name: "bus"
}
item {
name: "/m/07jdr"
id: 7
display_name: "train"
}
item {
name: "/m/07r04"
id: 8
display_name: "truck"
}
item {
name: "/m/019jd"
id: 9
display_name: "boat"
}
item {
name: "/m/015qff"
id: 10
display_name: "traffic light"
}
item {
name: "/m/01pns0"
id: 11
display_name: "fire hydrant"
}
item {
name: "/m/02pv19"
id: 13
display_name: "stop sign"
}
item {
name: "/m/015qbp"
id: 14
display_name: "parking meter"
}
item {
name: "/m/0cvnqh"
id: 15
display_name: "bench"
}
item {
name: "/m/015p6"
id: 16
display_name: "bird"
}
item {
name: "/m/01yrx"
id: 17
display_name: "cat"
}
item {
name: "/m/0bt9lr"
id: 18
display_name: "dog"
}
item {
name: "/m/03k3r"
id: 19
display_name: "horse"
}
item {
name: "/m/07bgp"
id: 20
display_name: "sheep"
}
item {
name: "/m/01xq0k1"
id: 21
display_name: "cow"
}
item {
name: "/m/0bwd_0j"
id: 22
display_name: "elephant"
}
item {
name: "/m/01dws"
id: 23
display_name: "bear"
}
item {
name: "/m/0898b"
id: 24
display_name: "zebra"
}
item {
name: "/m/03bk1"
id: 25
display_name: "giraffe"
}
item {
name: "/m/01940j"
id: 27
display_name: "backpack"
}
item {
name: "/m/0hnnb"
id: 28
display_name: "umbrella"
}
item {
name: "/m/080hkjn"
id: 31
display_name: "handbag"
}
item {
name: "/m/01rkbr"
id: 32
display_name: "tie"
}
item {
name: "/m/01s55n"
id: 33
display_name: "suitcase"
}
item {
name: "/m/02wmf"
id: 34
display_name: "frisbee"
}
item {
name: "/m/071p9"
id: 35
display_name: "skis"
}
item {
name: "/m/06__v"
id: 36
display_name: "snowboard"
}
item {
name: "/m/018xm"
id: 37
display_name: "sports ball"
}
item {
name: "/m/02zt3"
id: 38
display_name: "kite"
}
item {
name: "/m/03g8mr"
id: 39
display_name: "baseball bat"
}
item {
name: "/m/03grzl"
id: 40
display_name: "baseball glove"
}
item {
name: "/m/06_fw"
id: 41
display_name: "skateboard"
}
item {
name: "/m/019w40"
id: 42
display_name: "surfboard"
}
item {
name: "/m/0dv9c"
id: 43
display_name: "tennis racket"
}
item {
name: "/m/04dr76w"
id: 44
display_name: "bottle"
}
item {
name: "/m/09tvcd"
id: 46
display_name: "wine glass"
}
item {
name: "/m/08gqpm"
id: 47
display_name: "cup"
}
item {
name: "/m/0dt3t"
id: 48
display_name: "fork"
}
item {
name: "/m/04ctx"
id: 49
display_name: "knife"
}
item {
name: "/m/0cmx8"
id: 50
display_name: "spoon"
}
item {
name: "/m/04kkgm"
id: 51
display_name: "bowl"
}
item {
name: "/m/09qck"
id: 52
display_name: "banana"
}
item {
name: "/m/014j1m"
id: 53
display_name: "apple"
}
item {
name: "/m/0l515"
id: 54
display_name: "sandwich"
}
item {
name: "/m/0cyhj_"
id: 55
display_name: "orange"
}
item {
name: "/m/0hkxq"
id: 56
display_name: "broccoli"
}
item {
name: "/m/0fj52s"
id: 57
display_name: "carrot"
}
item {
name: "/m/01b9xk"
id: 58
display_name: "hot dog"
}
item {
name: "/m/0663v"
id: 59
display_name: "pizza"
}
item {
name: "/m/0jy4k"
id: 60
display_name: "donut"
}
item {
name: "/m/0fszt"
id: 61
display_name: "cake"
}
item {
name: "/m/01mzpv"
id: 62
display_name: "chair"
}
item {
name: "/m/02crq1"
id: 63
display_name: "couch"
}
item {
name: "/m/03fp41"
id: 64
display_name: "potted plant"
}
item {
name: "/m/03ssj5"
id: 65
display_name: "bed"
}
item {
name: "/m/04bcr3"
id: 67
display_name: "dining table"
}
item {
name: "/m/09g1w"
id: 70
display_name: "toilet"
}
item {
name: "/m/07c52"
id: 72
display_name: "tv"
}
item {
name: "/m/01c648"
id: 73
display_name: "laptop"
}
item {
name: "/m/020lf"
id: 74
display_name: "mouse"
}
item {
name: "/m/0qjjc"
id: 75
display_name: "remote"
}
item {
name: "/m/01m2v"
id: 76
display_name: "keyboard"
}
item {
name: "/m/050k8"
id: 77
display_name: "cell phone"
}
item {
name: "/m/0fx9l"
id: 78
display_name: "microwave"
}
item {
name: "/m/029bxz"
id: 79
display_name: "oven"
}
item {
name: "/m/01k6s3"
id: 80
display_name: "toaster"
}
item {
name: "/m/0130jx"
id: 81
display_name: "sink"
}
item {
name: "/m/040b_t"
id: 82
display_name: "refrigerator"
}
item {
name: "/m/0bt_c3"
id: 84
display_name: "book"
}
item {
name: "/m/01x3z"
id: 85
display_name: "clock"
}
item {
name: "/m/02s195"
id: 86
display_name: "vase"
}
item {
name: "/m/01lsmm"
id: 87
display_name: "scissors"
}
item {
name: "/m/0kmg4"
id: 88
display_name: "teddy bear"
}
item {
name: "/m/03wvsk"
id: 89
display_name: "hair drier"
}
item {
name: "/m/012xff"
id: 90
display_name: "toothbrush"
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'aeroplane'
}
item {
id: 2
name: 'bicycle'
}
item {
id: 3
name: 'bird'
}
item {
id: 4
name: 'boat'
}
item {
id: 5
name: 'bottle'
}
item {
id: 6
name: 'bus'
}
item {
id: 7
name: 'car'
}
item {
id: 8
name: 'cat'
}
item {
id: 9
name: 'chair'
}
item {
id: 10
name: 'cow'
}
item {
id: 11
name: 'diningtable'
}
item {
id: 12
name: 'dog'
}
item {
id: 13
name: 'horse'
}
item {
id: 14
name: 'motorbike'
}
item {
id: 15
name: 'person'
}
item {
id: 16
name: 'pottedplant'
}
item {
id: 17
name: 'sheep'
}
item {
id: 18
name: 'sofa'
}
item {
id: 19
name: 'train'
}
item {
id: 20
name: 'tvmonitor'
}
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'Abyssinian'
}
item {
id: 2
name: 'american_bulldog'
}
item {
id: 3
name: 'american_pit_bull_terrier'
}
item {
id: 4
name: 'basset_hound'
}
item {
id: 5
name: 'beagle'
}
item {
id: 6
name: 'Bengal'
}
item {
id: 7
name: 'Birman'
}
item {
id: 8
name: 'Bombay'
}
item {
id: 9
name: 'boxer'
}
item {
id: 10
name: 'British_Shorthair'
}
item {
id: 11
name: 'chihuahua'
}
item {
id: 12
name: 'Egyptian_Mau'
}
item {
id: 13
name: 'english_cocker_spaniel'
}
item {
id: 14
name: 'english_setter'
}
item {
id: 15
name: 'german_shorthaired'
}
item {
id: 16
name: 'great_pyrenees'
}
item {
id: 17
name: 'havanese'
}
item {
id: 18
name: 'japanese_chin'
}
item {
id: 19
name: 'keeshond'
}
item {
id: 20
name: 'leonberger'
}
item {
id: 21
name: 'Maine_Coon'
}
item {
id: 22
name: 'miniature_pinscher'
}
item {
id: 23
name: 'newfoundland'
}
item {
id: 24
name: 'Persian'
}
item {
id: 25
name: 'pomeranian'
}
item {
id: 26
name: 'pug'
}
item {
id: 27
name: 'Ragdoll'
}
item {
id: 28
name: 'Russian_Blue'
}
item {
id: 29
name: 'saint_bernard'
}
item {
id: 30
name: 'samoyed'
}
item {
id: 31
name: 'scottish_terrier'
}
item {
id: 32
name: 'shiba_inu'
}
item {
id: 33
name: 'Siamese'
}
item {
id: 34
name: 'Sphynx'
}
item {
id: 35
name: 'staffordshire_bull_terrier'
}
item {
id: 36
name: 'wheaten_terrier'
}
item {
id: 37
name: 'yorkshire_terrier'
}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
configuring the eval job.
1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified. In
this mode, the --eval_training_data flag may be given to force the pipeline
to evaluate on training data instead of eval data.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files may be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being evaluated, an
input_reader_pb2.InputReader file to specify what data the model should be
evaluated on, and an eval_pb2.EvalConfig file to configure evaluation parameters.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--eval_config_path=eval_config.pbtxt \
--model_config_path=model_config.pbtxt \
--input_config_path=eval_input_config.pbtxt
"""
import functools
import tensorflow as tf
from google.protobuf import text_format
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import label_map_util
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
FLAGS = flags.FLAGS
def get_configs_from_pipeline_file():
"""Reads evaluation configuration from a pipeline_pb2.TrainEvalPipelineConfig.
Reads evaluation config from file specified by pipeline_config_path flag.
Returns:
model_config: a model_pb2.DetectionModel
eval_config: an eval_pb2.EvalConfig
input_config: an input_reader_pb2.InputReader
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
model_config = pipeline_config.model
if FLAGS.eval_training_data:
eval_config = pipeline_config.train_config
else:
eval_config = pipeline_config.eval_config
input_config = pipeline_config.eval_input_reader
return model_config, eval_config, input_config
def get_configs_from_multiple_files():
"""Reads evaluation configuration from multiple config files.
Reads the evaluation config from the following files:
model_config: Read from --model_config_path
eval_config: Read from --eval_config_path
input_config: Read from --input_config_path
Returns:
model_config: a model_pb2.DetectionModel
eval_config: an eval_pb2.EvalConfig
input_config: an input_reader_pb2.InputReader
"""
eval_config = eval_pb2.EvalConfig()
with tf.gfile.GFile(FLAGS.eval_config_path, 'r') as f:
text_format.Merge(f.read(), eval_config)
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
text_format.Merge(f.read(), model_config)
input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
text_format.Merge(f.read(), input_config)
return model_config, eval_config, input_config
def main(unused_argv):
assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
if FLAGS.pipeline_config_path:
model_config, eval_config, input_config = get_configs_from_pipeline_file()
else:
model_config, eval_config, input_config = get_configs_from_multiple_files()
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
FLAGS.checkpoint_dir, FLAGS.eval_dir)
if __name__ == '__main__':
tf.app.run()
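
For reference, the evaluation above can also be started programmatically rather than through the command-line flags. The following is a minimal sketch that mirrors what main() does for the single-pipeline-config case; the 'path/to/...' strings are placeholders, not real paths.

import functools
import tensorflow as tf
from google.protobuf import text_format
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import pipeline_pb2
from object_detection.utils import label_map_util

# Parse the combined train/eval pipeline config (placeholder path).
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('path/to/pipeline_config.pbtxt', 'r') as f:
  text_format.Merge(f.read(), pipeline_config)

# Build the model and input functions exactly as main() does above.
model_fn = functools.partial(model_builder.build,
                             model_config=pipeline_config.model,
                             is_training=False)
create_input_dict_fn = functools.partial(input_reader_builder.build,
                                         pipeline_config.eval_input_reader)
label_map = label_map_util.load_labelmap(
    pipeline_config.eval_input_reader.label_map_path)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=max(item.id for item in label_map.item))

# Run the repeated-checkpoint evaluation loop (placeholder directories).
evaluator.evaluate(create_input_dict_fn, model_fn, pipeline_config.eval_config,
                   categories, 'path/to/checkpoint_dir', 'path/to/eval_dir')
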
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common functions for repeatedly evaluating a checkpoint.
"""
import copy
import logging
import os
import time
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
def write_metrics(metrics, global_step, summary_dir):
"""Write metrics to a summary directory.
Args:
metrics: A dictionary containing metric names and values.
global_step: Global step at which the metrics are computed.
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
def evaluate_detection_results_pascal_voc(result_lists,
categories,
label_id_offset=0,
iou_thres=0.5,
corloc_summary=False):
"""Computes Pascal VOC detection metrics given groundtruth and detections.
  By default, this function takes detections and groundtruth boxes encoded in
  result_lists and computes Pascal VOC metrics, which can then be written to tf
  summaries for viewing on tensorboard.
Args:
result_lists: a dictionary holding lists of groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'image_id': a list of string ids
'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
'detection_scores': a list of float32 numpy arrays of shape [N]
'detection_classes': a list of int32 numpy arrays of shape [N]
'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
'groundtruth_classes': a list of int32 numpy arrays of shape [M]
and the remaining fields below are optional:
'difficult': a list of boolean arrays of shape [M] indicating the
difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
this information and it is used to remove difficult examples from eval
in order to not penalize the models on them.
Note that it is okay to have additional fields in result_lists --- they
are simply ignored.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
label_id_offset: an integer offset for the label space.
iou_thres: float determining the IoU threshold at which a box is considered
correct. Defaults to the standard 0.5.
corloc_summary: boolean. If True, also outputs CorLoc metrics.
Returns:
A dictionary of metric names to scalar values.
Raises:
ValueError: if the set of keys in result_lists is not a superset of the
expected list of keys. Unexpected keys are ignored.
ValueError: if the lists in result_lists have inconsistent sizes.
"""
# check for expected keys in result_lists
expected_keys = [
'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
]
expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
if not set(expected_keys).issubset(set(result_lists.keys())):
raise ValueError('result_lists does not have expected key set.')
num_results = len(result_lists[expected_keys[0]])
for key in expected_keys:
if len(result_lists[key]) != num_results:
raise ValueError('Inconsistent list sizes in result_lists')
# Pascal VOC evaluator assumes foreground index starts from zero.
categories = copy.deepcopy(categories)
for idx in range(len(categories)):
categories[idx]['id'] -= label_id_offset
# num_classes (maybe encoded as categories)
num_classes = max([cat['id'] for cat in categories]) + 1
logging.info('Computing Pascal VOC metrics on results.')
if all(image_id.isdigit() for image_id in result_lists['image_id']):
image_ids = [int(image_id) for image_id in result_lists['image_id']]
else:
image_ids = range(num_results)
evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
num_classes, matching_iou_threshold=iou_thres)
difficult_lists = None
if 'difficult' in result_lists and result_lists['difficult']:
difficult_lists = result_lists['difficult']
for idx, image_id in enumerate(image_ids):
difficult = None
if difficult_lists is not None and difficult_lists[idx].size:
difficult = difficult_lists[idx].astype(np.bool)
evaluator.add_single_ground_truth_image_info(
image_id, result_lists['groundtruth_boxes'][idx],
result_lists['groundtruth_classes'][idx] - label_id_offset,
difficult)
evaluator.add_single_detected_image_info(
image_id, result_lists['detection_boxes'][idx],
result_lists['detection_scores'][idx],
result_lists['detection_classes'][idx] - label_id_offset)
per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
evaluator.evaluate())
metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
category_index = label_map_util.create_category_index(categories)
for idx in range(per_class_ap.size):
if idx in category_index:
display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
.format(iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_ap[idx]
if corloc_summary:
metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
for idx in range(per_class_corloc.size):
if idx in category_index:
display_name = (
'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_corloc[idx]
return metrics
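
As a concrete illustration of the result_lists layout described above, the following standalone sketch (not part of this module; the boxes, scores, and category are fabricated) evaluates a single image containing one correct detection:

import numpy as np
from object_detection import eval_util

toy_result_lists = {
    'image_id': ['0'],
    'detection_boxes': [np.array([[0., 0., 10., 10.]], dtype=np.float32)],
    'detection_scores': [np.array([0.9], dtype=np.float32)],
    'detection_classes': [np.array([1], dtype=np.int32)],
    'groundtruth_boxes': [np.array([[0., 0., 10., 10.]], dtype=np.float32)],
    'groundtruth_classes': [np.array([1], dtype=np.int32)],
}
toy_categories = [{'id': 1, 'name': 'cat'}]
# label_id_offset=1 shifts the label space so class ids start at zero, as the
# Pascal VOC evaluator expects.
metrics = eval_util.evaluate_detection_results_pascal_voc(
    toy_result_lists, toy_categories, label_id_offset=1)
# metrics now maps names such as 'Precision/mAP@0.5IOU' to scalar values.
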
# TODO: Add tests.
def visualize_detection_results(result_dict,
tag,
global_step,
categories,
summary_dir='',
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
min_score_thresh=.5,
max_num_predictions=20):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
to image summaries which can be viewed on tensorboard. It optionally also
  writes images to a directory. If an entry is missing from the label map, the
  unknown class name is shown as "N/A" in the visualization.
Args:
result_dict: a dictionary holding groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
The following keys are optional:
'groundtruth_boxes': a numpy array of shape [N, 4]
'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score and,
      for display, scores are assumed to be probabilities between 0 and 1.
tag: tensorboard tag (string) to associate with image.
global_step: global step at which the visualization are generated.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
'supercategory': (optional) string representing the supercategory
e.g., 'animal', 'vehicle', 'food', etc
summary_dir: the output directory to which the image summaries are written.
export_dir: the output directory to which images are written. If this is
empty (default), then images are not exported.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
detection_classes,
detection_scores,
category_index,
instance_masks=detection_masks,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
if export_dir:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
summary = tf.Summary(value=[
tf.Summary.Value(tag=tag, image=tf.Summary.Image(
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
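
As a standalone sketch of the result_dict contract above (not part of this module; the image, box, and category are fabricated and the summary directory is a placeholder):

import numpy as np
from object_detection import eval_util

dummy_result_dict = {
    # [1, height, width, 3] uint8 image, as required above.
    'original_image': np.zeros((1, 100, 100, 3), dtype=np.uint8),
    'detection_boxes': np.array([[10., 10., 50., 50.]], dtype=np.float32),
    'detection_scores': np.array([0.8], dtype=np.float32),
    'detection_classes': np.array([1], dtype=np.int32),
}
# Writes one image summary tagged 'image-0' to the (placeholder) summary dir.
eval_util.visualize_detection_results(
    dummy_result_dict, tag='image-0', global_step=0,
    categories=[{'id': 1, 'name': 'cat'}],
    summary_dir='path/to/eval_dir')
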
# TODO: Add tests.
# TODO: Have an argument called `aggregated_processor_tensor_keys` that contains
# a whitelist of tensors used by the `aggregated_result_processor` instead of a
# blacklist. This will prevent us from inadvertently adding any evaluated
# tensors into the `results_list` data structure that are not needed by
# `aggregated_result_preprocessor`.
def run_checkpoint_once(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Evaluates both python metrics and tensorflow slim metrics.
Python metrics are processed in batch by the aggregated_result_processor,
while tensorflow slim metrics statistics are computed by running
metric_names_to_updates tensors and aggregated using metric_names_to_values
tensor.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
      the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
    aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
    batch_processor: a function taking five arguments:
      1. tensor_dict: the same tensor_dict that is passed in as the first
        argument to this function.
      2. sess: a tensorflow session
      3. batch_index: an integer representing the index of the batch amongst
        all batches
      4. counters: a dictionary holding 'success' and 'skipped' counts that the
        processor may update.
      5. update_op: a tensorflow update op that will run for each batch.
      and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, which defaults to running:
return sess.run(tensor_dict)
To skip an image, it suffices to return an empty dictionary in place of
result_dict.
checkpoint_dirs: list of directories to load into an EnsembleModel. If it
has only one directory, EnsembleModel will not be used -- a DetectionModel
will be instantiated directly. Not used if restore_fn is set.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: None, or a function that takes a tf.Session object and correctly
restores all necessary variables from the correct checkpoint file. If
None, attempts to restore from the first directory in checkpoint_dirs.
num_batches: the number of batches to use for evaluation.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
one element.
ValueError: if save_graph is True and save_graph_dir is not defined.
"""
if save_graph and not save_graph_dir:
raise ValueError('`save_graph_dir` must be defined.')
sess = tf.Session(master, graph=tf.get_default_graph())
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
if restore_fn:
restore_fn(sess)
else:
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, checkpoint_file)
if save_graph:
tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
valid_keys = list(set(tensor_dict.keys()) - set(keys_to_exclude_from_results))
result_lists = {key: [] for key in valid_keys}
counters = {'skipped': 0, 'success': 0}
other_metrics = None
with tf.contrib.slim.queues.QueueRunners(sess):
try:
for batch in range(int(num_batches)):
if (batch + 1) % 100 == 0:
logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
if not batch_processor:
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
result_dict = {}
else:
result_dict = batch_processor(
tensor_dict, sess, batch, counters, update_op)
for key in result_dict:
if key in valid_keys:
result_lists[key].append(result_dict[key])
if metric_names_to_values is not None:
other_metrics = sess.run(metric_names_to_values)
logging.info('Running eval batches done.')
except tf.errors.OutOfRangeError:
logging.info('Done evaluating -- epoch limit reached')
finally:
# When done, ask the threads to stop.
metrics = aggregated_result_processor(result_lists)
if other_metrics is not None:
metrics.update(other_metrics)
global_step = tf.train.global_step(sess, slim.get_global_step())
write_metrics(metrics, global_step, summary_dir)
logging.info('# success: %d', counters['success'])
logging.info('# skipped: %d', counters['skipped'])
sess.close()
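
To make the batch_processor contract concrete, a minimal custom processor could look like the sketch below (illustrative only; it relies on the logging and tf imports at the top of this module and simply mirrors the default per-batch behaviour):

def example_batch_processor(tensor_dict, sess, batch_index, counters,
                            update_op):
  """Runs one batch, updates the success/skipped counters, returns results."""
  try:
    result_dict, _ = sess.run([tensor_dict, update_op])
    counters['success'] += 1
  except tf.errors.InvalidArgumentError:
    # Returning an empty dict skips this image, as described above.
    logging.info('Skipping batch %d', batch_index)
    counters['skipped'] += 1
    result_dict = {}
  return result_dict
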
# TODO: Add tests.
def repeated_checkpoint_run(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
eval_interval_secs=120,
max_number_of_evaluations=None,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
This function repeatedly loads a checkpoint and evaluates a desired
set of tensors (provided by tensor_dict) and hands the resulting numpy
arrays to a function result_processor which can be used to further
process/save/visualize the results.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
    batch_processor: a function taking five arguments:
      1. tensor_dict: the same tensor_dict that is passed in as the first
        argument to this function.
      2. sess: a tensorflow session
      3. batch_index: an integer representing the index of the batch amongst
        all batches
      4. counters: a dictionary holding 'success' and 'skipped' counts that the
        processor may update.
      5. update_op: a tensorflow update op that will run for each batch.
      and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, which defaults to running:
return sess.run(tensor_dict)
checkpoint_dirs: list of directories to load into a DetectionModel or an
EnsembleModel if restore_fn isn't set. Also used to determine when to run
next evaluation. Must have at least one element.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: a function that takes a tf.Session object and correctly restores
all necessary variables from the correct checkpoint file.
num_batches: the number of batches to use for evaluation.
eval_interval_secs: the number of seconds between each evaluation run.
max_number_of_evaluations: the max number of iterations of the evaluation.
If the value is left as None the evaluation continues indefinitely.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
    ValueError: if max_number_of_evaluations is neither None nor a positive number.
ValueError: if checkpoint_dirs doesn't have at least one element.
"""
if max_number_of_evaluations and max_number_of_evaluations <= 0:
raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
last_evaluated_model_path = None
number_of_evaluations = 0
while True:
start = time.time()
logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
time.gmtime()))
model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
if not model_path:
logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
elif model_path == last_evaluated_model_path:
logging.info('Found already evaluated checkpoint. Will try again in %d '
'seconds', eval_interval_secs)
else:
last_evaluated_model_path = model_path
run_checkpoint_once(tensor_dict, update_op, summary_dir,
aggregated_result_processor,
batch_processor, checkpoint_dirs,
variables_to_restore, restore_fn, num_batches, master,
save_graph, save_graph_dir, metric_names_to_values,
keys_to_exclude_from_results)
number_of_evaluations += 1
if (max_number_of_evaluations and
number_of_evaluations >= max_number_of_evaluations):
logging.info('Finished evaluation!')
break
time_to_next_eval = start + eval_interval_secs - time.time()
if time_to_next_eval > 0:
time.sleep(time_to_next_eval)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Detection model evaluator.
This file provides a generic evaluation method that can be used to evaluate a
DetectionModel.
"""
import logging
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
slim = tf.contrib.slim
EVAL_METRICS_FN_DICT = {
'pascal_voc_metrics': eval_util.evaluate_detection_results_pascal_voc
}
def _extract_prediction_tensors(model,
create_input_dict_fn,
ignore_groundtruth=False):
"""Restores the model in a tensorflow session.
Args:
model: model to perform predictions with.
create_input_dict_fn: function to create input tensor dictionaries.
ignore_groundtruth: whether groundtruth should be ignored.
Returns:
tensor_dict: A tensor dictionary with evaluations.
"""
input_dict = create_input_dict_fn()
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
original_image_shape = tf.shape(original_image)
absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
original_image_shape[1], original_image_shape[2])
label_id_offset = 1
tensor_dict = {
'original_image': original_image,
'image_id': input_dict[fields.InputDataFields.source_id],
'detection_boxes': absolute_detection_boxlist.get(),
'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
'detection_classes': (
tf.squeeze(detections['detection_classes'], axis=0) +
label_id_offset),
}
if 'detection_masks' in detections:
detection_masks = tf.squeeze(detections['detection_masks'],
axis=0)
detection_boxes = tf.squeeze(detections['detection_boxes'],
axis=0)
# TODO: This should be done in model's postprocess function ideally.
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks,
detection_boxes,
original_image_shape[1], original_image_shape[2])
detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
0.5))
tensor_dict['detection_masks'] = detection_masks_reframed
# load groundtruth fields into tensor_dict
if not ignore_groundtruth:
normalized_gt_boxlist = box_list.BoxList(
input_dict[fields.InputDataFields.groundtruth_boxes])
gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
tf.shape(original_image)[1],
tf.shape(original_image)[2])
groundtruth_boxes = gt_boxlist.get()
groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
tensor_dict['groundtruth_boxes'] = groundtruth_boxes
tensor_dict['groundtruth_classes'] = groundtruth_classes
tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
tensor_dict['is_crowd'] = input_dict[
fields.InputDataFields.groundtruth_is_crowd]
tensor_dict['difficult'] = input_dict[
fields.InputDataFields.groundtruth_difficult]
if 'detection_masks' in tensor_dict:
tensor_dict['groundtruth_instance_masks'] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return tensor_dict
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
"""Evaluation function for detection models.
Args:
create_input_dict_fn: a function to create a tensor input dictionary.
create_model_fn: a function that creates a DetectionModel.
eval_config: a eval_pb2.EvalConfig protobuf.
categories: a list of category dictionaries. Each dict in the list should
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
"""
model = create_model_fn()
if eval_config.ignore_groundtruth and not eval_config.export_path:
logging.fatal('If ignore_groundtruth=True then an export_path is '
'required. Aborting!!!')
tensor_dict = _extract_prediction_tensors(
model=model,
create_input_dict_fn=create_input_dict_fn,
ignore_groundtruth=eval_config.ignore_groundtruth)
def _process_batch(tensor_dict, sess, batch_index, counters, update_op):
"""Evaluates tensors in tensor_dict, visualizing the first K examples.
This function calls sess.run on tensor_dict, evaluating the original_image
tensor only on the first K examples and visualizing detections overlaid
on this original_image.
Args:
tensor_dict: a dictionary of tensors
sess: tensorflow session
batch_index: the index of the batch amongst all batches in the run.
counters: a dictionary holding 'success' and 'skipped' fields which can
be updated to keep track of number of successful and failed runs,
respectively. If these fields are not updated, then the success/skipped
counter values shown at the end of evaluation will be incorrect.
update_op: An update op that has to be run along with output tensors. For
example this could be an op to compute statistics for slim metrics.
Returns:
result_dict: a dictionary of numpy arrays
"""
if batch_index >= eval_config.num_visualizations:
if 'original_image' in tensor_dict:
        tensor_dict = {k: v for (k, v) in tensor_dict.items()
                       if k != 'original_image'}
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
global_step = tf.train.global_step(sess, slim.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
eval_util.visualize_detection_results(
result_dict, tag, global_step, categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
show_groundtruth=eval_config.visualization_export_dir)
return result_dict
def _process_aggregated_results(result_lists):
eval_metric_fn_key = eval_config.metrics_set
if eval_metric_fn_key not in EVAL_METRICS_FN_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return EVAL_METRICS_FN_DICT[eval_metric_fn_key](result_lists,
categories=categories)
variables_to_restore = tf.global_variables()
global_step = slim.get_or_create_global_step()
variables_to_restore.append(global_step)
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def _restore_latest_checkpoint(sess):
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
saver.restore(sess, latest_checkpoint)
eval_util.repeated_checkpoint_run(
tensor_dict=tensor_dict,
update_op=tf.no_op(),
summary_dir=eval_dir,
aggregated_result_processor=_process_aggregated_results,
batch_processor=_process_batch,
checkpoint_dirs=[checkpoint_dir],
variables_to_restore=None,
restore_fn=_restore_latest_checkpoint,
num_batches=eval_config.num_examples,
eval_interval_secs=eval_config.eval_interval_secs,
max_number_of_evaluations=(
1 if eval_config.ignore_groundtruth else
eval_config.max_evals if eval_config.max_evals else
None),
master=eval_config.eval_master,
save_graph=eval_config.save_graph,
save_graph_dir=(eval_dir if eval_config.save_graph else ''))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to export an object detection model for inference.
Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint.
The inference graph contains one of two input nodes depending on the user
specified option.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
* `tf_example`: Accepts a serialized TFExample proto. The batch size in this
case is always 1.
and the following output nodes:
* `num_detections` : Outputs float32 tensors of the form [batch]
that specifies the number of valid boxes per image in the batch.
* `detection_boxes` : Outputs float32 tensors of the form
[batch, num_boxes, 4] containing detected boxes.
* `detection_scores` : Outputs float32 tensors of the form
[batch, num_boxes] containing class scores for the detections.
* `detection_classes`: Outputs float32 tensors of the form
[batch, num_boxes] containing classes for the detections.
Note that currently `batch` is always 1, but we will support `batch` > 1 in
the future.
Optionally, one can freeze the graph by converting the weights in the provided
checkpoint as graph constants thereby eliminating the need to use a checkpoint
file during inference.
Note that this tool uses `use_moving_averages` from eval_config to decide
which weights to freeze.
Example Usage:
--------------
python export_inference_graph \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--checkpoint_path path/to/model-ckpt \
--inference_graph_path path/to/inference_graph.pb
"""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2
slim = tf.contrib.slim
flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
                    'one of [`image_tensor`, `tf_example`]')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
'If provided, bakes the weights from the checkpoint into '
'the graph.')
flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
'inference graph.')
FLAGS = flags.FLAGS
def main(_):
assert FLAGS.pipeline_config_path, 'TrainEvalPipelineConfig missing.'
assert FLAGS.inference_graph_path, 'Inference graph path missing.'
assert FLAGS.input_type, 'Input type missing.'
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
FLAGS.checkpoint_path,
FLAGS.inference_graph_path)
if __name__ == '__main__':
tf.app.run()
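
Once a frozen graph has been exported, it can be loaded and queried through the input and output node names listed above. A minimal sketch, assuming the graph was exported with --input_type image_tensor; the path and the dummy image size are placeholders:

import numpy as np
import tensorflow as tf

# Load the frozen inference graph (placeholder path).
graph_def = tf.GraphDef()
with tf.gfile.GFile('path/to/inference_graph.pb', 'rb') as f:
  graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
  tf.import_graph_def(graph_def, name='')
  with tf.Session(graph=graph) as sess:
    # Dummy uint8 batch of shape [1, height, width, 3].
    image = np.zeros((1, 300, 300, 3), dtype=np.uint8)
    # Fetch the output nodes documented above by name.
    boxes, scores, classes, num_detections = sess.run(
        ['detection_boxes:0', 'detection_scores:0',
         'detection_classes:0', 'num_detections:0'],
        feed_dict={'image_tensor:0': image})
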
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to export object detection inference graph."""
import logging
import os
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import importer
from tensorflow.python.platform import gfile
from tensorflow.python.training import saver as saver_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
slim = tf.contrib.slim
# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when newer
# version of Tensorflow becomes more common.
def freeze_graph_with_def_protos(
input_graph_def,
input_saver_def,
input_checkpoint,
output_node_names,
restore_op_name,
filename_tensor_name,
output_graph,
clear_devices,
initializer_nodes,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
# 'input_checkpoint' may be a prefix if we're using Saver V2 format
if not saver_lib.checkpoint_exists(input_checkpoint):
logging.info('Input checkpoint "' + input_checkpoint + '" does not exist!')
return -1
if not output_node_names:
logging.info('You must supply the name of a node to --output_node_names.')
return -1
# Remove all the explicit device specifications for this node. This helps to
# make the graph more portable.
if clear_devices:
for node in input_graph_def.node:
node.device = ''
_ = importer.import_graph_def(input_graph_def, name='')
with session.Session() as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
saver.restore(sess, input_checkpoint)
else:
var_list = {}
reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
var_to_shape_map = reader.get_variable_to_shape_map()
for key in var_to_shape_map:
try:
tensor = sess.graph.get_tensor_by_name(key + ':0')
except KeyError:
# This tensor doesn't exist in the graph (for example it's
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
saver = saver_lib.Saver(var_list=var_list)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
variable_names_blacklist = (variable_names_blacklist.split(',') if
variable_names_blacklist else None)
output_graph_def = graph_util.convert_variables_to_constants(
sess,
input_graph_def,
output_node_names.split(','),
variable_names_blacklist=variable_names_blacklist)
with gfile.GFile(output_graph, 'wb') as f:
f.write(output_graph_def.SerializeToString())
logging.info('%d ops in the final graph.', len(output_graph_def.node))
# TODO: Support batch tf example inputs.
def _tf_example_input_placeholder():
tf_example_placeholder = tf.placeholder(
tf.string, shape=[], name='tf_example')
tensor_dict = tf_example_decoder.TfExampleDecoder().Decode(
tf_example_placeholder)
image = tensor_dict[fields.InputDataFields.image]
return tf.expand_dims(image, axis=0)
def _image_tensor_input_placeholder():
return tf.placeholder(dtype=tf.uint8,
shape=(1, None, None, 3),
name='image_tensor')
input_placeholder_fn_map = {
'tf_example': _tf_example_input_placeholder,
'image_tensor': _image_tensor_input_placeholder
}
def _add_output_tensor_nodes(postprocessed_tensors):
"""Adds output nodes for detection boxes and scores.
Adds the following nodes for output tensors -
* num_detections: float32 tensor of shape [batch_size].
* detection_boxes: float32 tensor of shape [batch_size, num_boxes, 4]
containing detected boxes.
* detection_scores: float32 tensor of shape [batch_size, num_boxes]
containing scores for the detected boxes.
* detection_classes: float32 tensor of shape [batch_size, num_boxes]
containing class predictions for the detected boxes.
Args:
postprocessed_tensors: a dictionary containing the following fields
'detection_boxes': [batch, max_detections, 4]
'detection_scores': [batch, max_detections]
'detection_classes': [batch, max_detections]
'num_detections': [batch]
"""
label_id_offset = 1
boxes = postprocessed_tensors.get('detection_boxes')
scores = postprocessed_tensors.get('detection_scores')
classes = postprocessed_tensors.get('detection_classes') + label_id_offset
num_detections = postprocessed_tensors.get('num_detections')
tf.identity(boxes, name='detection_boxes')
tf.identity(scores, name='detection_scores')
tf.identity(classes, name='detection_classes')
tf.identity(num_detections, name='num_detections')
def _write_inference_graph(inference_graph_path,
checkpoint_path=None,
use_moving_averages=False,
output_node_names=(
'num_detections,detection_scores,'
'detection_boxes,detection_classes')):
"""Writes inference graph to disk with the option to bake in weights.
If checkpoint_path is not None bakes the weights into the graph thereby
eliminating the need of checkpoint files during inference. If the model
was trained with moving averages, setting use_moving_averages to true
restores the moving averages, otherwise the original set of variables
is restored.
Args:
inference_graph_path: Path to write inference graph.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
output_node_names: Output tensor names, defaults are: num_detections,
detection_scores, detection_boxes, detection_classes.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
if checkpoint_path:
saver = None
if use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
else:
saver = tf.train.Saver()
freeze_graph_with_def_protos(
input_graph_def=inference_graph_def,
input_saver_def=saver.as_saver_def(),
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph=inference_graph_path,
clear_devices=True,
initializer_nodes='')
return
tf.train.write_graph(inference_graph_def,
os.path.dirname(inference_graph_path),
os.path.basename(inference_graph_path),
as_text=False)
def _export_inference_graph(input_type,
detection_model,
use_moving_averages,
checkpoint_path,
inference_graph_path):
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
inputs = tf.to_float(input_placeholder_fn_map[input_type]())
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
_add_output_tensor_nodes(postprocessed_tensors)
_write_inference_graph(inference_graph_path, checkpoint_path,
use_moving_averages)
def export_inference_graph(input_type, pipeline_config, checkpoint_path,
inference_graph_path):
"""Exports inference graph for the model specified in the pipeline config.
Args:
input_type: Type of input for the graph. Can be one of [`image_tensor`,
`tf_example`].
    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
checkpoint_path: Path to the checkpoint file to freeze.
inference_graph_path: Path to write inference graph to.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
_export_inference_graph(input_type, detection_model,
pipeline_config.eval_config.use_moving_averages,
checkpoint_path, inference_graph_path)

Tensorflow Object Detection API

Creating accurate machine learning models capable of localizing and identifying multiple objects in a single image remains a core challenge in computer vision. The TensorFlow Object Detection API is an open source framework built on top of TensorFlow that makes it easy to construct, train and deploy object detection models. At Google we’ve certainly found this codebase to be useful for our computer vision needs, and we hope that you will as well.

Contributions to the codebase are welcome, and we would love to hear back from you if you find this API useful. Finally, if you use the Tensorflow Object Detection API for a research publication, please consider citing:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017

[link][bibtex]

Maintainers

Table of contents

Quick Start:

Setup:

Running:

Extras:

Release information

June 15, 2017

In addition to our base Tensorflow detection model definitions, this release includes:

  • A selection of trainable detection models, including:
    • Single Shot Multibox Detector (SSD) with MobileNet,
    • SSD with Inception V2,
    • Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
    • Faster RCNN with Resnet 101,
    • Faster RCNN with Inception Resnet v2
  • Frozen weights (trained on the COCO dataset) for each of the above models to be used for out-of-the-box inference purposes.
  • A Jupyter notebook for performing out-of-the-box inference with one of our released models
  • Convenient local training scripts as well as distributed training and evaluation pipelines via Google Cloud.

Thanks to contributors: Jonathan Huang, Vivek Rathod, Derek Chow, Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings, Viacheslav Kovalevskyi, Kevin Murphy

Object-Detector-App

A real-time object recognition application using Google's TensorFlow Object Detection API and OpenCV.

Getting Started

  1. conda env create -f environment.yml
  2. python object_detection_app.py. Optional arguments and their default values are listed below; an example command follows the list:
    • Device index of the camera --source=0
    • Width of the frames in the video stream --width=480
    • Height of the frames in the video stream --height=360
    • Number of workers --num-workers=2
    • Size of the queue --queue-size=5
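
For example, to read from the default camera at 640x480 with four workers (the values here are only an illustration of the flags listed above):

python object_detection_app.py --source=0 --width=640 --height=480 --num-workers=4 --queue-size=10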

Requirements

Notes

  • OpenCV 3.1 might crash on OSX after a while, which is why I switched to version 3.0. See the open issue and solution here.
  • Moving the .read() part of the video stream into multiple child processes did not work. However, it was possible to move it to a separate thread.

Copyright

See LICENSE for details. Copyright (c) 2017 Dat Tran.
