Add Python 3.x and TensorFlow 1.x support to the caffe-tensorflow ImageNet validation example

2017/4/11

To make caffe-tensorflow's .\examples\imagenet\validate.py work with Python 3 and TensorFlow 1.x:

  • replace .\examples\imagenet\dataset.py with the dataset.py below
  • replace .\kaffe\tensorflow\network.py with the network.py below
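
With both files swapped in, validate.py runs unchanged. For orientation, here is a condensed sketch of the control flow validate.py drives through these two modules. The `models` helper module, its `get_data_spec` function, and the `GoogleNet` class are assumptions taken from the caffe-tensorflow examples directory; adjust the names and paths to your checkout. This is a sketch of the usage pattern, not a drop-in replacement for validate.py.

# Condensed sketch of how validate.py uses the two modules below.
# Assumes a model class generated by caffe-tensorflow (here GoogleNet) and
# its matching data spec; names and paths are illustrative.
import numpy as np
import tensorflow as tf
from models import GoogleNet, get_data_spec  # assumption: example helpers
from dataset import ImageNetProducer

spec = get_data_spec(model_class=GoogleNet)  # assumption: helper signature
input_node = tf.placeholder(tf.float32,
                            shape=(None, spec.crop_size, spec.crop_size, spec.channels))
net = GoogleNet({'data': input_node})
producer = ImageNetProducer(val_path='val.txt', data_path='ilsvrc12/', data_spec=spec)

with tf.Session() as sess:
    net.load('googlenet.npy', sess)  # weights converted from Caffe
    coordinator = tf.train.Coordinator()
    threads = producer.start(session=sess, coordinator=coordinator)
    correct = total = 0
    for labels, images in producer.batches(sess):
        probs = sess.run(net.get_output(), feed_dict={input_node: images})
        correct += np.sum(np.argmax(probs, axis=1) == labels)
        total += len(labels)
    coordinator.request_stop()
    coordinator.join(threads, stop_grace_period_secs=2)
print('Top-1 accuracy: {:.2%}'.format(correct / total))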
dataset.py

'''Utility functions and classes for handling image datasets.'''

import os.path as osp

import numpy as np
import tensorflow as tf


def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.
    scale : The image will be first scaled to this size.
            If isotropic is true, the smaller side is rescaled to this,
            preserving the aspect ratio.
    crop  : After scaling, a central crop of this size is taken.
    mean  : Subtracted from the image
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.stack([scale, scale])
    img = tf.image.resize_images(img, new_shape)
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    # Integer division keeps the offsets int32 under Python 3
    offset = (new_shape - crop) // 2
    img = tf.slice(img,
                   begin=tf.to_int64(tf.stack([offset[0], offset[1], 0])),
                   size=tf.to_int64(tf.stack([crop, crop, -1])))
    # Mean subtraction
    return tf.to_float(img) - mean

class ImageProducer(object):
    '''
    Loads and processes batches of images in parallel.
    '''

    def __init__(self, image_paths, data_spec, num_concurrent=4, batch_size=None, labels=None):
        # The data specifications describe how to process the image
        self.data_spec = data_spec
        # A list of full image paths
        self.image_paths = image_paths
        # An optional list of labels corresponding to each image path
        self.labels = labels
        # A boolean flag per image indicating whether it's a JPEG or PNG
        self.extension_mask = self.create_extension_mask(self.image_paths)
        # Create the loading and processing operations
        self.setup(batch_size=batch_size, num_concurrent=num_concurrent)

    def setup(self, batch_size, num_concurrent):
        # Validate the batch size
        num_images = len(self.image_paths)
        batch_size = min(num_images, batch_size or self.data_spec.batch_size)
        if num_images % batch_size != 0:
            raise ValueError(
                'The total number of images ({}) must be divisible by the batch size ({}).'.format(
                    num_images, batch_size))
        # Integer division: '/' would make num_batches a float under Python 3
        self.num_batches = num_images // batch_size
        # Create a queue that will contain image paths (and their indices and extension indicator)
        self.path_queue = tf.FIFOQueue(capacity=num_images,
                                       dtypes=[tf.int32, tf.bool, tf.string],
                                       name='path_queue')
        # Enqueue all image paths, along with their indices
        indices = tf.range(num_images)
        self.enqueue_paths_op = self.path_queue.enqueue_many([indices, self.extension_mask,
                                                              self.image_paths])
        # Close the path queue (no more additions)
        self.close_path_queue_op = self.path_queue.close()
        # Create an operation that dequeues a single path and returns a processed image
        (idx, processed_image) = self.process()
        # Create a queue that will contain the processed images (and their indices)
        image_shape = (self.data_spec.crop_size, self.data_spec.crop_size, self.data_spec.channels)
        processed_queue = tf.FIFOQueue(capacity=int(np.ceil(num_images / float(num_concurrent))),
                                       dtypes=[tf.int32, tf.float32],
                                       shapes=[(), image_shape],
                                       name='processed_queue')
        # Enqueue the processed image and path
        enqueue_processed_op = processed_queue.enqueue([idx, processed_image])
        # Create a dequeue op that fetches a batch of processed images off the queue
        self.dequeue_op = processed_queue.dequeue_many(batch_size)
        # Create a queue runner to perform the processing operations in parallel
        num_concurrent = min(num_concurrent, num_images)
        self.queue_runner = tf.train.QueueRunner(processed_queue,
                                                 [enqueue_processed_op] * num_concurrent)
    def start(self, session, coordinator, num_concurrent=4):
        '''Start the processing worker threads.'''
        # Queue all paths
        session.run(self.enqueue_paths_op)
        # Close the path queue
        session.run(self.close_path_queue_op)
        # Start the queue runner and return the created threads
        return self.queue_runner.create_threads(session, coord=coordinator, start=True)

    def get(self, session):
        '''
        Get a single batch of images along with their indices. If a set of labels was
        provided, the corresponding labels are returned instead of the indices.
        '''
        (indices, images) = session.run(self.dequeue_op)
        if self.labels is not None:
            labels = [self.labels[idx] for idx in indices]
            return (labels, images)
        return (indices, images)

    def batches(self, session):
        '''Yield a batch until no more images are left.'''
        # range() replaced xrange() in Python 3
        for _ in range(self.num_batches):
            yield self.get(session=session)
    def load_image(self, image_path, is_jpeg):
        # Read the file
        file_data = tf.read_file(image_path)
        # Decode the image data
        img = tf.cond(
            is_jpeg,
            lambda: tf.image.decode_jpeg(file_data, channels=self.data_spec.channels),
            lambda: tf.image.decode_png(file_data, channels=self.data_spec.channels))
        if self.data_spec.expects_bgr:
            # Convert from RGB channel ordering to BGR
            # This matches, for instance, how OpenCV orders the channels.
            img = tf.reverse(img, [2])
        return img

    def process(self):
        # Dequeue a single image path
        idx, is_jpeg, image_path = self.path_queue.dequeue()
        # Load the image
        img = self.load_image(image_path, is_jpeg)
        # Process the image
        processed_img = process_image(img=img,
                                      scale=self.data_spec.scale_size,
                                      isotropic=self.data_spec.isotropic,
                                      crop=self.data_spec.crop_size,
                                      mean=self.data_spec.mean)
        # Return the processed image, along with its index
        return (idx, processed_img)

    @staticmethod
    def create_extension_mask(paths):

        def is_jpeg(path):
            extension = osp.splitext(path)[-1].lower()
            if extension in ('.jpg', '.jpeg'):
                return True
            if extension != '.png':
                raise ValueError('Unsupported image format: {}'.format(extension))
            return False

        return [is_jpeg(p) for p in paths]

    def __len__(self):
        return len(self.image_paths)

class ImageNetProducer(ImageProducer):

    def __init__(self, val_path, data_path, data_spec):
        # Read in the ground truth labels for the validation set
        # The get_ilsvrc_aux.sh in Caffe's data/ilsvrc12 folder can fetch a copy of val.txt
        gt_lines = open(val_path).readlines()
        gt_pairs = [line.split() for line in gt_lines]
        # Get the full image paths
        # You will need a copy of the ImageNet validation set for this.
        image_paths = [osp.join(data_path, p[0]) for p in gt_pairs]
        # The corresponding ground truth labels
        labels = np.array([int(p[1]) for p in gt_pairs])
        # Initialize base
        super(ImageNetProducer, self).__init__(image_paths=image_paths,
                                               data_spec=data_spec,
                                               labels=labels)
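
The producer also works outside the ImageNet example: hand the base ImageProducer a list of paths and it yields (indices, images) batches, since no labels are supplied. A minimal sketch, assuming a data spec object carrying the attributes the code above reads (batch_size, scale_size, crop_size, channels, isotropic, mean, expects_bgr); the SimpleSpec class and file names here are made up for illustration:

# Minimal standalone use of ImageProducer: indices come back in place of labels.
import numpy as np
import tensorflow as tf
from dataset import ImageProducer

class SimpleSpec(object):
    batch_size = 2
    scale_size = 256
    crop_size = 224
    channels = 3
    isotropic = True
    expects_bgr = False
    mean = np.array([104., 117., 124.], dtype=np.float32)

# Note: the image count must be divisible by the batch size (see setup() above)
producer = ImageProducer(image_paths=['a.jpg', 'b.jpg', 'c.png', 'd.png'],
                         data_spec=SimpleSpec())

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = producer.start(session=sess, coordinator=coord)
    for indices, images in producer.batches(sess):
        print(indices, images.shape)  # e.g. [0 1] (2, 224, 224, 3)
    coord.request_stop()
    coord.join(threads)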
network.py

import numpy as np
import tensorflow as tf

DEFAULT_PADDING = 'SAME'


def layer(op):
    '''Decorator for composable network layers.'''

    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.terminals) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated

class Network(object):

    def __init__(self, inputs, trainable=True):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable
        # Switch variable for dropout
        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
                                                       shape=[],
                                                       name='use_dropout')
        self.setup()

    def setup(self):
        '''Construct the network.'''
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        '''Load network weights.
        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        '''
        # www.cs.toronto.edu/~guerzhoy/tf_alexnet/myalexnet_forward_newtf.py
        data_dict = np.load(open(data_path, 'rb'), encoding='latin1').item()
        for op_name in data_dict:
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in iter(data_dict[op_name].items()):
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise
    def feed(self, *args):
        '''Set the input(s) for the next operation by replacing the terminal nodes.
        The arguments can be either layer names or the actual layers.
        '''
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            # str replaced basestring in Python 3
            if isinstance(fed_layer, str):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        '''Returns the current network output.'''
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        '''Returns an index-suffixed unique name for the given prefix.
        This is used for auto-generating layer names based on the type-prefix.
        '''
        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, ident)

    def make_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        return tf.get_variable(name, shape, trainable=self.trainable)

    def validate_padding(self, padding):
        '''Verifies that the padding is one of the supported ones.'''
        assert padding in ('SAME', 'VALID')
    @layer
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding=DEFAULT_PADDING,
             group=1,
             biased=True):
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            # Integer division: '/' would produce a float shape entry under Python 3
            kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) // group, c_o])
            if group == 1:
                # This is the common case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:
                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(axis=3, num_or_size_splits=group, value=input)
                kernel_groups = tf.split(axis=3, num_or_size_splits=group, value=kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # Concatenate the groups
                output = tf.concat(axis=3, values=output_groups)
            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output
    @layer
    def relu(self, input, name):
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        return tf.concat(axis=axis, values=inputs, name=name)

    @layer
    def add(self, inputs, name):
        return tf.add_n(inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True):
        with tf.variable_scope(name) as scope:
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(input, [-1, dim])
            else:
                feed_in, dim = (input, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc
    @layer
    def softmax(self, input, name):
        input_shape = list(map(lambda v: v.value, input.get_shape()))
        if len(input_shape) > 2:
            # For certain models (like NiN), the singleton spatial dimensions
            # need to be explicitly squeezed, since they're not broadcast-able
            # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
            if input_shape[1] == 1 and input_shape[2] == 1:
                input = tf.squeeze(input, axis=[1, 2])
            else:
                raise ValueError('Rank 2 tensor input expected for softmax!')
        return tf.nn.softmax(input, name=name)

    @layer
    def batch_normalization(self, input, name, scale_offset=True, relu=False):
        # NOTE: Currently, only inference is supported
        with tf.variable_scope(name) as scope:
            shape = [input.get_shape()[-1]]
            if scale_offset:
                scale = self.make_var('scale', shape=shape)
                offset = self.make_var('offset', shape=shape)
            else:
                scale, offset = (None, None)
            output = tf.nn.batch_normalization(
                input,
                mean=self.make_var('mean', shape=shape),
                variance=self.make_var('variance', shape=shape),
                offset=offset,
                scale=scale,
                # TODO: This is the default Caffe batch norm eps
                # Get the actual eps from parameters
                variance_epsilon=1e-5,
                name=name)
            if relu:
                output = tf.nn.relu(output)
            return output

    @layer
    def dropout(self, input, keep_prob, name):
        keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
        return tf.nn.dropout(input, keep, name=name)
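
For reference, the classes caffe-tensorflow generates on top of Network all follow the same pattern: setup() chains feed/conv/pool/fc calls, and the @layer decorator wires each output to the next layer's input. A hand-written toy subclass to show the chained API; the architecture and names here are illustrative only, not from any converted model:

# A toy Network subclass demonstrating the chained layer API.
import tensorflow as tf
from network import Network

class TinyNet(Network):
    def setup(self):
        (self.feed('data')
             .conv(3, 3, 16, 1, 1, name='conv1')   # 3x3 conv, 16 outputs, stride 1
             .max_pool(2, 2, 2, 2, name='pool1')   # 2x2 max pool, stride 2
             .fc(64, name='fc1')                   # flattens the spatial input first
             .softmax(name='prob'))

images = tf.placeholder(tf.float32, shape=(None, 28, 28, 3), name='data')
net = TinyNet({'data': images})
prob = net.get_output()  # the 'prob' softmax tensor
# Weights stored as a {layer_name: {param_name: array}} .npy dict can then be
# restored into the matching variable scopes:
# net.load('tinynet.npy', session)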