2017/4/11
To make caffe-tensorflow's .\examples\imagenet\validate.py
work with python 3 and tensorflow 1.x
- exchange
.\examples\imagenet\dataset.py
with dataset.py
- exchange
.\kaffe\tensorflow\network.py
with network.py
2017/4/11
To make caffe-tensorflow's .\examples\imagenet\validate.py
work with python 3 and tensorflow 1.x
.\examples\imagenet\dataset.py
with dataset.py
.\kaffe\tensorflow\network.py
with network.py
'''Utility functions and classes for handling image datasets.''' | |
import os.path as osp | |
import numpy as np | |
import tensorflow as tf | |
def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.

    img       : The decoded image tensor. Its first two dimensions are treated
                as the spatial size and it is sliced with three coordinates.
    scale     : The image will first be scaled to this size.
                If isotropic is true, the smaller side is rescaled to this,
                preserving the aspect ratio.
    isotropic : Whether the aspect ratio is preserved while rescaling.
    crop      : After scaling, a central crop of this size is taken.
    mean      : Subtracted from the image.

    Returns the cropped image, cast to float, with the mean subtracted.
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.stack([scale, scale])
    img = tf.image.resize_images(img, new_shape)
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    # Floor division keeps the offsets integral; under Python 3 a plain `/`
    # on integer tensors performs true division and yields float offsets.
    offset = (new_shape - crop) // 2
    img = tf.slice(img,
                   begin=tf.to_int64(tf.stack([offset[0], offset[1], 0])),
                   size=tf.to_int64(tf.stack([crop, crop, -1])))
    # Mean subtraction
    return tf.to_float(img) - mean
class ImageProducer(object):
    '''
    Loads and processes batches of images in parallel.

    Image paths are pushed through a path queue, decoded and preprocessed by
    queue-runner threads, and collected in a second queue from which whole
    batches are dequeued.

    The data_spec object is read for the following attributes: batch_size,
    scale_size, crop_size, isotropic, channels, mean and expects_bgr.
    '''

    def __init__(self, image_paths, data_spec, num_concurrent=4, batch_size=None, labels=None):
        '''
        image_paths    : A list of full image paths (JPEG or PNG files).
        data_spec      : Describes how the images must be processed.
        num_concurrent : Upper bound on the number of parallel processing ops.
        batch_size     : Optional batch size; falls back to data_spec.batch_size.
        labels         : Optional labels, indexable by image position.
        '''
        # The data specifications describe how to process the image
        self.data_spec = data_spec
        # A list of full image paths
        self.image_paths = image_paths
        # An optional list of labels corresponding to each image path
        self.labels = labels
        # A boolean flag per image indicating whether its a JPEG or PNG
        self.extension_mask = self.create_extension_mask(self.image_paths)
        # Create the loading and processing operations
        self.setup(batch_size=batch_size, num_concurrent=num_concurrent)

    def setup(self, batch_size, num_concurrent):
        '''Builds the queues and the loading / processing operations.

        Raises ValueError if there are no images, or if the number of images
        is not divisible by the effective batch size.
        '''
        # Validate the batch size
        num_images = len(self.image_paths)
        if num_images == 0:
            # Without this guard the modulo below fails with an opaque
            # ZeroDivisionError.
            raise ValueError('At least one image path is required.')
        batch_size = min(num_images, batch_size or self.data_spec.batch_size)
        if num_images % batch_size != 0:
            raise ValueError(
                'The total number of images ({}) must be divisible by the batch size ({}).'.format(
                    num_images, batch_size))
        # Integer division: a plain `/` produces a float under Python 3.
        self.num_batches = num_images // batch_size

        # Create a queue that will contain image paths (and their indices and extension indicator)
        self.path_queue = tf.FIFOQueue(capacity=num_images,
                                       dtypes=[tf.int32, tf.bool, tf.string],
                                       name='path_queue')

        # Enqueue all image paths, along with their indices
        indices = tf.range(num_images)
        self.enqueue_paths_op = self.path_queue.enqueue_many([indices, self.extension_mask,
                                                              self.image_paths])
        # Close the path queue (no more additions)
        self.close_path_queue_op = self.path_queue.close()

        # Create an operation that dequeues a single path and returns a processed image
        (idx, processed_image) = self.process()

        # Create a queue that will contain the processed images (and their indices)
        image_shape = (self.data_spec.crop_size, self.data_spec.crop_size, self.data_spec.channels)
        processed_queue = tf.FIFOQueue(capacity=int(np.ceil(num_images / float(num_concurrent))),
                                       dtypes=[tf.int32, tf.float32],
                                       shapes=[(), image_shape],
                                       name='processed_queue')

        # Enqueue the processed image and path
        enqueue_processed_op = processed_queue.enqueue([idx, processed_image])

        # Create a dequeue op that fetches a batch of processed images off the queue
        self.dequeue_op = processed_queue.dequeue_many(batch_size)

        # Create a queue runner to perform the processing operations in parallel
        # (never more enqueue ops than there are images)
        num_concurrent = min(num_concurrent, num_images)
        self.queue_runner = tf.train.QueueRunner(processed_queue,
                                                 [enqueue_processed_op] * num_concurrent)

    def start(self, session, coordinator, num_concurrent=4):
        '''Start the processing worker threads.

        The num_concurrent argument is not used here; it is only kept so that
        existing callers continue to work. The degree of parallelism is fixed
        when the producer is constructed.

        Returns the threads created by the queue runner.
        '''
        # Queue all paths
        session.run(self.enqueue_paths_op)
        # Close the path queue
        session.run(self.close_path_queue_op)
        # Start the queue runner and return the created threads
        return self.queue_runner.create_threads(session, coord=coordinator, start=True)

    def get(self, session):
        '''
        Get a single batch of images along with their indices. If a set of labels were provided,
        the corresponding labels are returned instead of the indices.
        '''
        (indices, images) = session.run(self.dequeue_op)
        if self.labels is None:
            return (indices, images)
        labels = [self.labels[idx] for idx in indices]
        return (labels, images)

    def batches(self, session):
        '''Yield a batch until no more images are left.'''
        for _ in range(self.num_batches):
            yield self.get(session=session)

    def load_image(self, image_path, is_jpeg):
        '''Returns an op that reads and decodes the image stored at image_path.

        is_jpeg selects the JPEG decoder when true and the PNG decoder otherwise.
        '''
        # Read the file
        file_data = tf.read_file(image_path)
        # Decode the image data
        img = tf.cond(
            is_jpeg,
            lambda: tf.image.decode_jpeg(file_data, channels=self.data_spec.channels),
            lambda: tf.image.decode_png(file_data, channels=self.data_spec.channels))
        if self.data_spec.expects_bgr:
            # Convert from RGB channel ordering to BGR
            # This matches, for instance, how OpenCV orders the channels.
            img = tf.reverse(img, [2, ])
        return img

    def process(self):
        '''Returns ops yielding the index and the processed form of the next queued image.'''
        # Dequeue a single image path
        idx, is_jpeg, image_path = self.path_queue.dequeue()
        # Load the image
        img = self.load_image(image_path, is_jpeg)
        # Process the image
        processed_img = process_image(img=img,
                                      scale=self.data_spec.scale_size,
                                      isotropic=self.data_spec.isotropic,
                                      crop=self.data_spec.crop_size,
                                      mean=self.data_spec.mean)
        # Return the processed image, along with its index
        return (idx, processed_img)

    @staticmethod
    def create_extension_mask(paths):
        '''Returns one boolean per path: True for JPEG files, False for PNG files.

        The check is based on the (case-insensitive) file extension.
        Raises ValueError for any other extension.
        '''
        def is_jpeg(path):
            extension = osp.splitext(path)[-1].lower()
            if extension in ('.jpg', '.jpeg'):
                return True
            if extension != '.png':
                raise ValueError('Unsupported image format: {}'.format(extension))
            return False

        return [is_jpeg(p) for p in paths]

    def __len__(self):
        '''The total number of images handled by this producer.'''
        return len(self.image_paths)
class ImageNetProducer(ImageProducer):
    '''Image producer for the ImageNet validation set.'''

    def __init__(self, val_path, data_path, data_spec):
        '''
        val_path  : Path to the ground truth file. Each non-blank line holds an
                    image file name followed by its integer label.
        data_path : Directory that the image file names are joined onto.
        data_spec : Describes how the images must be processed.
        '''
        # Read in the ground truth labels for the validation set
        # The get_ilsvrc_aux.sh in Caffe's data/ilsvrc12 folder can fetch a copy of val.txt
        # The context manager guarantees the file is closed; blank lines
        # (e.g. a trailing newline) are skipped rather than crashing below.
        with open(val_path) as val_file:
            gt_pairs = [line.split() for line in val_file if line.strip()]
        # Get the full image paths
        # You will need a copy of the ImageNet validation set for this.
        image_paths = [osp.join(data_path, p[0]) for p in gt_pairs]
        # The corresponding ground truth labels
        labels = np.array([int(p[1]) for p in gt_pairs])
        # Initialize base
        super(ImageNetProducer, self).__init__(image_paths=image_paths,
                                               data_spec=data_spec,
                                               labels=labels)
import numpy as np | |
import tensorflow as tf | |
# Padding scheme used by the conv and pooling layers unless overridden.
DEFAULT_PADDING = 'SAME'
def layer(op):
    '''Decorator for composable network layers.

    The wrapped operation is called as op(self, layer_input, *args, **kwargs),
    where layer_input is the single current terminal node, or a list of all
    terminal nodes when there is more than one. A 'name' keyword is generated
    through self.get_unique_name when the caller does not supply one.

    The operation's output is stored in self.layers under that name and fed
    as the input of the next layer. The decorated method returns self so
    that calls can be chained.

    Raises RuntimeError if there are no terminal nodes to use as input.
    '''
    # Imported locally so that this block does not depend on a file-level import.
    import functools

    # wraps() keeps the name and docstring of the original layer method.
    @functools.wraps(op)
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if not self.terminals:
            raise RuntimeError('No input variables found for layer %s.' % name)
        if len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated
class Network(object):
    '''Base class for networks assembled from chainable layer methods.

    Subclasses implement setup(), which is invoked at the end of __init__.
    Every method decorated with @layer consumes the current terminal node(s),
    records its output in self.layers and returns self for chaining.
    '''

    def __init__(self, inputs, trainable=True):
        '''
        inputs    : Mapping (or iterable of pairs) from input names to input nodes.
        trainable : If true, the created variables are set as trainable.
        '''
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable
        # Switch variable for dropout
        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
                                                       shape=[],
                                                       name='use_dropout')
        self.setup()

    def setup(self):
        '''Construct the network. '''
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        '''Load network weights.

        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.

        NOTE: the weights file is unpickled while loading, so only files from
        a trusted source must be loaded.
        '''
        # www.cs.toronto.edu/~guerzhoy/tf_alexnet/myalexnet_forward_newtf.py
        # The file holds a pickled dict: 'latin1' decodes pickles written by
        # Python 2, and allow_pickle is required because recent NumPy
        # releases refuse to load object arrays by default.
        with open(data_path, 'rb') as weights_file:
            data_dict = np.load(weights_file, encoding='latin1', allow_pickle=True).item()
        for op_name, params in data_dict.items():
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in params.items():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise

    def feed(self, *args):
        '''Set the input(s) for the next operation by replacing the terminal nodes.

        The arguments can be either layer names or the actual layers.
        Raises KeyError for an unknown layer name. Returns self.
        '''
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            # str replaced basestring in python 3
            if isinstance(fed_layer, str):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        '''Returns the current network output.'''
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        '''Returns an index-suffixed unique name for the given prefix.

        This is used for auto-generating layer names based on the type-prefix.
        '''
        ident = sum(layer_name.startswith(prefix) for layer_name in self.layers) + 1
        return '%s_%d' % (prefix, ident)

    def make_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        return tf.get_variable(name, shape, trainable=self.trainable)

    def validate_padding(self, padding):
        '''Verifies that the padding is one of the supported ones.'''
        assert padding in ('SAME', 'VALID')

    @layer
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding=DEFAULT_PADDING,
             group=1,
             biased=True):
        '''Convolution layer.

        k_h, k_w : Kernel height and width.
        c_o      : Number of output channels.
        s_h, s_w : Vertical and horizontal strides.
        relu     : If true, a ReLU is applied to the output.
        padding  : Either 'SAME' or 'VALID'.
        group    : Number of groups the input and kernel channels are split into.
        biased   : If true, a bias term is added.
        '''
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0

        def convolve(i, k):
            # Convolution for a given input and kernel
            return tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

        with tf.variable_scope(name) as scope:
            # Integer division: the shape must hold ints, and a plain `/`
            # produces a float under Python 3.
            kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) // group, c_o])
            if group == 1:
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:
                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(axis=3, num_or_size_splits=group, value=input)
                kernel_groups = tf.split(axis=3, num_or_size_splits=group, value=kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # Concatenate the groups
                output = tf.concat(axis=3, values=output_groups)
            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output

    @layer
    def relu(self, input, name):
        '''ReLU non-linearity.'''
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        '''Max pooling with a k_h x k_w window and strides s_h, s_w.'''
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        '''Average pooling with a k_h x k_w window and strides s_h, s_w.'''
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        '''Local response normalization.'''
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        '''Concatenates the inputs along the given axis.'''
        return tf.concat(axis=axis, values=inputs, name=name)

    @layer
    def add(self, inputs, name):
        '''Element-wise sum of the inputs.'''
        return tf.add_n(inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True):
        '''Fully connected layer with num_out outputs, optionally followed by a ReLU.'''
        with tf.variable_scope(name) as scope:
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(input, [-1, dim])
            else:
                feed_in, dim = (input, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    def softmax(self, input, name):
        '''Softmax over the input.

        Inputs with more than two dimensions are only accepted when
        dimensions 1 and 2 are both of size one; otherwise ValueError is raised.
        '''
        input_shape = [dim.value for dim in input.get_shape()]
        if len(input_shape) > 2:
            # For certain models (like NiN), the singleton spatial dimensions
            # need to be explicitly squeezed, since they're not broadcast-able
            # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
            if input_shape[1] == 1 and input_shape[2] == 1:
                input = tf.squeeze(input, axis=[1, 2])
            else:
                raise ValueError('Rank 2 tensor input expected for softmax!')
        return tf.nn.softmax(input, name=name)

    @layer
    def batch_normalization(self, input, name, scale_offset=True, relu=False):
        '''Batch normalization using stored 'mean' and 'variance' variables.

        scale_offset : If true, 'scale' and 'offset' variables are applied as well.
        relu         : If true, a ReLU is applied to the output.
        '''
        # NOTE: Currently, only inference is supported
        with tf.variable_scope(name) as scope:
            shape = [input.get_shape()[-1]]
            if scale_offset:
                scale = self.make_var('scale', shape=shape)
                offset = self.make_var('offset', shape=shape)
            else:
                scale, offset = (None, None)
            output = tf.nn.batch_normalization(
                input,
                mean=self.make_var('mean', shape=shape),
                variance=self.make_var('variance', shape=shape),
                offset=offset,
                scale=scale,
                # TODO: This is the default Caffe batch norm eps
                # Get the actual eps from parameters
                variance_epsilon=1e-5,
                name=name)
            if relu:
                output = tf.nn.relu(output)
            return output

    @layer
    def dropout(self, input, keep_prob, name):
        '''Dropout controlled by the use_dropout switch.

        The effective keep probability is keep_prob when use_dropout is 1
        and 1 (nothing is dropped) when use_dropout is 0.
        '''
        keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
        return tf.nn.dropout(input, keep, name=name)