2017/4/11

To make caffe-tensorflow's .\examples\imagenet\validate.py work with Python 3 and TensorFlow 1.x:

- exchange .\examples\imagenet\dataset.py with dataset.py
- exchange .\kaffe\tensorflow\network.py with network.py
dataset.py:

```python
'''Utility functions and classes for handling image datasets.'''

import os.path as osp
import numpy as np
import tensorflow as tf


def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.
    scale: The image will first be scaled to this size.
           If isotropic is true, the smaller side is rescaled to this,
           preserving the aspect ratio.
    crop:  After scaling, a central crop of this size is taken.
    mean:  Subtracted from the image.
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.stack([scale, scale])
    img = tf.image.resize_images(img, new_shape)
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    offset = (new_shape - crop) / 2
    img = tf.slice(img,
                   begin=tf.to_int64(tf.stack([offset[0], offset[1], 0])),
                   size=tf.to_int64(tf.stack([crop, crop, -1])))
    # Mean subtraction
    return tf.to_float(img) - mean


class ImageProducer(object):
    '''
    Loads and processes batches of images in parallel.
    '''

    def __init__(self, image_paths, data_spec, num_concurrent=4, batch_size=None, labels=None):
        # The data specifications describe how to process the image
        self.data_spec = data_spec
        # A list of full image paths
        self.image_paths = image_paths
        # An optional list of labels corresponding to each image path
        self.labels = labels
        # A boolean flag per image indicating whether it's a JPEG or PNG
        self.extension_mask = self.create_extension_mask(self.image_paths)
        # Create the loading and processing operations
        self.setup(batch_size=batch_size, num_concurrent=num_concurrent)

    def setup(self, batch_size, num_concurrent):
        # Validate the batch size
        num_images = len(self.image_paths)
        batch_size = min(num_images, batch_size or self.data_spec.batch_size)
        if num_images % batch_size != 0:
            raise ValueError(
                'The total number of images ({}) must be divisible by the batch size ({}).'.format(
                    num_images, batch_size))
        # Integer division (//) keeps num_batches an int in Python 3
        self.num_batches = num_images // batch_size
        # Create a queue that will contain image paths (and their indices and extension indicator)
        self.path_queue = tf.FIFOQueue(capacity=num_images,
                                       dtypes=[tf.int32, tf.bool, tf.string],
                                       name='path_queue')
        # Enqueue all image paths, along with their indices
        indices = tf.range(num_images)
        self.enqueue_paths_op = self.path_queue.enqueue_many([indices, self.extension_mask,
                                                              self.image_paths])
        # Close the path queue (no more additions)
        self.close_path_queue_op = self.path_queue.close()
        # Create an operation that dequeues a single path and returns a processed image
        (idx, processed_image) = self.process()
        # Create a queue that will contain the processed images (and their indices)
        image_shape = (self.data_spec.crop_size, self.data_spec.crop_size, self.data_spec.channels)
        processed_queue = tf.FIFOQueue(capacity=int(np.ceil(num_images / float(num_concurrent))),
                                       dtypes=[tf.int32, tf.float32],
                                       shapes=[(), image_shape],
                                       name='processed_queue')
        # Enqueue the processed image and path
        enqueue_processed_op = processed_queue.enqueue([idx, processed_image])
        # Create a dequeue op that fetches a batch of processed images off the queue
        self.dequeue_op = processed_queue.dequeue_many(batch_size)
        # Create a queue runner to perform the processing operations in parallel
        num_concurrent = min(num_concurrent, num_images)
        self.queue_runner = tf.train.QueueRunner(processed_queue,
                                                 [enqueue_processed_op] * num_concurrent)

    def start(self, session, coordinator, num_concurrent=4):
        '''Start the processing worker threads.'''
        # Queue all paths
        session.run(self.enqueue_paths_op)
        # Close the path queue
        session.run(self.close_path_queue_op)
        # Start the queue runner and return the created threads
        return self.queue_runner.create_threads(session, coord=coordinator, start=True)

    def get(self, session):
        '''
        Get a single batch of images along with their indices. If a set of labels were provided,
        the corresponding labels are returned instead of the indices.
        '''
        (indices, images) = session.run(self.dequeue_op)
        if self.labels is not None:
            labels = [self.labels[idx] for idx in indices]
            return (labels, images)
        return (indices, images)

    def batches(self, session):
        '''Yield a batch until no more images are left.'''
        # range() replaced xrange() in Python 3
        for _ in range(self.num_batches):
            yield self.get(session=session)

    def load_image(self, image_path, is_jpeg):
        # Read the file
        file_data = tf.read_file(image_path)
        # Decode the image data
        img = tf.cond(
            is_jpeg,
            lambda: tf.image.decode_jpeg(file_data, channels=self.data_spec.channels),
            lambda: tf.image.decode_png(file_data, channels=self.data_spec.channels))
        if self.data_spec.expects_bgr:
            # Convert from RGB channel ordering to BGR
            # This matches, for instance, how OpenCV orders the channels.
            img = tf.reverse(img, [2])
        return img

    def process(self):
        # Dequeue a single image path
        idx, is_jpeg, image_path = self.path_queue.dequeue()
        # Load the image
        img = self.load_image(image_path, is_jpeg)
        # Process the image
        processed_img = process_image(img=img,
                                      scale=self.data_spec.scale_size,
                                      isotropic=self.data_spec.isotropic,
                                      crop=self.data_spec.crop_size,
                                      mean=self.data_spec.mean)
        # Return the processed image, along with its index
        return (idx, processed_img)

    @staticmethod
    def create_extension_mask(paths):

        def is_jpeg(path):
            extension = osp.splitext(path)[-1].lower()
            if extension in ('.jpg', '.jpeg'):
                return True
            if extension != '.png':
                raise ValueError('Unsupported image format: {}'.format(extension))
            return False

        return [is_jpeg(p) for p in paths]

    def __len__(self):
        return len(self.image_paths)


class ImageNetProducer(ImageProducer):

    def __init__(self, val_path, data_path, data_spec):
        # Read in the ground truth labels for the validation set
        # The get_ilsvrc_aux.sh in Caffe's data/ilsvrc12 folder can fetch a copy of val.txt
        gt_lines = open(val_path).readlines()
        gt_pairs = [line.split() for line in gt_lines]
        # Get the full image paths
        # You will need a copy of the ImageNet validation set for this.
        image_paths = [osp.join(data_path, p[0]) for p in gt_pairs]
        # The corresponding ground truth labels
        labels = np.array([int(p[1]) for p in gt_pairs])
        # Initialize base
        super(ImageNetProducer, self).__init__(image_paths=image_paths,
                                               data_spec=data_spec,
                                               labels=labels)
```
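
For reference, here is a minimal sketch of driving the producer directly, outside validate.py. The `DataSpec` class below is a hypothetical stand-in; validate.py obtains the real spec from the converted model:

```python
import numpy as np
import tensorflow as tf
from dataset import ImageNetProducer

# Hypothetical data spec; validate.py derives the real values
# from the network under test.
class DataSpec(object):
    batch_size = 25        # must evenly divide the number of listed images
    scale_size = 256       # resize target before cropping
    crop_size = 227        # central crop fed to the network
    channels = 3
    isotropic = True
    expects_bgr = True
    mean = np.array([104., 117., 124.])

producer = ImageNetProducer(val_path='val.txt',
                            data_path='ILSVRC2012_img_val',
                            data_spec=DataSpec())

with tf.Session() as sess:
    coordinator = tf.train.Coordinator()
    threads = producer.start(session=sess, coordinator=coordinator)
    for labels, images in producer.batches(sess):
        pass  # run the network on `images`, compare predictions to `labels`
    coordinator.request_stop()
    coordinator.join(threads)
```

Note that `setup()` raises a ValueError unless the batch size evenly divides the number of images listed in val.txt.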

network.py:

```python
import numpy as np
import tensorflow as tf

DEFAULT_PADDING = 'SAME'


def layer(op):
    '''Decorator for composable network layers.'''

    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.terminals) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated


class Network(object):

    def __init__(self, inputs, trainable=True):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable
        # Switch variable for dropout
        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
                                                       shape=[],
                                                       name='use_dropout')
        self.setup()

    def setup(self):
        '''Construct the network.'''
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        '''Load network weights.
        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        '''
        # www.cs.toronto.edu/~guerzhoy/tf_alexnet/myalexnet_forward_newtf.py
        data_dict = np.load(open(data_path, 'rb'), encoding='latin1').item()
        for op_name in data_dict:
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in iter(data_dict[op_name].items()):
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise

    def feed(self, *args):
        '''Set the input(s) for the next operation by replacing the terminal nodes.
        The arguments can be either layer names or the actual layers.
        '''
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            # str replaced basestring in python 3
            if isinstance(fed_layer, str):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        '''Returns the current network output.'''
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        '''Returns an index-suffixed unique name for the given prefix.
        This is used for auto-generating layer names based on the type-prefix.
        '''
        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, ident)

    def make_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        return tf.get_variable(name, shape, trainable=self.trainable)

    def validate_padding(self, padding):
        '''Verifies that the padding is one of the supported ones.'''
        assert padding in ('SAME', 'VALID')

    @layer
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding=DEFAULT_PADDING,
             group=1,
             biased=True):
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            # Integer division (//) keeps the channel count an int in Python 3
            kernel = self.make_var('weights', shape=[k_h, k_w, int(c_i) // group, c_o])
            if group == 1:
                # This is the common case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:
                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(axis=3, num_or_size_splits=group, value=input)
                kernel_groups = tf.split(axis=3, num_or_size_splits=group, value=kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # Concatenate the groups
                output = tf.concat(axis=3, values=output_groups)
            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output

    @layer
    def relu(self, input, name):
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        return tf.concat(axis=axis, values=inputs, name=name)

    @layer
    def add(self, inputs, name):
        return tf.add_n(inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True):
        with tf.variable_scope(name) as scope:
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(input, [-1, dim])
            else:
                feed_in, dim = (input, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    def softmax(self, input, name):
        input_shape = list(map(lambda v: v.value, input.get_shape()))
        if len(input_shape) > 2:
            # For certain models (like NiN), the singleton spatial dimensions
            # need to be explicitly squeezed, since they're not broadcast-able
            # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
            if input_shape[1] == 1 and input_shape[2] == 1:
                input = tf.squeeze(input, axis=[1, 2])
            else:
                raise ValueError('Rank 2 tensor input expected for softmax!')
        return tf.nn.softmax(input, name=name)

    @layer
    def batch_normalization(self, input, name, scale_offset=True, relu=False):
        # NOTE: Currently, only inference is supported
        with tf.variable_scope(name) as scope:
            shape = [input.get_shape()[-1]]
            if scale_offset:
                scale = self.make_var('scale', shape=shape)
                offset = self.make_var('offset', shape=shape)
            else:
                scale, offset = (None, None)
            output = tf.nn.batch_normalization(
                input,
                mean=self.make_var('mean', shape=shape),
                variance=self.make_var('variance', shape=shape),
                offset=offset,
                scale=scale,
                # TODO: This is the default Caffe batch norm eps
                # Get the actual eps from parameters
                variance_epsilon=1e-5,
                name=name)
            if relu:
                output = tf.nn.relu(output)
            return output

    @layer
    def dropout(self, input, keep_prob, name):
        # use_dropout is 1 during training (keep == keep_prob); feeding it
        # as 0 at inference time makes keep == 1, i.e. no dropout.
        keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
        return tf.nn.dropout(input, keep, name=name)
```
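
The converter generates model classes that subclass `Network` and build their graph in `setup()`. As a rough illustration (this `MiniNet` is a hand-written, hypothetical example, not converter output), the chained style works because every `@layer`-decorated op returns `self`:

```python
import tensorflow as tf
from network import Network

class MiniNet(Network):
    def setup(self):
        # Each call feeds its output to the next layer and returns self.
        (self.feed('data')
             .conv(5, 5, 32, 1, 1, name='conv1')
             .max_pool(2, 2, 2, 2, name='pool1')
             .fc(10, relu=False, name='fc1')
             .softmax(name='prob'))

images = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
net = MiniNet({'data': images})
prob = net.get_output()

# Weights converted from Caffe could then be restored with, e.g.:
# with tf.Session() as sess:
#     net.load('mininet.npy', sess)
```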