Last active
April 27, 2018 03:43
-
-
Save atqamar/a263b430a5d210dd9e895ca0d44a8fb4 to your computer and use it in GitHub Desktop.
Rough draft of fashionnet using slim
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import tensorflow as tf | |
# from nets import inception_utils | |
from slim.nets.tg_layers import pad_conv2d, pad_max_pool2d, pad_avg_pool2d | |
slim = tf.contrib.slim | |
# TODO: make Xavier initialization the default
def trunc_normal(stddev):
    """Return a truncated-normal initializer with mean 0.0 and the given stddev."""
    return tf.truncated_normal_initializer(0.0, stddev)
def build_inception_module(name, net, nfilters):
    """Build one four-branch inception block and concatenate its branches.

    Args:
      name: variable scope wrapping the whole module.
      net: input tensor read by every branch. The concat on axis 3 implies a
        4-D tensor (presumably NHWC -- TODO confirm against callers).
      nfilters: indexable holding six output-channel counts, in the order
        [1x1, 3x3_reduce, 3x3, 5x5_reduce, 5x5, pool_proj].

    Returns:
      The four branch outputs concatenated along axis 3.
    """
    branches = []
    with tf.variable_scope(name):
        # Branch 0: plain 1x1 convolution.
        with tf.variable_scope('branch_0'):
            branches.append(
                slim.conv2d(net, nfilters[0], kernel_size=1, scope='1x1'))

        # Branch 1: 1x1 reduction followed by an explicitly padded 3x3 conv.
        with tf.variable_scope('branch_1'):
            reduced_3x3 = slim.conv2d(net, nfilters[1], kernel_size=1,
                                      scope='3x3_reduce')
            branches.append(
                pad_conv2d(reduced_3x3, nfilters[2], kernel_size=3,
                           padding=1, scope='3x3'))

        # Branch 2: 1x1 reduction followed by an explicitly padded 5x5 conv.
        with tf.variable_scope('branch_2'):
            reduced_5x5 = slim.conv2d(net, nfilters[3], kernel_size=1,
                                      scope='5x5_reduce')
            branches.append(
                pad_conv2d(reduced_5x5, nfilters[4], kernel_size=5,
                           padding=2, scope='5x5'))

        # Branch 3: stride-1 padded 3x3 max pool followed by a 1x1 projection.
        with tf.variable_scope('branch_3'):
            pooled = pad_max_pool2d(net, kernel_size=3, stride=1,
                                    padding=1, scope='pool')
            branches.append(
                slim.conv2d(pooled, nfilters[5], kernel_size=1,
                            scope='pool_proj'))

        module_out = tf.concat(values=branches, axis=3)
    return module_out
def fashionnet_base(inputs,
                    final_endpoint='common_layer_drop_fc',
                    is_training=True,
                    dropout_keep_prob=0.6,
                    scope='fashionnet'):
    """Defines the fashionnet base architecture.

    Layers are created in a fixed order and construction stops as soon as the
    layer named by `final_endpoint` has been built.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
      is_training: whether is training or not. Forwarded to the dropout layer
        so that dropout is only applied while training.
      dropout_keep_prob: the percentage of activation values that are retained.
      scope: Optional variable_scope.

    Returns:
      A tuple `(net, end_points)`: `net` is the activation of `final_endpoint`
      and `end_points` is a dictionary from the name of every layer built so
      far to the corresponding activation.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined values.
        Raised before any layer is created.
    """

    def _inception(nfilters):
        # Bind the filter spec now so each table entry keeps its own list.
        return lambda x, name: build_inception_module(name, x, nfilters)

    def _lrn(x, unused_name):
        # TODO: see if these can be replaced by slim.*_norm()
        # Positional args are depth_radius=2, bias=1.0, alpha=2e-05, beta=0.75.
        return tf.nn.lrn(x, 2, 1.0, 2e-05, 0.75)

    def _max_pool_s2(x, name):
        # 3x3 stride-2 max pool using slim's default padding.
        return slim.max_pool2d(x, kernel_size=3, stride=2, scope=name)

    # Ordered (end_point, builder) table. Each builder takes the running
    # activation and the end point name (used as the layer scope). Builders are
    # only invoked inside the arg_scope below, so slim defaults still apply.
    layers = [
        ('conv1_7x7_s2',
         lambda x, name: pad_conv2d(x, 64, kernel_size=7, stride=2,
                                    padding=3, scope=name)),
        ('pool1_3x3_s2',
         lambda x, name: slim.max_pool2d(x, kernel_size=3, stride=2,
                                         padding='SAME', scope=name)),
        ('pool1_norm1', _lrn),
        ('conv2_3x3_reduce',
         lambda x, name: slim.conv2d(x, 64, kernel_size=1, scope=name)),
        ('conv2_3x3',
         lambda x, name: pad_conv2d(x, 192, kernel_size=3,
                                    padding=1, scope=name)),
        ('conv2_norm2', _lrn),
        ('pool2_3x3_s2', _max_pool_s2),
        ('inception_3a', _inception([64, 96, 128, 16, 32, 32])),
        ('inception_3b', _inception([128, 128, 192, 32, 96, 64])),
        ('pool3_3x3_s2', _max_pool_s2),
        ('inception_4a', _inception([192, 96, 208, 16, 48, 64])),
        ('inception_4b', _inception([160, 112, 224, 24, 64, 64])),
        ('inception_4c', _inception([128, 128, 256, 24, 64, 64])),
        ('inception_4d', _inception([112, 144, 288, 32, 64, 64])),
        ('inception_4e', _inception([256, 160, 320, 32, 128, 128])),
        ('pool4_3x3_s2', _max_pool_s2),
        ('inception_5a', _inception([256, 160, 320, 32, 128, 128])),
        ('inception_5b', _inception([384, 192, 384, 48, 128, 128])),
        # TODO: 7x7 or 6x6?
        ('pool5_7x7_s1',
         lambda x, name: pad_avg_pool2d(x, kernel_size=6, stride=6,
                                        scope=name)),
        ('common_layer',
         lambda x, name: slim.fully_connected(x, 1024, scope=name)),
        # Dropout must honour is_training, otherwise activations are still
        # dropped at inference time.
        ('common_layer_drop_fc',
         lambda x, name: slim.dropout(x, dropout_keep_prob,
                                      is_training=is_training, scope=name)),
    ]

    # Fail fast on a bad endpoint instead of building the whole graph first.
    if final_endpoint not in [name for name, _ in layers]:
        raise ValueError('Unknown final endpoint %s' % final_endpoint)

    end_points = {}
    with tf.variable_scope(scope, 'fashionnet', [inputs]):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],  # NOTE: pad_conv2d isn't added to arg_scope
                weights_initializer=trunc_normal(0.01)
        ), slim.arg_scope(
                [slim.conv2d],  # NOTE: pad_conv2d isn't added to arg_scope
                stride=1,
        ):
            net = inputs
            for end_point, build_layer in layers:
                net = build_layer(net, end_point)
                end_points[end_point] = net
                if final_endpoint == end_point:
                    return net, end_points
def fashionnet(inputs,
               is_training=True,
               reuse=None,
               scope='fashionnet',
               module_specs=None):
    """Defines the fashionnet architecture.

    The default image size used to train this network is 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      is_training: whether is training or not. Passed through to
        `fashionnet_base`.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      module_specs: optional iterable of `(module_name, num_classes)` pairs, one
        per classifier head. Defaults to the built-in color / pattern / detail /
        shape heads.

    Returns:
      end_points: a dictionary from components of the network to the
        corresponding activation. For every module it holds
        '<module_name>-classifier' (the fully connected output) and
        '<module_name>-prob' (its softmax).
    """
    # Prediction of fashion modules
    # TODO: right now we're hard-coding in the fashion modules, but this should be imported
    if module_specs is None:
        module_specs = (
            ('color', 55),
            ('pattern', 105),
            ('detail', 123),
            ('shape', 398),
        )

    with tf.variable_scope(scope, 'fashionnet', [inputs],  # TODO: previously, the [inputs] included num_classes
                           reuse=reuse) as var_scope:
        # Honour the caller's is_training flag instead of forcing training mode.
        common_layer, end_points = fashionnet_base(inputs,
                                                   is_training=is_training,
                                                   scope=var_scope)
        with tf.variable_scope('modules'):
            for mod_name, mod_size in module_specs:
                classifier_name = '%s-classifier' % mod_name
                module_logits = slim.fully_connected(common_layer, mod_size,
                                                     scope=classifier_name)
                end_points[classifier_name] = module_logits

                prob_name = '%s-prob' % mod_name
                end_points[prob_name] = slim.softmax(module_logits,
                                                     scope=prob_name)
    return end_points


fashionnet.default_image_size = 224
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment