Modified Barracuda converter ( tensorflow-pb to unity-nn ) [ 0.3.2 ]
from __future__ import print_function | |
import numpy as np | |
import struct # convert from Python values and C structs | |
import tensorflow as tf | |
import re | |
import barracuda | |
from barracuda import Struct | |
from google.protobuf import descriptor | |
from google.protobuf.json_format import MessageToJson | |
if __name__ == '__main__':
# Handle command line arguments
args = barracuda.parse_args(
description = 'Convert Tensorflow model to Barracuda binary',
source_extension = '.pb',
help = 'input Tensorflow serialized .pb file')
# The following code can be used as an example of the API used from another module
# convert() is the main entry point for the converter
import tensorflow_to_barracuda as tf2bc
tf2bc.convert(args.source_file, args.target_file, args.trim_unused_by_output, args)
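# Illustrative command line usage (the file name and defaults are assumptions;
# the actual flags come from barracuda.parse_args):
#   python tensorflow_to_barracuda.py source_model.pb target_model.nn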
# TODO: support more than 1 LSTM layer per model - prepend scope to names and inputs
# TODO: support different activation functions in LSTM
# TODO: strip output Identity node, instead patch upstream layer names
# TODO: use ScaleBias and Pow with alpha when input is constant Tensor
# TODO: support all data format types (currently only NHWC)
# TODO: support all data types (currently only FLOAT, INT32, BOOL)
# TODO: implement FusedResizeAndPadConv2D
# Important ProtoBuf definitions: | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto | |
# | |
# Node descriptions: | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/math_ops.cc | |
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/random_ops.cc | |
# | |
# Class doc: | |
# https://www.tensorflow.org/api_docs/cc/ | |
# | |
known_classes = { | |
'Dense': Struct( | |
id = 1, | |
rank = 2, | |
out_shapes = lambda shapes: [ | |
[shapes[0][0], 1, 1, shapes[0][1]] if len(shapes[0]) > 1 else [1,1,1,1], # W | |
[1, 1, 1, shapes[-1][-1]] # B | |
], | |
patch_data = lambda data: [ | |
data[0], | |
data[1] | |
]), | |
'MatMul': Struct( | |
id = 1, | |
rank = 2, | |
out_shapes = lambda shapes: [ | |
[shapes[0][0], 1, 1, shapes[0][1]], # W | |
[1, 1, 1, shapes[0][1]] # B | |
], | |
patch_data = lambda data: [ | |
data[0], | |
np.zeros(np.shape(data[1])) | |
]), | |
'BiasAdd': Struct( | |
id = 51, # implemented as ScaleBias | |
out_shapes = lambda shapes: [ | |
[1, 1, 1, shapes[0][0]], # ONE | |
[1, 1, 1, shapes[0][0]], # B | |
], | |
patch_data = lambda data: [ | |
np.ones(np.shape(data[0])), | |
data[0] | |
]), | |
# TODO: NCHW | |
'Conv2D': Struct( | |
id = 20, | |
rank = 4, | |
out_shapes = lambda shapes: [ | |
shapes[0], # K | |
[1, 1, 1, shapes[-1][-1]] # B | |
], | |
patch_data = lambda data: [ | |
data[0], | |
data[1] | |
]), | |
'DepthwiseConv2dNative': Struct( # DepthwiseConv2D | |
id = 21, | |
rank = 4, | |
out_shapes = lambda s: [ | |
[s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, channel_multiplier] => [H, W, 1, in_channels] | |
[1, 1, 1, s[-1][-1]] if len(s) > 1 | |
else [1, 1, 1, s[0][2]] # B | |
], | |
patch_data = lambda data: [ | |
np.transpose(data[0], (0,1,3,2)), | |
data[1] | |
]), | |
'Conv2DBackpropInput': Struct( # Conv2DTranspose | |
id = 22, | |
rank = 4, | |
out_shapes = lambda s: [ | |
[s[0][0], s[0][1], s[0][3], s[0][2]], # K TF:[H, W, in_channels, out_channels] => [H, W, out_channels, in_channels] | |
[1, 1, 1, s[-1][-1]] if len(s) > 1 | |
else [1, 1, 1, s[0][2]] # B | |
], | |
patch_data = lambda data: [ | |
np.transpose(data[0], (0,1,3,2)), | |
data[1] | |
]), | |
'Border2D': 29, | |
'Pad2DReflect': 160, | |
'Pad2DSymmetric': 161, | |
# TODO: 3D | |
'ResizeNearestNeighbor': 23, # implemented as Upsample2D
'ResizeBilinear': 23, # implemented as Upsample2D
'ResizeBicubic': 23, # implemented as Upsample2D
'MaxPool': 25, | |
'AvgPool': 26, | |
'GlobalAveragePool':28, | |
'GlobalAvgPool': 28, | |
'Activation': 50, | |
'BatchNormalization': Struct( | |
id = 51, # after fusion implemented as ScaleBias | |
out_shapes = lambda shapes: [ | |
[1, 1, 1, shapes[0][0]], # S | |
[1, 1, 1, shapes[0][0]], # B | |
], | |
patch_data = lambda data: | |
# fuse [gamma, beta, mean, var, epsilon] => [scale, bias] | |
# TODO: double-check if epsilon is the last data argument and not the 1st? | |
barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], data[4]) if len(data) == 5 else | |
# fuse [ONE, beta, mean, var, epsilon] => [scale, bias] | |
# TODO: double-check if epsilon is the last data argument and not the 1st? | |
barracuda.fuse_batchnorm_weights(np.ones(np.shape(data[0])), data[0], data[1], data[2], data[3]) | |
), | |
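# Background note (standard batchnorm fusion, stated as an assumption about
# barracuda.fuse_batchnorm_weights): the parameters are expected to fold into a
# single affine transform,
#   scale = gamma / sqrt(variance + epsilon)
#   bias  = beta - gamma * mean / sqrt(variance + epsilon)
# so that y = scale * x + bias reproduces the normalization at inference time.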
'FusedBatchNorm': Struct( | |
id = 51, # after fusion implemented as ScaleBias | |
out_shapes = lambda shapes: [ | |
[1, 1, 1, shapes[0][0]], # S | |
[1, 1, 1, shapes[0][0]], # B | |
], | |
patch_data = lambda data, layer: | |
# fuse [gamma, beta, mean, var, epsilon] => [scale, bias] | |
barracuda.fuse_batchnorm_weights(data[0], data[1], data[2], data[3], get_epsilon(layer)) | |
), | |
'BatchNormalizationRuntime': Struct( | |
id = 52, | |
out_shapes = lambda shapes: [ | |
[1, 1, 1, shapes[0][0]], # G | |
[1, 1, 1, shapes[0][0]], # B | |
], | |
patch_data = lambda data: | |
[data[0], data[1]] if len(data) == 4 else | |
[np.ones(np.shape(data[0])), data[0]] | |
), | |
'InstanceNormalization': Struct( # TODO: epsilon | |
id = 52, | |
out_shapes = lambda shapes: [ | |
[1, 1, 1, shapes[0][0]], # G | |
[1, 1, 1, shapes[0][0]], # B | |
], | |
patch_data = lambda data: | |
[data[0], data[1]] if len(data) == 2 else | |
[np.ones(np.shape(data[0])), data[0]] | |
), | |
'LRN': 53, | |
'RandomStandardNormal': 64,
'RandomUniform': 65, | |
'Multinomial': Struct(id=66, rank = 2),
'OneHot': Struct(id=67, rank = lambda inputs: inputs[0] + 1), | |
# Broadcast ops | |
'Add': Struct(id=100, rank = lambda inputs: np.max(inputs)), | |
'AddV2': Struct(id=100, rank = lambda inputs: np.max(inputs)), | |
'Sub': Struct(id=101, rank = lambda inputs: np.max(inputs)), | |
'Mul': Struct(id=102, rank = lambda inputs: np.max(inputs)), | |
'RealDiv':Struct(id=103, rank = lambda inputs: np.max(inputs)), | |
'Pow': Struct(id=104, rank = lambda inputs: np.max(inputs)), | |
'Minimum':Struct(id=110, rank = lambda inputs: np.max(inputs)), | |
'Maximum':Struct(id=111, rank = lambda inputs: np.max(inputs)), | |
# Comparison ops with broadcast | |
'Greater': Struct(id=140, rank = lambda inputs: np.max(inputs)), | |
'GreaterEqual': Struct(id=141, rank = lambda inputs: np.max(inputs)), | |
'Less': Struct(id=142, rank = lambda inputs: np.max(inputs)), | |
'LessEqual': Struct(id=143, rank = lambda inputs: np.max(inputs)), | |
'Equal': Struct(id=144, rank = lambda inputs: np.max(inputs)), | |
# Logical ops with broadcast | |
'LogicalOr': Struct(id=145, rank = lambda inputs: np.max(inputs)), | |
'LogicalAnd': Struct(id=146, rank = lambda inputs: np.max(inputs)), | |
'LogicalNot': Struct(id=147, rank = lambda inputs: np.max(inputs)), | |
'LogicalXor': Struct(id=148, rank = lambda inputs: np.max(inputs)), | |
# Reduce ops | |
'Max': Struct(id=124, rank = lambda inputs: inputs[0] - 1), | |
'Mean': Struct(id=125, rank = lambda inputs: inputs[0] - 1), | |
'Min': Struct(id=126, rank = lambda inputs: inputs[0] - 1), | |
'Prod': Struct(id=127, rank = lambda inputs: inputs[0] - 1), | |
'Sum': Struct(id=128, rank = lambda inputs: inputs[0] - 1), | |
'Flatten':Struct(id=200, rank = 2), | |
'Reshape': 201, | |
'Concat': 210, | |
'StridedSlice': 211, | |
'Nop': 0, | |
} | |
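# Each known_classes entry maps a TensorFlow op to a Barracuda layer id. Struct
# entries may additionally carry a rank (an int or a lambda over input ranks),
# out_shapes (remaps weight tensor shapes into Barracuda's rank-4 layout) and
# patch_data (remaps or fuses the weight contents); all three hooks are
# consumed later by process_layer.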
requires_runtime_flag = { | |
'Dropout' : 'DropoutRuntime', | |
'BatchNormalization' : 'BatchNormalizationRuntime', | |
} | |
known_activations = { | |
'Linear' : 0, | |
'Relu' : 1, | |
'Softmax' : 2, | |
'Tanh' : 3, | |
'Sigmoid' : 4, | |
'Elu' : 5, | |
'Relu6' : 6, | |
'LeakyRelu' : 7, | |
'Selu' : 8, | |
'Swish' : 9, | |
'LogSoftmax' : 10, | |
'Softplus' : 11, | |
'Softsign' : 12, | |
'Abs' : 100, | |
'Neg' : 101, | |
'Ceil' : 102, | |
'Floor' : 104, | |
'Sqrt' : 111, | |
'Exp' : 113, | |
'Log' : 114, | |
'Acos' : 200, | |
'Acosh' : 201, | |
'Asin' : 202, | |
'Asinh' : 203, | |
'Atan' : 204, | |
'Atanh' : 205, | |
'Cos' : 206, | |
'Cosh' : 207, | |
'Sin' : 208, | |
'Sinh' : 209, | |
'Tan' : 210 | |
} | |
known_paddings = { | |
'VALID' : [0,0,0,0], | |
'SAME' : [-1] # SameUpper | |
} | |
supported_data_formats = { | |
'NHWC' | |
} | |
known_patterns = { | |
# TODO: Flatten pattern using namespace regexp | |
repr(['Shape', 'StridedSlice', 'Pack', 'Reshape']) : "Flatten", | |
repr(['Shape', 'StridedSlice', 'Prod', 'Pack', 'Reshape']) : "Flatten", | |
repr(['Shape', 'Slice', 'Slice', 'Prod', | |
'ExpandDims', 'ConcatV2', 'Reshape']) : "Flatten", | |
repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization', | |
repr(['Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'BatchNormalization', | |
repr(['Mean', 'StopGradient', 'SquaredDifference', 'Mean', | |
'Sub', 'Add', 'Pow', 'RealDiv', 'Mul', 'Add']) : 'InstanceNormalization_ByTensorOrder', | |
repr(['Mean', 'StopGradient', 'SquaredDifference', 'Mean', | |
'Squeeze', 'Squeeze', | |
'Add', 'Rsqrt', 'Mul', 'Mul', 'Mul', 'Sub', 'Add']) : 'InstanceNormalization_ByTensorName', | |
repr(['MatMul', 'BiasAdd']) : 'Dense', | |
repr(['Conv2D', 'BiasAdd']) : 'Conv2D', | |
repr(['DepthwiseConv2dNative', 'BiasAdd']) : 'DepthwiseConv2dNative', | |
repr(['Conv2DBackpropInput', 'BiasAdd']) : 'Conv2DBackpropInput', | |
repr(['Conv2DBackpropInput']) : 'Conv2DBackpropInput', | |
repr(['Shape', 'StridedSlice', 'StridedSlice', 'StridedSlice', 'Mul', | |
'Mul', 'Pack', 'Conv2DBackpropInput', 'BiasAdd']) : 'Conv2DBackpropInput', | |
repr(['Shape', 'StridedSlice', 'StridedSlice', 'StridedSlice', 'Mul', | |
'Mul', 'Pack', 'Conv2DBackpropInput']) : 'Conv2DBackpropInput', | |
repr(['Shape', 'StridedSlice', 'Mul', 'ResizeNearestNeighbor']) : 'ResizeNearestNeighbor',
repr(['Pack', 'Reshape']) : 'Flatten$', # for now we assume that this combination is trivial Flatten
# for example it is used in ML-agents LSTM nets with sequence_length==1
repr(['StridedSlice', 'Reshape', | |
re.compile('^[a-zA-Z/]*lstm/'), | |
'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTMReshapeOut', | |
repr([re.compile('^[a-zA-Z/]*lstm/'), | |
'Reshape', 'ConcatV2', 'Identity']) : 'BasicLSTMReshapeOut', | |
repr(['Reshape', re.compile('^[a-zA-Z/]*lstm_[a-z]*/'),'Reshape', 'ConcatV2']) : 'BasicLSTMReshapeOut', | |
repr(['Reshape', re.compile('^[a-zA-Z/]*lstm_[a-z]*/'),'ConcatV2']) : 'BasicLSTMConcatOut', | |
repr(['Sigmoid', 'Mul']) : "Swish", | |
repr(['Mul', 'Abs', 'Mul', 'Add']) : "LeakyRelu", | |
repr(['Shape', 'Reshape']) : 'ReshapeLikeInput0', # shape comes from the 1st node as input[0] | |
repr(['Reshape']) : 'Reshape', | |
repr(['ConcatV2']) : 'ConcatV2', | |
repr(['Mean']) : 'Mean', | |
repr(['Pad']) : 'Pad', | |
repr(['PadV2']) : 'Pad', | |
repr(['MirrorPad']) : 'Pad', | |
repr(['Multinomial']) : 'Multinomial', | |
repr(['OneHot']) : 'OneHot', | |
repr(['Square']) : 'Square', | |
repr(['SquaredDifference']) : 'SquaredDifference', | |
repr(['StridedSlice']) : 'StridedSlice', | |
repr(['Squeeze']) : 'Squeeze', | |
repr(['ExpandDims']) : 'ExpandDims', | |
# TODO: FusedResizeAndPadConv2D | |
} | |
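# known_patterns keys are repr() of op-type sequences (optionally containing
# regexps that match node names, e.g. LSTM scopes); process_model eval()s each
# key, matches it against the topologically sorted node stream and dispatches
# the matched nodes to the corresponding transform_patterns entry below.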
def by_name(args, name): | |
for a in args: | |
if a.name.endswith(name): | |
return a | |
def by_op(args, op): | |
for a in args: | |
if a.op == op: | |
return a | |
def order_by(args, names): | |
ordered = [] | |
arg_count = len(args) | |
for name in names: | |
ordered += [a for a in args if a.endswith(name)] | |
args = [a for a in args if not a.endswith(name)] | |
ordered += args # append what is left | |
assert(len(ordered) == arg_count) | |
return ordered | |
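# Illustrative example: order_by(['x/beta', 'x/mean', 'x/gamma'], ['gamma', 'beta', 'mean'])
# returns ['x/gamma', 'x/beta', 'x/mean'] - arguments are reordered by name suffix,
# with any unmatched leftovers appended at the end.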
transform_patterns = { | |
'Flatten' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Flatten', | |
input = inputs | |
), | |
'Flatten$' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Flatten', | |
input = [inputs[-1]] # take only the last input, assume all other arguments are trivial (like sequence_length==1 always in ML-agents LSTM nets) | |
), | |
'Reshape' : lambda nodes, inputs, tensors, context: | |
Struct( | |
op = 'Reshape', | |
rank = len(tensors[0].data) if len(tensors) > 0 # tensor data is treated as reshape coefficient, if not empty | |
else context.layer_ranks[inputs[1]] if len(inputs) == 2 # otherwise shape of the 2nd input tensor is used | |
else -1, | |
input = inputs, | |
shape = [tensors[0].data[0], tensors[0].data[1], tensors[0].data[2], tensors[0].data[3]] if len(tensors) > 0 and len(tensors[0].data) == 4 | |
else [tensors[0].data[0], 1, tensors[0].data[1], tensors[0].data[2]] if len(tensors) > 0 and len(tensors[0].data) == 3 | |
else [tensors[0].data[0], 1, 1, tensors[0].data[1]] if len(tensors) > 0 and len(tensors[0].data) == 2 | |
else [1, 1, 1, tensors[0].data[0]] if len(tensors) > 0 and len(tensors[0].data) == 1 | |
else [] | |
), | |
'ReshapeLikeInput0' : lambda nodes, inputs, tensors, context: | |
Struct( | |
op = 'Reshape', | |
rank = context.layer_ranks[inputs[0]] if len(inputs) == 2 # unlike standard 'Reshape' input[0] is used as shape & input[1] as data | |
else -1, | |
input = [inputs[1], inputs[0]] if len(inputs) == 2 # unlike standard 'Reshape' input[0] is used as shape & input[1] as data | |
else inputs, | |
), | |
'Pad' : lambda nodes, inputs, tensors, _:
Struct(
# NOTE: the shape checks inspect tensors[N].data - np.shape() on the Struct
# wrapper itself would not yield the pad-matrix dimensions
op = 'BarracudaUnsupportedPad' if (len(tensors) == 0 or list(np.shape(tensors[0].data)) != [4,2]) else
'Pad2DReflect' if (get_attr(nodes[-1], 'mode', default='constant').lower() == 'reflect') else
'Pad2DSymmetric' if (get_attr(nodes[-1], 'mode', default='constant').lower() == 'symmetric') else
'Border2D' if (get_attr(nodes[-1], 'mode', default='constant').lower() == 'constant') else
'BarracudaUnsupportedPad',
input = inputs,
pads = [tensors[0].data[2,0], tensors[0].data[1,0], tensors[0].data[2,1], tensors[0].data[1,1]] if (len(tensors) > 0 and list(np.shape(tensors[0].data)) == [4,2])
else [0,0,0,0],
beta = tensors[1].data[0] if len(tensors) > 1 and np.shape(tensors[1].data) == (1,) else get_attr(nodes[-1], 'constant_values') or 0,
),
'Squeeze' : lambda nodes, inputs, tensors, context: | |
Struct( | |
op = 'Nop', # Squeeze is no-operation in Barracuda | |
input = inputs, | |
rank = context.layer_ranks[inputs[0]] - len(get_attr(nodes[-1], 'squeeze_dims')) if len(get_attr(nodes[-1], 'squeeze_dims')) > 0 | |
else -1 # if list of squeeze axis is not specified, it is unknown what would be the rank of result | |
), | |
'ExpandDims' : lambda nodes, inputs, tensors, context: | |
Struct( | |
op = 'Nop', # ExpandDims is no-operation in Barracuda | |
input = [inputs[0]], | |
rank = context.layer_ranks[inputs[0]] + 1 | |
), | |
'Multinomial' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Multinomial', | |
input = inputs, | |
shape = [int(by_name(tensors, '/num_samples').data[0])], | |
#seed = get_attr(nodes[0], 'seed'), | |
), | |
'OneHot' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'OneHot', | |
input = inputs, | |
shape = [int(by_name(tensors, '/depth').data[0])], | |
alpha = by_name(tensors, '/on_value').data[0], | |
beta = by_name(tensors, '/off_value').data[0], | |
), | |
'Square' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Mul', | |
input = [inputs[0], inputs[0]], # input * input | |
), | |
'ConcatV2' : lambda nodes, inputs, tensors, context: | |
Struct( | |
op = 'Concat', | |
input = inputs, | |
axis = axis_to_barracuda( | |
int(by_name(tensors, '/axis').data[0]), | |
context.layer_ranks[inputs[0]]) | |
), | |
'StridedSlice' : lambda nodes, inputs, tensors, context: | |
strided_slice(nodes[-1].name, | |
inputs[0], context.layer_ranks[inputs[0]], | |
begin = tensors[0].data, | |
end = tensors[1].data, | |
strides = tensors[2].data, | |
begin_mask = get_attr(nodes[-1], 'begin_mask'), | |
end_mask = get_attr(nodes[-1], 'end_mask'), | |
ellipsis_mask = get_attr(nodes[-1], 'ellipsis_mask'), | |
new_axis_mask = get_attr(nodes[-1], 'new_axis_mask'), | |
shrink_axis_mask= get_attr(nodes[-1], 'shrink_axis_mask') | |
), | |
'BatchNormalization' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'BatchNormalization', | |
input = [i for i in inputs] + | |
order_by([t.name for t in tensors], ['gamma', 'beta', 'mean', 'variance']), | |
), | |
'InstanceNormalization_ByTensorName' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'InstanceNormalization', | |
input = [i for i in inputs] + | |
order_by([t.name for t in tensors], ['scale', 'offset']), | |
), | |
'InstanceNormalization_ByTensorOrder' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'InstanceNormalization', | |
input = [i for i in inputs] + [t.name for t in tensors][-2:], | |
), | |
'Dense' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Dense', | |
input = [i for i in inputs] + [t.name for t in tensors], | |
data_frmt = get_attr(by_op(nodes, 'Dense') or by_op(nodes, 'MatMul'), 'data_format'), | |
), | |
'Conv2D' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Conv2D', | |
input = [i for i in inputs] + [t.name for t in tensors], | |
padding = get_attr(by_op(nodes, 'Conv2D'), 'padding'), | |
strides = get_attr(by_op(nodes, 'Conv2D'), 'strides'), | |
dilations = get_attr(by_op(nodes, 'Conv2D'), 'dilations'), | |
data_frmt = get_attr(by_op(nodes, 'Conv2D'), 'data_format'), | |
), | |
'DepthwiseConv2dNative' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'DepthwiseConv2dNative', | |
input = [i for i in inputs] + [t.name for t in tensors], | |
padding = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'padding'), | |
strides = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'strides'), | |
dilations = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'dilations'), | |
data_frmt = get_attr(by_op(nodes, 'DepthwiseConv2dNative'), 'data_format'), | |
), | |
'Conv2DBackpropInput' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Conv2DBackpropInput', | |
input = [i for i in inputs] + [t.name for t in tensors][1:][-2:], # [1:] - skips the 0th tensor, since Conv2DBackpropInput 0th tensor is 'input_sizes' (which differs from other Conv layers) | |
# [-2:] - take only last 2 tensors, this allows to process large patterns with the same code | |
padding = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'padding'), | |
strides = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'strides'), | |
dilations = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'dilations'), | |
data_frmt = get_attr(by_op(nodes, 'Conv2DBackpropInput'), 'data_format'), | |
), | |
'ResizeNearestNeighbor' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'ResizeNearestNeighbor', | |
input = [i for i in inputs], | |
ksize = [int(tensors[0].data[0]), int(tensors[0].data[1])] if len(tensors) == 1 and len(tensors[0].data) == 2 | |
else [int(tensors[-1].data[0]), int(tensors[-1].data[1])] if len(tensors) >= 4 and len(tensors[-1].data) == 2 | |
else [1,1] | |
), | |
'Mean' : lambda nodes, inputs, tensors, _: | |
# take only the last input | |
barracuda.mean(nodes[-1].name, inputs[-1], axis=tensors[0].data), | |
'SquaredDifference' : lambda nodes, inputs, tensors, _: | |
sqr_diff(nodes[-1].name, inputs[0], inputs[1]), | |
'BasicLSTMReshapeOut' : lambda nodes, inputs, tensors, context: | |
basic_lstm(nodes, inputs, tensors, context, find_type='Reshape'), | |
'BasicLSTMConcatOut' : lambda nodes, inputs, tensors, context: | |
basic_lstm(nodes, inputs, tensors, context, find_type='ConcatV2'), | |
'Swish' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'Swish', | |
input = inputs | |
), | |
'LeakyRelu' : lambda nodes, inputs, tensors, _: | |
Struct( | |
op = 'LeakyRelu', | |
input = inputs | |
), | |
# TODO:'Round' | |
# TODO:'Rsqrt' | |
} | |
# Debug | |
def debug(s): | |
print(s) | |
return s | |
# Helper | |
def embody(v, default=0): | |
return default if v is None else v | |
# Parse | |
def get_attr(node, attr_name, default=None): | |
if type(node) == Struct: | |
if hasattr(node, attr_name): | |
return getattr(node, attr_name) | |
else: | |
return default | |
# See: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto | |
val = node.attr[attr_name] | |
if val.HasField("list"): | |
return val.list.i | |
# NOTE: there is no reliable way to identify the element type of the list here,
# BUT it is almost always list(int), except list(float) in FractionalAvg/MaxPool
if val.HasField("b"): | |
return val.b | |
if val.HasField("i"): | |
return val.i | |
if val.HasField("f"): | |
return val.f | |
if val.HasField("s"): | |
return val.s.decode("utf-8") | |
if val.HasField("shape"): | |
return val.shape | |
if val.HasField("tensor"): | |
return val.tensor | |
return default | |
def get_epsilon(layer): | |
return get_attr(layer, 'epsilon', default=0.001) # default epsilon taken from tf.layers.batch_normalization | |
def get_layer_rank(layer): | |
shape = get_attr(layer, 'shape') | |
if not shape: | |
return None | |
if isinstance(shape, list): | |
return 1 | |
shape = [dim.size for dim in shape.dim] | |
return len(shape) | |
def get_layer_shape(layer): | |
shape = get_attr(layer, 'shape') | |
if not shape: | |
return [-1, -1, -1, -1] | |
shape = [dim.size for dim in shape.dim] | |
if len(shape) == 1: | |
return [1, 1, 1, shape[0]] | |
if len(shape) == 2: | |
return [shape[0], 1, 1, shape[1]] | |
if len(shape) == 3: | |
return [shape[0], 1, shape[1], shape[2]] | |
return shape | |
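# Illustrative mapping (follows the code above): a placeholder declared as
# [batch, features] becomes [batch, 1, 1, features], and [batch, seq, features]
# becomes [batch, 1, seq, features] - lower ranks are embedded into the rank-4 layout.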
def get_tensor_dims(tensor): | |
if isinstance(tensor, np.ndarray): | |
return np.shape(tensor) | |
dims = [] | |
if tensor.tensor_shape: | |
dims = [v.size for v in tensor.tensor_shape.dim] | |
if tensor.float_val: | |
dims = np.shape(tensor.float_val) | |
if tensor.int_val: | |
dims = np.shape(tensor.int_val) | |
if tensor.bool_val: | |
dims = np.shape(tensor.bool_val) | |
return dims | |
def get_tensor_dtype(tensor): | |
if isinstance(tensor, np.ndarray): | |
return tensor.dtype | |
dataType = '' | |
fields = tensor.ListFields() | |
for field, value in fields: | |
if field.name == 'dtype' and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | |
dataType = field.enum_type.values_by_number.get(value, None).name | |
return dataType | |
def get_tensor_data(tensor): | |
if isinstance(tensor, np.ndarray): | |
return tensor.astype(float) | |
dims = get_tensor_dims(tensor) | |
elems = np.prod(dims)
if tensor.tensor_content: | |
# TODO: support other types | |
dataType = get_tensor_dtype(tensor) | |
if dataType == "DT_FLOAT": | |
data = struct.unpack('<'+str(elems)+'f', tensor.tensor_content) | |
elif dataType == "DT_INT32": | |
data = struct.unpack('<'+str(elems)+'i', tensor.tensor_content) | |
elif dataType == "DT_BOOL": | |
data = struct.unpack('<'+str(elems)+'?', tensor.tensor_content) | |
else: | |
print('UNSUPPORTED: data type', dataType) | |
elif tensor.float_val: | |
data = tensor.float_val | |
elif tensor.int_val: | |
data = np.array(tensor.int_val, dtype=float) | |
elif tensor.bool_val: | |
data = np.array(tensor.bool_val, dtype=float) | |
else: | |
print('[x] CRITICAL ! UNSUPPORTED: data type', get_tensor_dtype(tensor)) | |
return None | |
return np.array(data).reshape(dims) | |
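# Illustrative: a DT_FLOAT tensor with dims (2, 3) has elems == 6, so its
# tensor_content is decoded with the little-endian format string '<6f'.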
def flatten(items,enter=lambda x:isinstance(x, list)): | |
# http://stackoverflow.com/a/40857703 | |
# https://github.com/ctmakro/canton/blob/master/canton/misc.py | |
"""Yield items from any nested iterable; see REF.""" | |
for x in items: | |
if enter(x): | |
yield from flatten(x) | |
else: | |
yield x | |
def replace_strings_in_list(array_of_strings, replace_with_strings):
"A value in replace_with_strings can be either a single string or a list of strings"
potentially_nested_list = [replace_with_strings.get(s) or s for s in array_of_strings]
return list(flatten(potentially_nested_list))
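# Illustrative example: replace_strings_in_list(['a', 'b'], {'b': ['c', 'd']})
# returns ['a', 'c', 'd'] - the mapped list is spliced in place of 'b'.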
def remove_duplicates_from_list(array): | |
"Preserves the order of elements in the list" | |
output = [] | |
unique = set() | |
for a in array: | |
if a not in unique: | |
unique.add(a) | |
output.append(a) | |
return output | |
######################################################### | |
def pool_to_HW(shape, data_frmt): | |
""" Convert from NHWC|NCHW => HW | |
""" | |
if len(shape) != 4: | |
return shape # Not NHWC|NCHW, return as is | |
if data_frmt == 'NCHW': | |
return [shape[2], shape[3]] | |
return [shape[1], shape[2]] | |
def strides_to_HW(shape, format): | |
return pool_to_HW(shape, format) | |
def axis_to_barracuda(axis, input_rank):
N = 0; H = 1; W = 2; C = 3
if axis < 0:
axis = input_rank + axis # negative axis counts from the end
assert(axis >= 0)
assert(axis < input_rank)
if (input_rank == 4):
# [NHWC]
return [N,H,W,C][axis]
if (input_rank == 3):
# [N_WC]
return [N,W,C][axis]
elif (input_rank == 2):
# [N__C]
return [N,C][axis]
elif (input_rank == 1):
# [___C]
return [C][axis]
return -1
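# Illustrative (with the negative-axis handling above): axis_to_barracuda(-1, 2) == 3 (C)
# and axis_to_barracuda(1, 3) == 2 (W) - lower-rank tensors are right-aligned into
# the fixed NHWC layout.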
######################################################### | |
def sqr_diff(name, a, b): | |
nn = barracuda.Build(name) | |
d = nn.sub(a, b) | |
nn.mul(d, d, out=name) | |
return nn.layers | |
def strided_slice(name, input, input_rank, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask): | |
assert (input_rank != -1) | |
begin = begin.astype(np.int32).tolist() | |
end = end.astype(np.int32).tolist() | |
strides = strides.astype(np.int32).tolist() | |
# StridedSlice range and mask descriptions: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/strided-slice | |
# TODO: the ellipsis and new_axis masks probably do not work well together with the current implementation
assert len(begin) == len(end) | |
assert len(begin) == len(strides) | |
# prepare begin, end, stride arrays | |
output_rank = input_rank | |
insert_pos = 0 | |
while (ellipsis_mask): | |
ellipsis_mask >>= 1 | |
insert_pos += 1 | |
# NOTE: begin=0, end=0, stride=1 <= full range from existing axis | |
# begin=0, end=0, stride=0 <= new axis OR shrink axis to single 1st element | |
# begin=N, end=N, stride=0 <= shrink axis to single Nth element | |
while len(begin) < input_rank: | |
if insert_pos: | |
begin.insert(insert_pos, 0) | |
end.insert(insert_pos, 0) | |
strides.insert(insert_pos, 1) | |
else: | |
begin.append(0) | |
end.append(0) | |
strides.append(1) | |
assert len(begin) <= input_rank | |
descriptor_count = input_rank | |
for i in range(len(begin)): | |
if begin_mask & (1 << i): begin[i] = 0 | |
if end_mask & (1 << i): end[i] = 0 | |
if new_axis_mask & (1 << i): | |
begin[i] = end[i] = strides[i] = 0 | |
output_rank += 1 | |
if shrink_axis_mask & (1 << i): | |
end[i] = begin[i] | |
strides[i] = 0 | |
output_rank -= 1 | |
# convert to Barracuda layout | |
descriptor_count = len(begin) | |
assert(descriptor_count <= 4) | |
if (descriptor_count == 3): | |
begin = [begin[0], 0, begin[1], begin[2]] | |
end = [end[0], 0, end[1], end[2]] | |
strides = [strides[0], 1, strides[1], strides[2]] | |
elif (descriptor_count == 2): | |
begin = [begin[0], 0, 0, begin[1]] | |
end = [end[0], 0, 0, end[1]] | |
strides = [strides[0], 1, 1, strides[1]] | |
elif (descriptor_count == 1): | |
begin = [0, 0, 0, begin[0]] | |
end = [0, 0, 0, end[0]] | |
strides = [1, 1, 1, strides[0]] | |
nn = barracuda.Build(name) | |
nn.strided_slice(input, begin, end, strides, output_rank, out=name) | |
return nn.layers | |
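# Illustrative (standard TF StridedSlice encoding, not specific to this file):
# the expression x[:, 1] on a rank-2 tensor arrives as begin=[0, 1], end=[0, 2],
# strides=[1, 1], begin_mask=0b01, end_mask=0b01, shrink_axis_mask=0b10 -
# the masks keep axis 0 whole while axis 1 is shrunk to its element 1.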
# search backwards starting from index_of_actual_output_node for non-const node | |
def locate_actual_output_node(nodes, index_of_actual_output_node=-1, find_type='Reshape'): | |
while (-index_of_actual_output_node-1) < len(nodes) and nodes[index_of_actual_output_node].op != find_type: | |
index_of_actual_output_node -= 1 | |
actual_output_node = nodes[index_of_actual_output_node] | |
assert(-index_of_actual_output_node < len(nodes)) | |
return actual_output_node | |
def gru(nodes, inputs, tensors, context, index_of_actual_output_node, assert_output_node_op_type=None): | |
assert(len(inputs) == 2) | |
def find_tensor_by_name(name, default=None): | |
nonlocal tensors | |
candidates = [t for t in tensors if t.name.endswith(name)] | |
return candidates[0].data if candidates else default | |
input = inputs[-1] | |
state = inputs[0] | |
gates_kernel = find_tensor_by_name('/gates/kernel') | |
gates_bias = find_tensor_by_name('/gates/bias', default=np.zeros(np.shape(gates_kernel)[-1])) | |
candidate_kernel = find_tensor_by_name('/candidate/kernel') | |
candidate_bias = find_tensor_by_name('/candidate/bias', default=np.zeros(np.shape(candidate_kernel)[-1])) | |
new_state = nodes[-1].name + '_h' | |
assert(np.shape(gates_kernel)[-1] == np.shape(gates_bias)[-1]) | |
assert(np.shape(candidate_kernel)[-1] == np.shape(candidate_bias)[-1]) | |
num_gates = 2 | |
seq_length = 1 | |
hidden_size = np.shape(gates_kernel)[-1] // num_gates | |
gate_kernels = np.split(gates_kernel, num_gates, axis=-1) | |
gate_biases = np.split(gates_bias, num_gates, axis=-1) | |
context.model_tensors['kernel_r'] = gate_kernels[0] | |
context.model_tensors['kernel_u'] = gate_kernels[1] | |
context.model_tensors['kernel_c'] = candidate_kernel | |
context.model_tensors['bias_r'] = gate_biases[0] | |
context.model_tensors['bias_u'] = gate_biases[1] | |
context.model_tensors['bias_c'] = candidate_bias | |
context.layer_ranks[state] = 2 | |
new_layers = barracuda.gru('gru', input, state, | |
'kernel_r', 'kernel_u', 'kernel_c', | |
'bias_r', 'bias_u', 'bias_c', | |
new_state) | |
state_shape = [1, 1, seq_length, hidden_size] | |
context.model_memories += [state_shape, state, new_state] | |
# map expected output of the replaced pattern to output from our GRU cell
actual_output_node = locate_actual_output_node(nodes, index_of_actual_output_node, assert_output_node_op_type) | |
context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state | |
return new_layers | |
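# Background note (TF GRUCell layout, stated as an assumption): the cell stores
# a fused gates kernel (reset r and update u, split above into gate_kernels)
# plus a separate candidate kernel, hence num_gates == 2 here versus 4 for the
# LSTM below.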
def basic_lstm(nodes, inputs, tensors, context, find_type='Reshape'): | |
assert(len(inputs) == 2) | |
def find_tensor_by_name(name, default=None): | |
nonlocal tensors | |
candidates = [t for t in tensors if t.name.endswith(name)] | |
return candidates[0].data if candidates else default | |
def find_forget_bias(): | |
nonlocal nodes | |
nonlocal tensors | |
# TODO: make it more fault-tolerant | |
# search for scalar float constant that is input to Add node | |
# and hope it is not a constant for some complex activation function | |
for t in tensors: | |
if np.prod(t.shape) == 1 and get_tensor_dtype(t.obj) == "DT_FLOAT": | |
for n in nodes: | |
if n.op == 'Add' and t.name in n.input: | |
return t.data | |
return np.zeros(1) | |
input = inputs[-1] | |
state_c = inputs[0] + '_c' | |
state_h = inputs[0] + '_h' | |
kernel = find_tensor_by_name('/kernel') | |
bias = find_tensor_by_name('/bias', default=np.zeros(np.shape(kernel)[-1])) | |
forget_bias = find_forget_bias() | |
new_state_c = nodes[-1].name + '_c' | |
new_state_h = nodes[-1].name + '_h' | |
assert(np.shape(kernel)[-1] == np.shape(bias)[-1]) | |
num_gates = 4 | |
seq_length = 1 | |
hidden_size = np.shape(kernel)[-1] // num_gates | |
kernels = np.split(kernel, num_gates, axis=-1) | |
biases = np.split(bias, num_gates, axis=-1) | |
context.model_tensors['kernel_i'] = kernels[0] | |
context.model_tensors['kernel_j'] = kernels[1] | |
context.model_tensors['kernel_f'] = kernels[2] | |
context.model_tensors['kernel_o'] = kernels[3] | |
context.model_tensors['bias_i'] = biases[0] | |
context.model_tensors['bias_j'] = biases[1] | |
context.model_tensors['bias_f'] = biases[2] + forget_bias | |
context.model_tensors['bias_o'] = biases[3] | |
context.layer_ranks[state_c] = 2 | |
context.layer_ranks[state_h] = 2 | |
# lstm_value/strided_slice/stack => lstm_value | |
lstm_name = re.match('^([a-zA-Z/]*lstm[_a-z]*)/.*', next(i.name for i in nodes if re.match('^[a-zA-Z/]*lstm[_a-z]*/.*', i.name))).group(1) | |
new_layers = barracuda.lstm(lstm_name, input, state_c, state_h, | |
'kernel_i', 'kernel_j', 'kernel_f', 'kernel_o', | |
'bias_i', 'bias_j', 'bias_f', 'bias_o', | |
new_state_c, new_state_h) | |
state_shape = [1, 1, seq_length, hidden_size] | |
context.model_memories += [state_shape, state_c, new_state_c] | |
context.model_memories += [state_shape, state_h, new_state_h] | |
# map expected output of the replaced pattern to output from our LSTM cell | |
actual_output_node = locate_actual_output_node(nodes, -1, find_type) | |
concat_out_node = locate_actual_output_node(nodes, -1, 'ConcatV2') | |
context.map_ignored_layer_to_its_input[actual_output_node.name] = new_state_h | |
context.map_ignored_layer_to_its_input[concat_out_node.name] = new_state_c | |
return new_layers | |
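# Background note: TF BasicLSTMCell concatenates its kernel as
# [input i, candidate j, forget f, output o], matching the 4-way split above;
# the cell's forget_bias scalar is folded directly into bias_f.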
######################################################### | |
def process_layer(layer, context, args): | |
model_tensors = context.model_tensors | |
input_shapes = context.input_shapes | |
layer_ranks = context.layer_ranks | |
map_ignored_layer_to_its_input = context.map_ignored_layer_to_its_input | |
name = layer.name | |
class_name = layer.op | |
inputs = layer.input # Tensorflow inputs are always explicit, but in case of Keras we had 'inputs = layer.input or [prev_layer_name]' | |
inputs = replace_strings_in_list(inputs, map_ignored_layer_to_its_input) | |
if class_name == 'Nop': | |
assert(len(inputs) <= 1) | |
map_ignored_layer_to_its_input[name] = inputs | |
return | |
if class_name == 'Const': | |
model_tensors[name] = layer.attr["value"].tensor | |
layer_ranks[name] = get_layer_rank(layer) or 1 # we treat constants without a shape as rank=1 (a scalar converted to tensor)
return | |
if class_name == 'Placeholder': | |
assert(inputs == []) | |
map_ignored_layer_to_its_input[name] = inputs | |
input_shapes[name] = get_layer_shape(layer) | |
layer_ranks[name] = get_layer_rank(layer) | |
return | |
if class_name == 'Identity': | |
connected_to_const = len(inputs) == 1 and inputs[0] in model_tensors | |
if connected_to_const: | |
map_ignored_layer_to_its_input[name] = inputs | |
return | |
else: | |
# treat Identity layer that are connected to processing nodes | |
# as output from the network | |
class_name = 'Linear' | |
if args.print_layers or args.verbose: | |
var_tensors = [i for i in inputs if i not in model_tensors] | |
const_tensors = [i for i in inputs if i in model_tensors] | |
print("'%s' %s Vars:%s Const:%s" % (name, class_name, var_tensors, const_tensors)) | |
if class_name in known_activations: | |
activation = class_name | |
class_name = 'Activation' | |
else: | |
activation = 'Linear' | |
if not class_name in known_classes: | |
if class_name in requires_runtime_flag: | |
print('SKIP:', class_name, 'layer is used only for training') | |
else: | |
print('IGNORED:', class_name, 'unknown layer') | |
map_ignored_layer_to_its_input[name] = inputs | |
return | |
klass = known_classes[class_name] | |
if type(klass) == int: | |
klass = Struct(id = klass) | |
o_l = Struct() | |
o_l.type = klass.id | |
o_l.class_name = class_name | |
o_l.name = name | |
auto_pad = get_attr(layer, 'padding') # layer.attr['padding'].s.decode("utf-8") | |
pads = get_attr(layer, 'pads') | |
strides = get_attr(layer, 'strides') # layer.attr['strides'].list.i | |
pool_size = get_attr(layer, 'ksize') # layer.attr['ksize'].list.i | |
shape = get_attr(layer, 'shape') | |
starts = get_attr(layer, 'starts') | |
ends = get_attr(layer, 'ends') | |
slice_strides = get_attr(layer, 'slice_strides') | |
rank = get_attr(layer, 'rank') or get_layer_rank(layer) | |
data_frmt = get_attr(layer, 'data_format') # layer.attr['data_format'].s.decode("utf-8") | |
axis = get_attr(layer, 'axis') | |
alpha = get_attr(layer, 'alpha', default=1) | |
beta = get_attr(layer, 'beta') | |
if activation and not activation in known_activations: | |
print('IGNORED: unknown activation', activation) | |
if auto_pad and not auto_pad in known_paddings: | |
print('IGNORED: unknown padding', auto_pad) | |
if data_frmt and not data_frmt in supported_data_formats: | |
print('UNSUPPORTED: data format', data_frmt) | |
o_l.activation = known_activations.get(activation) or 0 | |
o_l.pads = known_paddings.get(auto_pad) if auto_pad else pads or starts or [0,0,0,0] | |
o_l.strides = strides_to_HW(strides, data_frmt) if strides else slice_strides or [] | |
o_l.pool_size = pool_to_HW(pool_size, data_frmt) if pool_size else ends or shape or [] | |
o_l.axis = embody(axis, default=-1) | |
o_l.alpha = embody(alpha, default=1) | |
o_l.beta = beta or 0 | |
o_l.rank = -1 # default initialization, actual value will be set later on in this function | |
tensor_names = [i for i in inputs if i in model_tensors] | |
# temp_tensor_data = get_tensor_data(model_tensors[x]) | |
# if temp_tensor_data is not None: | |
o_l.tensors = [Struct(name = x, shape = get_tensor_dims(model_tensors[x]), data = get_tensor_data(model_tensors[x])) | |
for x in tensor_names] | |
# Patch shapes & data | |
layer_has_model_tensors = len(o_l.tensors) > 0 | |
if hasattr(klass, 'out_shapes') and layer_has_model_tensors: | |
shapes = klass.out_shapes([x.shape for x in o_l.tensors]) | |
# if we have more shapes than actual tensors, | |
# then create & fill missing tensors with zeros | |
in_tensor_num = len(o_l.tensors) | |
for index, new_shape in enumerate(shapes): | |
if index >= in_tensor_num: | |
new_tensor = Struct(name = ('%s/patch:%i') % (name, index-in_tensor_num), | |
shape = new_shape, | |
data = np.zeros(new_shape)) | |
o_l.tensors.append(new_tensor) | |
assert(len(shapes) <= len(o_l.tensors)) | |
if hasattr(klass, 'patch_data'): | |
data = [x.data for x in o_l.tensors] | |
patch_data_fn = klass.patch_data | |
patch_data_expected_arg_count = patch_data_fn.__code__.co_argcount | |
patch_data_args = (data, layer) if patch_data_expected_arg_count > 1 else (data,) | |
tensor_data = patch_data_fn(*patch_data_args) | |
o_l.tensors = o_l.tensors[:len(tensor_data)] # resize tensor array to match patched data - patching might reduce number of tensors | |
for x, data in zip(o_l.tensors, tensor_data): | |
x.data = data | |
# after this point we should have equal amount of shapes and tensors | |
assert(len(o_l.tensors) == len(shapes)) | |
for x, shape in zip(o_l.tensors, shapes): | |
assert x.data.size == np.prod(shape) | |
x.shape = shape | |
o_l.inputs = [i for i in inputs if i not in model_tensors] | |
else: | |
# no 'patch_data' lambda was specified, op does not require tensor args
o_l.tensors = [] | |
o_l.inputs = inputs | |
# Force all tensors to float32 | |
for x in o_l.tensors: | |
x.data = x.data.astype(np.float32) | |
input_ranks = [layer_ranks.get(i, -1) for i in o_l.inputs] | |
for i in o_l.inputs: | |
if i not in layer_ranks and 'lstm' not in i: | |
print("WARNING: rank unknown for tensor", i, "while processing node", name) | |
if hasattr(klass, 'rank'): | |
rank = klass.rank | |
if hasattr(rank, '__call__'): | |
assert(-1 not in input_ranks) # for rank() lambda all input ranks have to be known (not -1) | |
rank = rank(input_ranks) | |
if rank is None:
def all_elements_equal(arr): # http://stackoverflow.com/q/3844948/ | |
return arr.count(arr[0]) == len(arr) | |
assert(len(input_ranks) > 0) | |
assert(all_elements_equal(input_ranks)) | |
rank = input_ranks[0] | |
layer_ranks[name] = rank | |
o_l.rank = rank | |
# Layer is ready | |
context.layers.append(o_l) | |
class ModelBuilderContext: | |
def __init__(self): | |
self.layers = [] | |
self.input_shapes = {} | |
self.model_tensors = {} | |
self.model_memories = [] | |
self.layer_ranks = {} | |
self.map_ignored_layer_to_its_input = {} | |
def process_model(model, args): | |
o_context = ModelBuilderContext() | |
# Find node patterns | |
nodes_as_array = [node for node in model.node] | |
nodes_as_array = slow_but_stable_topological_sort(nodes_as_array, verbose=True) | |
node_index = 0 | |
while node_index < len(nodes_as_array): | |
node = nodes_as_array[node_index] | |
match = False | |
for pattern_repr, pattern_name in known_patterns.items(): | |
pattern = eval(pattern_repr) | |
if node_index + len(pattern) > len(nodes_as_array): | |
continue # pattern too long, skip | |
require_exact_match = (pattern[0] == 'Const' or pattern[0] == 'Identity') | |
pattern_end = node_index | |
def match_node(node, pattern): | |
return node.op == pattern or (hasattr(pattern, 'match') and pattern.match(node.name)) | |
for p in pattern: | |
if not require_exact_match: | |
while pattern_end < len(nodes_as_array) and nodes_as_array[pattern_end].op != p and ( | |
nodes_as_array[pattern_end].op == 'Const' or | |
nodes_as_array[pattern_end].op == 'Identity'): | |
pattern_end += 1 | |
if pattern_end >= len(nodes_as_array): | |
break | |
match = False | |
if (hasattr(p, 'match')): # regexp | |
while pattern_end < len(nodes_as_array) and p.match(nodes_as_array[pattern_end].name): | |
match = True | |
pattern_end += 1 | |
else: # exact string | |
match = nodes_as_array[pattern_end].op == p | |
pattern_end += 1 | |
if not match: | |
break | |
def get_tensors(pattern_nodes): | |
nonlocal o_context | |
map_ignored_layer_to_its_input = o_context.map_ignored_layer_to_its_input | |
model_tensors = o_context.model_tensors | |
# tensors <= all Const nodes within this pattern | |
const_nodes = [n for n in pattern_nodes if n.op == 'Const'] | |
# TODO: unify / reuse code from process_layer | |
identity_nodes = [n for n in pattern_nodes if n.op == 'Identity'] | |
for i in identity_nodes: | |
inputs = replace_strings_in_list(i.input, map_ignored_layer_to_its_input) | |
map_ignored_layer_to_its_input[i.name] = inputs | |
# gather inputs from Op nodes (not Const, not Identity) | |
op_nodes = [n for n in pattern_nodes if n not in const_nodes and n not in identity_nodes] | |
inputs_to_op_nodes = list(flatten([list(flatten(n.input)) for n in op_nodes])) | |
inputs_to_op_nodes = replace_strings_in_list(inputs_to_op_nodes, map_ignored_layer_to_its_input) | |
inputs_to_op_nodes = [i.split(':')[0] for i in inputs_to_op_nodes] | |
const_nodes_by_name = {n.name:n for n in const_nodes} | |
tensors = [] | |
for i in inputs_to_op_nodes: | |
if i in model_tensors: | |
src = model_tensors[i] | |
temp_tensor_data = get_tensor_data(src) | |
if not ( temp_tensor_data is None ): | |
tensors += [Struct(name = i, obj = src, shape = get_tensor_dims(src), data = temp_tensor_data)] | |
elif i in const_nodes_by_name: | |
src = const_nodes_by_name[i].attr["value"].tensor | |
temp_tensor_data = get_tensor_data(src) | |
if not ( temp_tensor_data is None ): | |
tensors += [Struct(name = i, obj = src, shape = get_tensor_dims(src), data = temp_tensor_data)] | |
tensor_names = [n.name for n in tensors] | |
# filter only inputs that are coming from nodes that are outside this pattern | |
# preserve the order | |
pattern_nodes = [n.name for n in pattern_nodes] + tensor_names | |
#inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if nodes_by_name[i] not in pattern_nodes]) | |
inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if i not in pattern_nodes]) | |
return inputs_from_outside_pattern, tensors | |
if match: | |
nodes = nodes_as_array[node_index:pattern_end] | |
name = nodes[-1].name | |
var_tensors, const_tensors = get_tensors(nodes) | |
if args.print_patterns or args.verbose: | |
print('PATTERN:', name, '~~', pattern_name, '<-', var_tensors, '+', [t.name for t in const_tensors]) | |
print(' ', pattern) | |
for n in nodes: | |
if n.op == 'Const' or n.op == 'Identity': | |
process_layer(n, o_context, args) | |
new_layers = transform_patterns[pattern_name](nodes, var_tensors, const_tensors, o_context) | |
if not isinstance(new_layers, list): | |
if not hasattr(new_layers, 'name'): new_layers.name = name
new_layers = [new_layers] | |
for l in new_layers: | |
# TODO: prefix new layer names with scope, patch inputs | |
#l.name = name + '/' + l.name | |
process_layer(l, o_context, args) | |
node_index = pattern_end | |
break # pattern found & processed | |
if not match: | |
# TODO: gather tensors in the same way as patterns do | |
process_layer(node, o_context, args) | |
node_index += 1 | |
def find_unconnected_const_nodes(nodes): | |
nodes_with_consts = {node.name: node for node in nodes if node.op == 'Const'} | |
for node in nodes: | |
for i in node.input: | |
nodes_with_consts.pop(i, None) | |
return list(nodes_with_consts.keys()) | |
return o_context.layers, o_context.input_shapes, o_context.model_tensors, o_context.model_memories, \ | |
find_unconnected_const_nodes(nodes_as_array) | |
# Sort nodes so that all input dependencies are satisfied beforehand
# while preserving the original order of the nodes in the model whenever possible.
# NOTE: preservation of the original order is important for pattern matching
def slow_but_stable_topological_sort(nodes, verbose): | |
nodes_with_consts = [node for node in nodes if node.op == 'Const'] | |
nodes_for_sorting = [node for node in nodes if node.op != 'Const'] | |
# TODO: optimize for performance | |
# based on http://blog.gapotchenko.com/stable-topological-sort | |
def assign_ids(nodes): | |
ids = [] | |
id_by_name = {} | |
id = 0 | |
for node in nodes: | |
id_by_name[node.name] = id
ids.append(id) | |
id += 1 | |
inputs_by_id = [None] * len(nodes) | |
for node in nodes: | |
id = id_by_name[node.name] | |
inputs_by_id[id] = {id_by_name.get(i, -1) for i in node.input} | |
return ids, inputs_by_id | |
def sort(ids, inputs_by_id, verbose_lambda): | |
sorted = False | |
n = len(ids) | |
while not sorted: | |
sorted = True | |
for i in range(n): | |
for j in range (i): | |
if ids[i] in inputs_by_id[ids[j]]: | |
tmp = ids.pop(i) | |
ids.insert(j, tmp) | |
sorted = False | |
verbose_lambda(sorted) | |
return ids | |
prefix_printed = False | |
def print_status(sorted): | |
nonlocal prefix_printed | |
if not sorted: | |
if not prefix_printed: | |
print('Sorting model, may take a while...', end="", flush=True) | |
prefix_printed = True | |
else: | |
print('.', end="", flush=True) | |
else: | |
if prefix_printed: | |
print(' Done!') | |
ids, inputs_by_id = assign_ids(nodes_for_sorting) | |
ids = sort(ids, inputs_by_id, lambda sorted: print_status(sorted) if verbose else None) | |
assert(len(ids) == len(nodes_for_sorting)) | |
assert(len(ids) + len(nodes_with_consts) == len(nodes)) | |
return nodes_with_consts + [nodes_for_sorting[id] for id in ids] | |
def very_slow_but_stable_topological_sort(nodes, verbose): | |
# TODO: optimize for performance | |
# based on http://blog.gapotchenko.com/stable-topological-sort | |
n = len(nodes) | |
sorted = False | |
while not sorted: | |
sorted = True | |
for i in range(n): | |
for j in range (i): | |
if nodes[i].name in nodes[j].input: | |
tmp = nodes.pop(i) | |
nodes.insert(j, tmp) | |
sorted = False | |
assert(len(nodes) == n) | |
return nodes | |
######################################################### | |
def convert(source_file, target_file, trim_unused_by_output="", verbose=False, compress_f16=False): | |
""" | |
Converts a TensorFlow model into a Barracuda model. | |
:param source_file: The TensorFlow Model | |
:param target_file: The name of the file the converted model will be saved to | |
:param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other uconnected nodes will be removed. | |
:param verbose: If True, will display debug messages | |
:param compress_f16: If true, the float values will be converted to f16 | |
:return: | |
""" | |
if (type(verbose)==bool): | |
args = Struct() | |
args.verbose = verbose | |
args.print_layers = verbose | |
args.print_source_json = verbose | |
args.print_barracuda_json = verbose | |
args.print_layer_links = verbose | |
args.print_patterns = verbose | |
args.print_tensors = verbose | |
args.print_supported_ops = verbose | |
else: | |
args = verbose | |
if args.print_supported_ops: | |
barracuda.print_known_operations(known_classes, known_activations) | |
# Load Tensorflow model | |
print("Converting %s to %s" % (source_file, target_file)) | |
f = open(source_file, 'rb') | |
i_model = tf.compat.v1.GraphDef() | |
i_model.ParseFromString(f.read()) | |
if args.verbose: | |
print('OP_TYPES:', {layer.op for layer in i_model.node}) | |
if args.print_source_json or args.verbose: | |
for layer in i_model.node: | |
if not layer.op == 'Const': | |
print('MODEL:', MessageToJson(layer) + ",") | |
# Convert | |
o_model = barracuda.Model() | |
o_model.layers, o_input_shapes, o_model.tensors, o_model.memories, o_model.globals = \ | |
process_model(i_model, args) | |
# Cleanup unconnected Identities (they might linger after processing complex node patterns like LSTM) | |
def cleanup_layers(layers): | |
all_layers = {l.name for l in layers} | |
all_inputs = {i for l in layers for i in l.inputs} | |
def is_unconnected_identity(layer):
if layer.class_name == 'Activation' and layer.activation == 0: # Identity
assert(len(layer.inputs) == 1)
if layer.inputs[0] not in all_layers and layer.name not in all_inputs:
return True
return False
return [l for l in layers if not is_unconnected_identity(l)] | |
o_model.layers = cleanup_layers(o_model.layers) | |
# Trim | |
if trim_unused_by_output: | |
o_model.layers = barracuda.trim(o_model.layers, trim_unused_by_output, args.verbose) | |
# Create load layer for constants | |
def dims_to_barracuda_shape(dims): | |
shape = list(dims) | |
while len(shape) < 4: | |
shape = [1] + shape | |
return shape | |
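# Illustrative: dims_to_barracuda_shape([3, 5]) -> [1, 1, 3, 5]; dims are
# left-padded with 1s up to the fixed rank-4 layout.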
# temp_tensor_data = get_tensor_data(tensor) | |
# if not ( temp_tensor_data is None ): | |
# if temp_tensor_data is not None: | |
barracuda.setup_constants(o_model, | |
lambda tensor: dims_to_barracuda_shape(get_tensor_dims(tensor)), | |
lambda tensor: get_tensor_data(tensor)) | |
# Find model inputs & outputs | |
all_inputs = {i for l in o_model.layers for i in l.inputs} | |
all_layers = {l.name for l in o_model.layers} | |
# global inputs => are inputs that are NOT connected to any layer in the network | |
# global outputs => are outputs that are NOT feeding any layer in the network OR are coming from Identity layers | |
o_model.inputs = {i:o_input_shapes[i] for l in o_model.layers for i in l.inputs if i not in all_layers and i not in o_model.memories} | |
def is_output_layer(layer): | |
if layer.class_name == 'Const': # Constants never count as global output even when unconnected
return False
if layer.name not in all_inputs: # this layer does not feed any other layer
return True | |
if layer.class_name == 'Activation' and layer.activation == 0: # Identity marks global output | |
return True | |
return False | |
o_model.outputs = [l.name for l in o_model.layers if is_output_layer(l)] | |
# Compress | |
if compress_f16: | |
o_model = barracuda.compress(o_model) | |
# Sort model so that layer inputs are always ready upfront | |
o_model.layers = barracuda.sort(o_model.layers, o_model.inputs, o_model.memories, args.verbose) | |
o_model.layers = barracuda.fuse(o_model.layers, args.verbose) | |
# Summary | |
barracuda.summary(o_model, | |
print_layer_links = args.print_layer_links or args.verbose, | |
print_barracuda_json = args.print_barracuda_json or args.verbose, | |
print_tensors = args.print_tensors or args.verbose) | |
# Write to file | |
barracuda.write(o_model, target_file) | |
print('DONE: wrote', target_file, 'file.') |