Benchmarking script for TensorFlow + TensorRT inferencing on the NVIDIA Jetson Nano
#!/usr/bin/env python3
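"""Benchmark TensorFlow + TensorRT object detection inference on the
NVIDIA Jetson Nano: detect which single-board platform the script is
running on, convert a TensorFlow Object Detection API model with TF-TRT,
and time inference over a supplied image."""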
import platform

PLATFORM = platform.system().lower()

GOOGLE = 'edge_tpu'
INTEL = 'ncs2'
NVIDIA = 'jetson_nano'
PI = 'raspberry_pi'

IS_LINUX = (PLATFORM == 'linux')
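# Platform detection: inspect the Linux distribution, then probe for the
# vendor-specific inference libraries to work out which accelerator
# (Edge TPU, Movidius NCS2, or Jetson Nano GPU) is present.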
if IS_LINUX:
    # note: platform.linux_distribution() was removed in Python 3.8,
    # so this script requires an older Python 3
    PLATFORM = platform.linux_distribution()[0].lower()
    if PLATFORM == 'debian':
        try:
            with open('/proc/cpuinfo') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('Hardware') and (line.endswith('BCM2708') or line.endswith('BCM2835')):
                        PLATFORM = PI
                        print("Running on a Raspberry Pi.")
                        break
        except OSError:
            print("Unknown platform based on Debian.")
    elif PLATFORM == 'mendel':
        PLATFORM = GOOGLE
        print("Running on a Coral Dev Board.")
try:
    from edgetpu.detection.engine import DetectionEngine
    print("DetectionEngine support present.")
except ImportError:
    try:
        from openvino.inference_engine import IENetwork, IEPlugin
        print("OpenVINO present.")
        print("Assuming Movidius hardware.")
        PLATFORM = INTEL
    except ImportError:
        try:
            import tensorflow as tf
            if tf.test.is_built_with_cuda():
                print("TensorFlow with GPU support present.")
                print("Assuming Jetson Nano.")
                try:
                    import tensorflow.contrib.tensorrt as trt
                    print("TensorRT support present.")
                    PLATFORM = NVIDIA
                    from object_detection.protos import pipeline_pb2
                    from object_detection.protos import image_resizer_pb2
                    from object_detection import exporter
                    from google.protobuf import text_format
                except Exception:
                    print("No TensorRT support found.")
                    print("Unknown TensorFlow platform.")
                    PLATFORM = 'unknown'
            else:
                print("No GPU support in TensorFlow.")
        except ImportError:
            print("No TensorFlow support found.")

# This variant of the benchmark only runs on the Jetson Nano.
LEGAL_PLATFORMS = (NVIDIA,)
assert PLATFORM in LEGAL_PLATFORMS, "Don't understand platform %s." % PLATFORM
INPUT_NAME = 'image_tensor'
BOXES_NAME = 'detection_boxes'
CLASSES_NAME = 'detection_classes'
SCORES_NAME = 'detection_scores'
MASKS_NAME = 'detection_masks'
NUM_DETECTIONS_NAME = 'num_detections'

FROZEN_GRAPH_NAME = 'frozen_inference_graph.pb'
PIPELINE_CONFIG_NAME = 'pipeline.config'
CHECKPOINT_PREFIX = 'model.ckpt'
import sys
import os
import logging as log
import argparse
import subprocess
from timeit import default_timer as timer

import cv2
from PIL import Image
from PIL import ImageFont, ImageDraw
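# The graph-surgery helpers below rewrite a frozen TensorFlow graph so that
# more of it can be converted to TensorRT engines: Relu6 ops are decomposed
# into two plain Relu ops (relu6(x) = relu(x) - relu(x - 6)), Assert ops are
# stripped, and NonMaxSuppression is pinned to the CPU.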
def make_const6(const6_name='const6'):
    graph = tf.Graph()
    with graph.as_default():
        tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name)
    return graph.as_graph_def()
def make_relu6(output_name, input_name, const6_name='const6'):
    graph = tf.Graph()
    with graph.as_default():
        tf_x = tf.placeholder(tf.float32, [10, 10], name=input_name)
        tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name)
        with tf.name_scope(output_name):
            # relu6(x) = relu(x) - relu(x - 6)
            tf_y1 = tf.nn.relu(tf_x, name='relu1')
            tf_y2 = tf.nn.relu(tf.subtract(tf_x, tf_6, name='sub1'), name='relu2')
        tf_y = tf.subtract(tf_y1, tf_y2, name=output_name)
    graph_def = graph.as_graph_def()
    graph_def.node[-1].name = output_name

    # remove the placeholder and constant nodes; the surrounding graph
    # already provides the input and the shared constant 6
    for node in graph_def.node:
        if node.name == input_name:
            graph_def.node.remove(node)
    for node in graph_def.node:
        if node.name == const6_name:
            graph_def.node.remove(node)

    for node in graph_def.node:
        if node.op == '_Neg':
            node.op = 'Neg'

    return graph_def
def convert_relu6(graph_def, const6_name='const6'):
    # add the shared constant 6 if it isn't already in the graph
    has_const6 = False
    for node in graph_def.node:
        if node.name == const6_name:
            has_const6 = True
    if not has_const6:
        const6_graph_def = make_const6(const6_name=const6_name)
        graph_def.node.extend(const6_graph_def.node)

    # replace each Relu6 node with the equivalent pair of Relu ops
    for node in graph_def.node:
        if node.op == 'Relu6':
            input_name = node.input[0]
            output_name = node.name
            relu6_graph_def = make_relu6(output_name, input_name, const6_name=const6_name)
            graph_def.node.remove(node)
            graph_def.node.extend(relu6_graph_def.node)

    return graph_def
def remove_node(graph_def, node):
    for n in graph_def.node:
        if node.name in n.input:
            n.input.remove(node.name)
        ctrl_name = '^' + node.name
        if ctrl_name in n.input:
            n.input.remove(ctrl_name)
    graph_def.node.remove(node)

def remove_op(graph_def, op_name):
    matches = [node for node in graph_def.node if node.op == op_name]
    for match in matches:
        remove_node(graph_def, match)
def f_force_nms_cpu(frozen_graph):
    # pin NonMaxSuppression ops to the CPU so they are left out of the
    # TensorRT conversion
    for node in frozen_graph.node:
        if 'NonMaxSuppression' in node.name:
            node.device = '/device:CPU:0'
    return frozen_graph

def f_replace_relu6(frozen_graph):
    return convert_relu6(frozen_graph)

def f_remove_assert(frozen_graph):
    remove_op(frozen_graph, 'Assert')
    return frozen_graph
def build_detection_graph(config, checkpoint,
                          batch_size=1,
                          score_threshold=None,
                          force_nms_cpu=True,
                          replace_relu6=True,
                          remove_assert=True,
                          input_shape=None,
                          output_dir='.generated_model'):
    """Builds a frozen graph for a pre-trained object detection model."""
    config_path = config
    checkpoint_path = checkpoint

    # parse the pipeline config from file
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, 'r') as f:
        text_format.Merge(f.read(), config, allow_unknown_extension=True)

    # override some config parameters
    if config.model.HasField('ssd'):
        config.model.ssd.feature_extractor.override_base_feature_extractor_hyperparams = True
        if score_threshold is not None:
            config.model.ssd.post_processing.batch_non_max_suppression.score_threshold = score_threshold
        if input_shape is not None:
            config.model.ssd.image_resizer.fixed_shape_resizer.height = input_shape[0]
            config.model.ssd.image_resizer.fixed_shape_resizer.width = input_shape[1]
    elif config.model.HasField('faster_rcnn'):
        if score_threshold is not None:
            config.model.faster_rcnn.second_stage_post_processing.score_threshold = score_threshold
        if input_shape is not None:
            config.model.faster_rcnn.image_resizer.fixed_shape_resizer.height = input_shape[0]
            config.model.faster_rcnn.image_resizer.fixed_shape_resizer.width = input_shape[1]

    if os.path.isdir(output_dir):
        subprocess.call(['rm', '-rf', output_dir])

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # export the inference graph to file (initial, unoptimized)
    with tf.Session(config=tf_config) as tf_sess:
        with tf.Graph().as_default() as tf_graph:
            exporter.export_inference_graph(
                'image_tensor',
                config,
                checkpoint_path,
                output_dir,
                input_shape=[batch_size, None, None, 3]
            )

    # read the frozen graph back from file
    frozen_graph = tf.GraphDef()
    with open(os.path.join(output_dir, FROZEN_GRAPH_NAME), 'rb') as f:
        frozen_graph.ParseFromString(f.read())

    # apply graph modifications
    if force_nms_cpu:
        frozen_graph = f_force_nms_cpu(frozen_graph)
    if replace_relu6:
        frozen_graph = f_replace_relu6(frozen_graph)
    if remove_assert:
        frozen_graph = f_remove_assert(frozen_graph)

    # get input and output names
    # TODO: handle mask_rcnn
    input_names = [INPUT_NAME]
    output_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME]

    # remove the temporary directory
    subprocess.call(['rm', '-rf', output_dir])

    return frozen_graph, input_names, output_names
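# Example (illustrative paths), assuming a TensorFlow Object Detection API
# model directory containing pipeline.config and model.ckpt.* files:
#
#   frozen_graph, input_names, output_names = build_detection_graph(
#       config='ssd_mobilenet_v2_coco/pipeline.config',
#       checkpoint='ssd_mobilenet_v2_coco/model.ckpt',
#       score_threshold=0.3, batch_size=1)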
# Function to draw a rectangle with width > 1
def draw_rectangle(draw, coordinates, color, width=1):
    for i in range(width):
        rect_start = (coordinates[0] - i, coordinates[1] - i)
        rect_end = (coordinates[2] + i, coordinates[3] + i)
        draw.rectangle((rect_start, rect_end), outline=color, fill=color)
# Function to read labels from text files.
# Expects one "<numeric id> <label name>" pair per line, e.g. "1 person".
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret
def inference_tf(runs, image, model, output, label=None):
    if label:
        labels = ReadLabelFile(label)
    else:
        labels = None

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    config_path = os.path.join(model, PIPELINE_CONFIG_NAME)
    checkpoint_path = os.path.join(model, CHECKPOINT_PREFIX)
    frozen_graph, input_names, output_names = build_detection_graph(
        config=config_path, checkpoint=checkpoint_path, score_threshold=0.3, batch_size=1)

    # convert with TF-TRT: subgraphs of at least 50 nodes are replaced by
    # FP16 TensorRT engines, with a 32 MB workspace
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph, outputs=output_names, max_batch_size=1,
        max_workspace_size_bytes=1 << 25, precision_mode='FP16', minimum_segment_size=50)

    with tf.Session(config=tf_config) as sess:
        sess.graph.as_default()
        tf.import_graph_def(trt_graph, name='')

        img = Image.open(image)
        draw = ImageDraw.Draw(img, 'RGBA')
        helvetica = ImageFont.truetype("./Helvetica.ttf", size=72)

        picture = cv2.imread(image)
        initial_h, initial_w, channels = picture.shape
        frame = cv2.resize(picture, (300, 300))
        frame = frame[:, :, [2, 1, 0]]  # BGR2RGB
        frame = frame.reshape(1, frame.shape[0], frame.shape[1], 3)
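        # Note: the fixed 300x300 input assumes an SSD-300 style model; for a
        # model with a different fixed_shape_resizer this should be changed to
        # match its pipeline.config.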
        # Run synchronous inference. The first pass is treated as a warm-up,
        # since it includes one-off CUDA and TensorRT engine initialization.
        print("Running inference", runs, "times.")
        if runs == 1:
            start = timer()
            out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                            sess.graph.get_tensor_by_name('detection_scores:0'),
                            sess.graph.get_tensor_by_name('detection_boxes:0'),
                            sess.graph.get_tensor_by_name('detection_classes:0')],
                           feed_dict={'image_tensor:0': frame})
            end = timer()
            print('Elapsed time is', ((end - start) / runs) * 1000, 'ms')
        else:
            print('Initial run, discarding.')
            start = timer()
            out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                            sess.graph.get_tensor_by_name('detection_scores:0'),
                            sess.graph.get_tensor_by_name('detection_boxes:0'),
                            sess.graph.get_tensor_by_name('detection_classes:0')],
                           feed_dict={'image_tensor:0': frame})
            end = timer()
            print('First run time is', (end - start) * 1000, 'ms')
            start = timer()
            for i in range(runs):
                out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                                sess.graph.get_tensor_by_name('detection_scores:0'),
                                sess.graph.get_tensor_by_name('detection_boxes:0'),
                                sess.graph.get_tensor_by_name('detection_classes:0')],
                               feed_dict={'image_tensor:0': frame})
            end = timer()
            print('Elapsed time is', ((end - start) / runs) * 1000, 'ms')
        # Visualize detected bounding boxes. Boxes come back as normalized
        # [ymin, xmin, ymax, xmax] coordinates.
        num_detections = int(out[0][0])
        for i in range(num_detections):
            classId = int(out[3][0][i])
            score = float(out[1][0][i])
            bbox = [float(v) for v in out[2][0][i]]
            if score > 0.5:
                xmin = bbox[1] * initial_w
                ymin = bbox[0] * initial_h
                xmax = bbox[3] * initial_w
                ymax = bbox[2] * initial_h
                if labels:
                    print(labels[classId], 'score =', score)
                else:
                    print('score =', score)
                box = [xmin, ymin, xmax, ymax]
                print('box =', box)
                draw_rectangle(draw, box, (0, 128, 128, 20), width=5)
                if labels:
                    draw.text((box[0] + 20, box[1] + 20), labels[classId], fill=(255, 255, 255, 20), font=helvetica)
        img.save(output)
        print('Saved to', output)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model directory.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument('--runs', help='Number of times to run the inference.', type=int, default=1)
    args = parser.parse_args()

    if args.output:
        output_file = args.output
    else:
        output_file = 'out.jpg'

    if args.label:
        label_file = args.label
    else:
        label_file = None

    inference_tf(args.runs, args.input, args.model, output_file, label_file)

if __name__ == '__main__':
    main()
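# Example invocation (model directory and image name are illustrative):
#   python3 benchmark_tf_trt.py --model ssd_mobilenet_v2_coco_2018_03_29 \
#       --input fruit.jpg --output out.jpg --runs 100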