Skip to content

Instantly share code, notes, and snippets.

@ivanpanshin
Created September 7, 2020 11:30
Show Gist options
  • Save ivanpanshin/6f032beef699ae221e51a47f7c854c1c to your computer and use it in GitHub Desktop.
Save ivanpanshin/6f032beef699ae221e51a47f7c854c1c to your computer and use it in GitHub Desktop.
trt
import onnx
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
#import cv2
from PIL import Image
import os
import matplotlib.pyplot as plt
import time
#import albumentations as A
#from albumentations.pytorch import transforms
#from albumentations import (Compose,Resize,)
#from albumentations.augmentations.transforms import Normalize
#from albumentations.pytorch.transforms import ToTensor
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
def preprocess_image(img_path):
    """Load an image file and return it as a normalized single-image batch.

    The image is forced to 3-channel RGB first, so grayscale, palette or
    RGBA files do not break the 3-channel normalization below.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float tensor with a leading batch dimension of size 1
        (``torch.unsqueeze(..., 0)`` of the transformed image).
    """
    # transformations for the input data
    transform = transforms.Compose([
        # an int size makes torchvision scale the shorter edge to 224 px
        transforms.Resize(224),
        transforms.ToTensor(),
        # per-channel statistics commonly used with ImageNet-pretrained models
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # read input image; the context manager closes the underlying file
    with Image.open(img_path) as input_img:
        # do transformations on an RGB copy of the decoded image
        input_data = transform(input_img.convert("RGB"))
    # prepare batch
    batch_data = torch.unsqueeze(input_data, 0)
    return batch_data
def postprocess(output_data, imagenet_labels):
    """Collect the classes whose softmax confidence exceeds 0.5 %.

    Args:
        output_data: 2-D tensor of raw scores; only row 0 is inspected
            (the code indexes ``[0]`` after a softmax over ``dim=1``).
        imagenet_labels: Sequence of class names indexed by class id. May be
            ``None`` or shorter than the number of classes, in which case the
            corresponding label is reported as ``None``.

    Returns:
        A list of ``(class_index, class_name, confidence_percent)`` tuples in
        descending score order. Iteration stops at the first class whose
        confidence is not above 0.5, or when all classes have been visited.
    """
    # calculate human-readable value by softmax
    confidences = torch.nn.functional.softmax(output_data, dim=1)[0] * 100
    # find top predicted classes
    _, indices = torch.sort(output_data, descending=True)
    top_predictions = []
    # Iterating over the sorted indices bounds the walk by the number of
    # classes; an unbounded counter would run past the end when every class
    # is above the threshold.
    for class_idx in indices[0]:
        confidence = confidences[class_idx].item()
        # "not >" (rather than "<=") so that a NaN confidence also stops here
        if not confidence > 0.5:
            break
        idx = class_idx.item()
        if imagenet_labels is not None and idx < len(imagenet_labels):
            label = imagenet_labels[idx]
        else:
            label = None
        top_predictions.append((idx, label, confidence))
    return top_predictions
def return_imagenet_labels(imagenet_classes_path='imagenet_classes.txt'):
    """Read class names from a ``<index>: <name>`` style text file.

    Each line is split at its FIRST colon, so class names that themselves
    contain a colon are kept intact. Lines without any colon (for example
    blank lines) are skipped.

    Args:
        imagenet_classes_path: Path of the labels file.

    Returns:
        A list of class-name strings in file order.
    """
    imagenet_labels = []
    with open(imagenet_classes_path) as file:
        for line in file:
            _, separator, class_name = line.partition(':')
            if not separator:
                # no "index: name" pair on this line
                continue
            # Drop the first two and the last three characters of the text
            # after the colon. This matches lines shaped like
            # "0: 'tench',\n" (assumed layout - confirm against the file).
            imagenet_labels.append(class_name[2:-3])
    return imagenet_labels
def return_model():
    """Create the ResNet-18 used as the PyTorch reference model.

    Returns:
        ``torchvision.models.resnet18(pretrained=True)`` after it has been
        moved to the CUDA device and switched to evaluation mode.
    """
    # .cuda() and .eval() both return the module itself, so they chain
    return torchvision.models.resnet18(pretrained=True).cuda().eval()
def postprocess_outputs(outputs):
    """Return the most likely class and its softmax probability.

    Args:
        outputs: Tensor of raw class scores with the classes on the last
            axis, e.g. shape ``(1, num_classes)`` or ``(num_classes,)``.

    Returns:
        ``(output_label, confidence)`` as 0-dim tensors. ``output_label`` is
        the argmax over the flattened softmax output, so it equals the class
        index only when a single sample is passed; ``confidence`` is the
        maximum softmax value.
    """
    # The class axis is named explicitly: omitting ``dim`` relies on a
    # deprecated implicit choice and emits a warning. For 1-D and 2-D inputs
    # ``dim=-1`` is the axis the implicit rule picked.
    softmax_outputs = torch.nn.functional.softmax(outputs, dim=-1)
    output_label = torch.argmax(softmax_outputs)
    confidence = torch.max(softmax_outputs)
    return output_label, confidence
def build_engine(onnx_file_path):
    """Parse an ONNX file and build a TensorRT engine from it.

    Relies on a module-level ``TRT_LOGGER`` being defined before the call.

    Args:
        onnx_file_path: Path of the ONNX model file to parse.

    Returns:
        ``(engine, context)``: the built engine and an execution context
        created from it.

    Raises:
        RuntimeError: If the ONNX parser reports a failure, or if the builder
            does not return an engine.
    """
    # initialize TensorRT engine and parse ONNX model
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)
    print(network.num_layers)
    # allow TensorRT to use up to 1GB of GPU memory for tactic selection
    builder.max_workspace_size = 1 << 30
    # we have only one image in batch
    builder.max_batch_size = 1
    # use FP16 mode if possible
    if builder.platform_has_fast_fp16:
        builder.fp16_mode = True
    # parse ONNX
    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        parsed = parser.parse(model.read())
    if not parsed:
        # Surface the parser diagnostics instead of continuing with a
        # half-populated network.
        errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
        raise RuntimeError(
            'Failed to parse ONNX file {}: {}'.format(
                onnx_file_path, '; '.join(errors) or 'no parser error reported'))
    print('Completed parsing of ONNX file')
    # generate TensorRT engine optimized for the target platform
    print('Building an engine...')
    engine = builder.build_cuda_engine(network)
    if engine is None:
        # fail with a clear message rather than an AttributeError on None
        raise RuntimeError(
            'TensorRT failed to build an engine from {}'.format(onnx_file_path))
    context = engine.create_execution_context()
    print("Completed creating Engine")
    return engine, context
if __name__ == '__main__':
    # Benchmark script: run ResNet-18 once in PyTorch, export it to ONNX,
    # build a TensorRT engine from the export and time repeated
    # preprocess / inference / postprocess passes over one image.
    amount_of_frames = 100
    image_path = 'hotdog.jpg'

    image = preprocess_image(image_path)
    image = image.cuda()
    imagenet_labels = return_imagenet_labels()
    model = return_model()

    # Reference PyTorch forward pass; gradients are not needed for inference,
    # so autograd tracking is switched off.
    with torch.no_grad():
        outputs = model(image)
    output_label, confidence = postprocess_outputs(outputs)
    print(outputs)

    # Export the model, using the preprocessed image as the example input,
    # then validate the written file.
    ONNX_FILE_PATH = 'resnet18.onnx'
    torch.onnx.export(model, image, ONNX_FILE_PATH, input_names=['input'],
                      output_names=['output'], export_params=True)
    onnx_model = onnx.load(ONNX_FILE_PATH)
    onnx.checker.check_model(onnx_model)

    # build_engine() reads TRT_LOGGER as a module-level global, so it must be
    # assigned here, before the call.
    TRT_LOGGER = trt.Logger()
    engine, context = build_engine(ONNX_FILE_PATH)

    for binding in engine:
        if engine.binding_is_input(binding):  # we expect only one input
            input_shape = engine.get_binding_shape(binding)
            input_size = trt.volume(input_shape) * engine.max_batch_size * np.dtype(np.float32).itemsize  # in bytes
            device_input = cuda.mem_alloc(input_size)
        else:  # and one output
            output_shape = engine.get_binding_shape(binding)
            # create page-locked memory buffers (i.e. won't be swapped to disk)
            host_output = cuda.pagelocked_empty(trt.volume(output_shape) * engine.max_batch_size, dtype=np.float32)
            device_output = cuda.mem_alloc(host_output.nbytes)

    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()

    # accumulated wall-clock seconds per stage
    total_preprocess_time = 0
    total_inference_time = 0
    total_postprocess_time = 0

    for _ in range(amount_of_frames):
        # preprocess input data (includes enqueueing the host-to-device copy)
        start_time = time.perf_counter()
        host_input = np.array(preprocess_image(image_path).cpu().numpy(), dtype=np.float32, order='C')
        cuda.memcpy_htod_async(device_input, host_input, stream)
        total_preprocess_time += (time.perf_counter() - start_time)

        # run inference
        start_time = time.perf_counter()
        context.execute_async(bindings=[int(device_input), int(device_output)], stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(host_output, device_output, stream)
        # wait for the copies and the inference queued on the stream
        stream.synchronize()
        total_inference_time += (time.perf_counter() - start_time)

        # postprocess results
        start_time = time.perf_counter()
        output_data = torch.Tensor(host_output).reshape(engine.max_batch_size, output_shape[0])
        postprocess(output_data, imagenet_labels)
        total_postprocess_time += (time.perf_counter() - start_time)

    # The reported numbers are frames per second (frames / elapsed seconds),
    # not durations; a zero elapsed time is reported as infinity instead of
    # raising ZeroDivisionError.
    stage_fps = [
        amount_of_frames / elapsed if elapsed > 0 else float('inf')
        for elapsed in (total_preprocess_time, total_inference_time, total_postprocess_time)
    ]
    print('TensorRT FPS - preprocess: {}/inference: {}/postprocess: {}'.format(*stage_fps))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment