Created
September 7, 2020 11:30
-
-
Save ivanpanshin/6f032beef699ae221e51a47f7c854c1c to your computer and use it in GitHub Desktop.
trt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import onnx | |
import torch | |
import torch.nn as nn | |
import torchvision | |
import torchvision.transforms as transforms | |
#import cv2 | |
from PIL import Image | |
import os | |
import matplotlib.pyplot as plt | |
import time | |
#import albumentations as A | |
#from albumentations.pytorch import transforms | |
#from albumentations import (Compose,Resize,) | |
#from albumentations.augmentations.transforms import Normalize | |
#from albumentations.pytorch.transforms import ToTensor | |
import numpy as np | |
import pycuda.driver as cuda | |
import pycuda.autoinit | |
import tensorrt as trt | |
def preprocess_image(img_path):
    """Load an image file and turn it into a normalized single-image batch.

    The image is passed through ``Resize(224)``, ``ToTensor()`` and
    ``Normalize`` with the mean/std constants listed below, then a leading
    batch dimension of size 1 is added.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The transformed image with an extra dimension inserted at index 0.
    """
    transform = transforms.Compose(
        [
            # NOTE(review): Resize is given a single int, so non-square
            # images are presumably not forced to 224x224 -- confirm against
            # the torchvision documentation if a fixed input size is needed.
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )

    # Normalize is configured with 3-element mean/std, so force three
    # channels: grayscale, RGBA or CMYK files would otherwise fail. The
    # context manager releases the file handle once the pixels are decoded.
    with Image.open(img_path) as raw_img:
        input_img = raw_img.convert("RGB")

    input_data = transform(input_img)

    # Add the batch dimension expected by the model.
    return torch.unsqueeze(input_data, 0)
def postprocess(output_data, imagenet_labels):
    """Walk the predicted classes in descending score order.

    Scores are converted to percentages with a softmax over dimension 1 and
    the classes of the first batch row are visited while their confidence
    stays above 0.5 %. Reporting of the visited classes is currently
    disabled, so the function has no output and returns ``None``.

    Args:
        output_data: 2-D tensor of raw scores; only row 0 is inspected.
        imagenet_labels: Class names indexed by class id (unused while
            reporting is disabled).
    """
    # Confidence of every class in the first row, as a percentage.
    confidences = torch.nn.functional.softmax(output_data, dim=1)[0] * 100
    # Class indices of the first row ordered from highest to lowest score.
    _, indices = torch.sort(output_data, descending=True)
    ranked = indices[0]
    num_classes = ranked.shape[0]

    i = 0
    # The bound on ``i`` matters when every class is above the threshold
    # (e.g. a model with only a few classes); without it the index would run
    # past the end of ``ranked``.
    while i < num_classes and confidences[ranked[i]] > 0.5:
        # Reporting hook (disabled): imagenet_labels[ranked[i]] and
        # confidences[ranked[i]].item() describe the current class.
        i += 1
def return_imagenet_labels(imagenet_classes_path='imagenet_classes.txt'):
    """Read class names from a ``<key>: <name>`` style text file.

    Each line is split at its first colon. The text after the colon has its
    first two and last three characters removed and is appended to the
    result. Lines without a colon (for example blank lines) are skipped.

    Args:
        imagenet_classes_path: Path of the class list file.

    Returns:
        List of class-name strings in file order.
    """
    imagenet_labels = []
    with open(imagenet_classes_path) as file:
        for line in file:
            # Split on the first colon only, so names that themselves
            # contain a colon are kept intact instead of raising ValueError.
            _, separator, class_name = line.partition(':')
            if not separator:
                continue
            # Presumably strips a leading " '" and a trailing "',\n" --
            # confirm against the actual file format.
            imagenet_labels.append(class_name[2:-3])
    return imagenet_labels
def return_model():
    """Create a pretrained ResNet-18 on the GPU, switched to eval mode.

    Returns:
        The ``torchvision.models.resnet18(pretrained=True)`` module after
        ``.cuda()`` and ``.eval()`` have been applied.
    """
    # Both ``cuda()`` and ``eval()`` return the module itself, so the calls
    # can be chained.
    return torchvision.models.resnet18(pretrained=True).cuda().eval()
def postprocess_outputs(outputs):
    """Return the index and value of the highest softmax probability.

    Args:
        outputs: Tensor of raw scores; softmax is taken over the last
            dimension.

    Returns:
        Tuple ``(output_label, confidence)``: the argmax over the whole
        (flattened) softmax tensor and the maximum softmax value, both as
        0-dim tensors.
    """
    # Pass ``dim`` explicitly: the implicit choice is deprecated and emits a
    # warning. The last dimension matches the implicit behaviour for 1-D and
    # 2-D inputs.
    softmax_outputs = torch.nn.functional.softmax(outputs, dim=-1)
    output_label = torch.argmax(softmax_outputs)
    confidence = torch.max(softmax_outputs)
    return output_label, confidence
def build_engine(onnx_file_path):
    """Parse an ONNX file and build a TensorRT engine with an execution context.

    Uses the module-level ``TRT_LOGGER``, which must be defined before this
    function is called.

    Args:
        onnx_file_path: Path of the ONNX model to parse.

    Returns:
        Tuple ``(engine, context)``.

    Raises:
        RuntimeError: If the ONNX parser reports failure or the builder does
            not return an engine.
    """
    # initialize TensorRT engine and parse ONNX model
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    parser = trt.OnnxParser(network, TRT_LOGGER)
    # NOTE(review): this is printed before anything has been parsed into the
    # network -- confirm whether it was meant to run after parsing.
    print(network.num_layers)

    # allow TensorRT to use up to 1GB of GPU memory for tactic selection
    builder.max_workspace_size = 1 << 30
    # we have only one image in batch
    builder.max_batch_size = 1
    # use FP16 mode if possible
    if builder.platform_has_fast_fp16:
        builder.fp16_mode = True

    # parse ONNX
    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        parsed = parser.parse(model.read())
    if not parsed:
        # Surface the parser's own messages instead of failing later with an
        # unrelated error on a missing engine.
        messages = [parser.get_error(i).desc() for i in range(parser.num_errors)]
        raise RuntimeError(
            'Failed to parse ONNX file {}: {}'.format(
                onnx_file_path, '; '.join(messages) or 'no parser error reported'
            )
        )
    print('Completed parsing of ONNX file')

    # generate TensorRT engine optimized for the target platform
    print('Building an engine...')
    engine = builder.build_cuda_engine(network)
    if engine is None:
        raise RuntimeError(
            'TensorRT could not build an engine from {}'.format(onnx_file_path)
        )
    context = engine.create_execution_context()
    print("Completed creating Engine")
    return engine, context
if __name__ == '__main__':
    # Benchmark script: run the PyTorch model once as a reference, export it
    # to ONNX, build a TensorRT engine from the ONNX file and time
    # preprocess / inference / postprocess over ``amount_of_frames`` runs.
    amount_of_frames = 100
    image_path = 'hotdog.jpg'

    image = preprocess_image(image_path)
    image = image.cuda()
    imagenet_labels = return_imagenet_labels()
    model = return_model()

    # Reference forward pass; gradients are not needed for inference, so
    # skip recording the autograd graph.
    with torch.no_grad():
        outputs = model(image)
    output_label, confidence = postprocess_outputs(outputs)
    print(outputs)

    # Export to ONNX and validate the exported file.
    ONNX_FILE_PATH = 'resnet18.onnx'
    torch.onnx.export(model, image, ONNX_FILE_PATH, input_names=['input'],
                      output_names=['output'], export_params=True)
    onnx_model = onnx.load(ONNX_FILE_PATH)
    onnx.checker.check_model(onnx_model)

    # build_engine reads TRT_LOGGER as a module-level name, so it has to be
    # defined before the call.
    TRT_LOGGER = trt.Logger()
    engine, context = build_engine(ONNX_FILE_PATH)

    # Allocate device/host buffers for each binding.
    for binding in engine:
        if engine.binding_is_input(binding):  # we expect only one input
            input_shape = engine.get_binding_shape(binding)
            input_size = trt.volume(input_shape) * engine.max_batch_size * np.dtype(np.float32).itemsize  # in bytes
            device_input = cuda.mem_alloc(input_size)
        else:  # and one output
            output_shape = engine.get_binding_shape(binding)
            # create page-locked memory buffers (i.e. won't be swapped to disk)
            host_output = cuda.pagelocked_empty(trt.volume(output_shape) * engine.max_batch_size, dtype=np.float32)
            device_output = cuda.mem_alloc(host_output.nbytes)

    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()

    total_preprocess_time = 0
    total_inference_time = 0
    total_postprocess_time = 0

    # perf_counter is monotonic and high resolution, which suits measuring
    # short intervals better than wall-clock time.
    for _ in range(amount_of_frames):
        # preprocess input data
        start_time = time.perf_counter()
        host_input = np.array(preprocess_image(image_path).cpu().numpy(), dtype=np.float32, order='C')
        cuda.memcpy_htod_async(device_input, host_input, stream)
        total_preprocess_time += (time.perf_counter() - start_time)

        # run inference
        start_time = time.perf_counter()
        context.execute_async(bindings=[int(device_input), int(device_output)], stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(host_output, device_output, stream)
        stream.synchronize()
        total_inference_time += (time.perf_counter() - start_time)

        # postprocess results
        start_time = time.perf_counter()
        output_data = torch.Tensor(host_output).reshape(engine.max_batch_size, output_shape[0])
        postprocess(output_data, imagenet_labels)
        total_postprocess_time += (time.perf_counter() - start_time)

    # The printed values are frames divided by accumulated seconds, i.e.
    # throughput per stage rather than elapsed time.
    print('TensorRT preprocess: {}/inference: {}/postprocess time: {}'.format(amount_of_frames/total_preprocess_time, amount_of_frames/total_inference_time, amount_of_frames/total_postprocess_time))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment