Last active
July 31, 2024 08:11
-
-
Save yuekaizhang/29907e45a438d6012ec0d34beeaf90a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Generation Command: polygraphy run --gen test.py my_define_network.py --trt --fp16 --load-inputs inputs.json --load-outputs layerwise_golden.json --check-error-stat median --atol 0.1 --rtol 0.1 | |
# It will check against outputs stored in layerwise_golden.json | |
from polygraphy.logger import G_LOGGER | |
from polygraphy import func | |
from polygraphy.backend.trt import NetworkFromOnnxPath | |
import tensorrt as trt | |
from polygraphy import util | |
from polygraphy.backend.common import InvokeFromScript, BytesFromPath | |
from polygraphy.backend.trt import CreateConfig as CreateTrtConfig, EngineFromNetwork, SaveEngine, TrtRunner, EngineFromBytes | |
from polygraphy.comparator import Comparator, CompareFunc, RunResults | |
from polygraphy.json import load_json | |
import time | |
import sys | |
def predecessors(network, layer):
    """Yield every layer in *network* that produces one of *layer*'s inputs.

    A layer l2 is a predecessor when any of its output tensors has the same
    name as one of *layer*'s input tensors.  O(inputs * layers * outputs) scan,
    acceptable for the one-off network rewrite this script performs.

    Args:
        network: Iterable of layers (e.g. a trt.INetworkDefinition).
        layer: The consumer layer whose producers are wanted.

    Yields:
        Layers of *network* feeding *layer*, in network iteration order.
    """
    # Fix: removed a dead `predecessors = []` local that was never used
    # (the function is a generator) and shadowed the function's own name.
    for i in range(layer.num_inputs):
        in_name = layer.get_input(i).name
        for l2 in network:
            for j in range(l2.num_outputs):
                if l2.get_output(j).name == in_name:
                    yield l2
def match(layer, op):
    """Return True when *layer*'s name begins with the operator prefix *op*."""
    layer_name = layer.name
    return layer_name.startswith(op)
def promote_to_fp32(layer):
    """Pin *layer* to FP32 execution and log the promotion."""
    message = "Promoting {} to FP32".format(layer.name)
    G_LOGGER.info(message)
    layer.precision = trt.DataType.FLOAT
def promote_initial_fp32_layers(network):
    """Promote the initial set of numerically sensitive layers to FP32.

    Every Div/Mul layer is promoted unconditionally.  A MatMul layer is
    promoted when any of its direct predecessors is a Div or Mul.

    Args:
        network: Iterable of layers (e.g. a trt.INetworkDefinition).

    Returns:
        list: The promoted MatMul layers — the candidates that the demotion
        loop later tries to move back to FP16.
    """
    fp32_matmul_layers = []
    for layer in network:
        if match(layer, "Div") or match(layer, "Mul"):
            promote_to_fp32(layer)
        if match(layer, "MatMul"):
            for pred in predecessors(network, layer):
                if match(pred, "Div") or match(pred, "Mul"):
                    fp32_matmul_layers.append(layer)
                    promote_to_fp32(layer)
                    # Fix: stop at the first matching predecessor so a MatMul
                    # with several Div/Mul producers is not appended (and
                    # later re-tested by the demotion loop) more than once.
                    break
    return fp32_matmul_layers
# Parse the initial network once; the same (builder, network, parser) triple
# and config are reused for every engine build in the demotion loop below.
onnx_path = './encoder-folded.onnx'
G_LOGGER.info("Loading: {}".format(onnx_path))
with G_LOGGER.indent():
    # Fix: reuse onnx_path instead of repeating the path literal, so changing
    # the model requires editing only one line.
    parse_network_from_onnx = NetworkFromOnnxPath(onnx_path)
    (builder, network, parser) = parse_network_from_onnx()
G_LOGGER.info("Promoting initial FP32 layers")
with G_LOGGER.indent():
    # Globally promote sensitive layers; the returned MatMuls are the
    # candidates the demotion loop will try to move back to FP16.
    fp32_matmul_layers = promote_initial_fp32_layers(network)
# Create the TRT config (reused across builds). The timing cache presumably
# speeds up the many rebuilds by reusing tactic timings — confirm cache file exists.
config = CreateTrtConfig(fp16=True, load_timing_cache='timing.cache.fp16')(builder, network)
G_LOGGER.info("Loading inputs and golden values")
with G_LOGGER.indent():
    input_data = list(load_json('inputs.json', description='input data'))
    golden_results = list(RunResults.load('layerwise_golden.json'))
def test_built_engine(demoted_layer_name):
    """Build an engine from the (already modified) global network, run it on
    the stored inputs, and compare against the layerwise golden outputs.

    Args:
        demoted_layer_name (str): Label used in log lines and in the saved
            engine's file name ("try_demote_<name>.engine"); typically the
            layer most recently switched to FP16.

    Returns:
        bool: True when all outputs match the golden values within
        rtol/atol 0.1 using the 'median' error statistic.
    """
    signature = "test_built_engine({})".format(demoted_layer_name)
    G_LOGGER.start("RUNNING | {}".format(signature))
    begin = time.time()
    with G_LOGGER.indent():
        # The build is lazy: TrtRunner triggers it when activated.
        build_engine = EngineFromNetwork((builder, network, parser), config=config)
        save_engine = SaveEngine(build_engine, path="try_demote_{}.engine".format(demoted_layer_name))
        runners = (TrtRunner(save_engine),)
        # Run the fresh engine, then append the golden results so the
        # comparator checks the new run against them.
        results = Comparator.run(runners, data_loader=input_data)
        results.extend(golden_results)
        compare_func = CompareFunc.simple(rtol={'': 0.1}, atol={'': 0.1}, check_error_stat={'': 'median'})
        success = bool(Comparator.compare_accuracy(results, compare_func=compare_func))
    end = time.time()
    duration = end - begin
    # Fix: deduplicated the PASSED/FAILED branches (they differed only in the
    # status word); also removed the commented-out engine-deserialization path.
    status = "PASSED" if success else "FAILED"
    G_LOGGER.finish("{} | {} | Duration: {}s".format(status, signature, duration))
    return success
# MatMuls already known (from earlier runs) to require FP32 — the demotion
# loop skips the engine build and keeps these in FP32 immediately.
exclude_list = [
    "MatMul_141",
]
# MatMuls already known (from earlier runs) to be runnable in FP16 — the
# demotion loop skips the engine build and demotes these immediately.
include_list = [
    "MatMul_218",
    "MatMul_220",
]
def demote_matmuls():
    """Try switching each initially-promoted MatMul back to FP16, keeping FP32
    only where the accuracy comparison fails.

    Layers listed in include_list / exclude_list use the cached verdict
    instead of a (slow) engine build + golden comparison.
    """
    G_LOGGER.info("Beginning demoting loop")
    # NOTE(review): sanity check of the unmodified build, left disabled
    # (presumably for speed) — re-enable when validating a new model:
    # assert test_built_engine("no_layers_demoted")
    num_matmul_layers = len(fp32_matmul_layers)

    def can_run_in_fp16(layer):
        # Cached verdicts avoid rebuilding engines for already-known layers.
        if layer.name in include_list:
            return True
        if layer.name in exclude_list:
            return False
        return test_built_engine(layer.name)

    for i, layer in enumerate(fp32_matmul_layers):
        # Tentatively demote, rebuild + compare, revert on failure.
        layer.precision = trt.DataType.HALF
        if can_run_in_fp16(layer):
            G_LOGGER.info("Layer {} may be run in FP16".format(layer.name))
        else:
            layer.precision = trt.DataType.FLOAT
            G_LOGGER.info("Layer {} must be run in FP32".format(layer.name))
        # Fix: i is 0-based, so i + 1 layers have been tested; the original
        # printed num - i, over-counting the remainder by one (it reported
        # "1 layers left" after the final layer was already tested).
        G_LOGGER.info("{} layers left to test".format(num_matmul_layers - (i + 1)))
demote_matmuls()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.