Skip to content

Instantly share code, notes, and snippets.

@yuekaizhang
Last active July 31, 2024 08:11
Show Gist options
  • Save yuekaizhang/29907e45a438d6012ec0d34beeaf90a9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Generation Command: polygraphy run --gen test.py my_define_network.py --trt --fp16 --load-inputs inputs.json --load-outputs layerwise_golden.json --check-error-stat median --atol 0.1 --rtol 0.1
# It will check against outputs stored in layerwise_golden.json
from polygraphy.logger import G_LOGGER
from polygraphy import func
from polygraphy.backend.trt import NetworkFromOnnxPath
import tensorrt as trt
from polygraphy import util
from polygraphy.backend.common import InvokeFromScript, BytesFromPath
from polygraphy.backend.trt import CreateConfig as CreateTrtConfig, EngineFromNetwork, SaveEngine, TrtRunner, EngineFromBytes
from polygraphy.comparator import Comparator, CompareFunc, RunResults
from polygraphy.json import load_json
import time
import sys
def predecessors(network, layer):
    """Yield every layer in *network* that produces one of *layer*'s inputs.

    A layer ``p`` is a predecessor of *layer* when one of ``p``'s output
    tensor names equals one of *layer*'s input tensor names. Layers are
    yielded once per matching (input, output) pair.

    Fix: the original bound an unused local list ``predecessors = []`` that
    shadowed the function's own name; since the function is a generator the
    list was dead code and has been removed.
    """
    for i in range(layer.num_inputs):
        in_name = layer.get_input(i).name
        for candidate in network:
            for j in range(candidate.num_outputs):
                if candidate.get_output(j).name == in_name:
                    yield candidate
def match(layer, op):
    """Return True when *layer*'s name begins with the op-type prefix *op*."""
    prefix_len = len(op)
    return layer.name[:prefix_len] == op
def promote_to_fp32(layer):
    """Pin *layer* to FP32 precision, logging the promotion."""
    message = "Promoting {} to FP32".format(layer.name)
    G_LOGGER.info(message)
    layer.precision = trt.DataType.FLOAT
def promote_initial_fp32_layers(network):
    """Promote numerically-sensitive layers to FP32 and return the promoted MatMuls.

    All Div/Mul layers are pinned to FP32 unconditionally. A MatMul layer is
    pinned to FP32 (and recorded for later demotion attempts) when any of its
    predecessors is a Div or Mul layer.

    Fix: the original appended the MatMul and re-promoted it once per matching
    predecessor, so a MatMul fed by several Div/Mul layers appeared multiple
    times in the returned list — causing redundant engine builds in the
    demotion loop. Each MatMul is now recorded at most once.

    Returns:
        list of the MatMul layers that were promoted to FP32.
    """
    fp32_matmul_layers = []
    for layer in network:
        if match(layer, "Div") or match(layer, "Mul"):
            promote_to_fp32(layer)
        if match(layer, "MatMul"):
            has_sensitive_input = any(
                match(l2, "Div") or match(l2, "Mul")
                for l2 in predecessors(network, layer)
            )
            if has_sensitive_input:
                fp32_matmul_layers.append(layer)
                promote_to_fp32(layer)
    return fp32_matmul_layers
# Parse the initial network (the same network will be reused for every
# engine build so per-layer precision changes accumulate across iterations).
onnx_path = './encoder-folded.onnx'
G_LOGGER.info("Loading: {}".format(onnx_path))
with G_LOGGER.indent():
    # Fix: reuse onnx_path instead of duplicating the path literal.
    parse_network_from_onnx = NetworkFromOnnxPath(onnx_path)
    (builder, network, parser) = parse_network_from_onnx()
G_LOGGER.info("Promoting initial FP32 layers")
with G_LOGGER.indent():
    fp32_matmul_layers = promote_initial_fp32_layers(network)
# Create the TRT config (will be reused); the timing cache avoids re-running
# kernel autotuning on every rebuild.
config = CreateTrtConfig(fp16=True, load_timing_cache='timing.cache.fp16')(builder, network)
G_LOGGER.info("Loading inputs and golden values")
with G_LOGGER.indent():
    input_data = list(load_json('inputs.json', description='input data'))
    golden_results = list(RunResults.load('layerwise_golden.json'))
def test_built_engine(demoted_layer_name):
    """Build a TRT engine with the current per-layer precisions and check accuracy.

    Builds an engine from the (mutated) shared network, runs it on the saved
    inputs, and compares the results against the golden layerwise outputs
    using a median-error check with atol/rtol of 0.1.

    Returns True when the accuracy comparison passes.
    """
    signature = "test_built_engine({})".format(demoted_layer_name)
    G_LOGGER.start("RUNNING | {}".format(signature))
    started_at = time.time()
    with G_LOGGER.indent():
        build_engine = EngineFromNetwork((builder, network, parser), config=config)
        save_engine = SaveEngine(build_engine, path="try_demote_{}.engine".format(demoted_layer_name))
        # Runners
        runners = (TrtRunner(save_engine),)
        # Runner Execution
        results = Comparator.run(runners, data_loader=input_data)
        results.extend(golden_results)
        # Accuracy Comparison (mirrors the CLI flags in the generation command)
        compare_func = CompareFunc.simple(rtol={'': 0.1}, atol={'': 0.1}, check_error_stat={'': 'median'})
        success = bool(Comparator.compare_accuracy(results, compare_func=compare_func))
    duration = time.time() - started_at
    # Report Results
    if success:
        G_LOGGER.finish("PASSED | {} | Duration: {}s".format(signature, duration))
    else:
        G_LOGGER.finish("FAILED | {} | Duration: {}s".format(signature, duration))
    return success
# List of MatMuls known already to require FP32.
# The demotion loop skips the engine build for these and keeps them in FP32.
exclude_list: list[str] = [
    "MatMul_141",
]
# List of MatMuls known already to be runnable in FP16.
# The demotion loop skips the engine build for these and leaves them in FP16.
include_list: list[str] = [
    "MatMul_218",
    "MatMul_220",
]
def demote_matmuls():
    """Try demoting each FP32-promoted MatMul back to FP16, one at a time.

    For each layer in the module-level ``fp32_matmul_layers`` list: flip it to
    FP16, rebuild the engine and run the accuracy check (unless the layer is
    on the known include/exclude lists), and restore FP32 if the check fails.
    """
    G_LOGGER.info("Beginning demoting loop")
    # Check that the full engine build works as expected
    #assert test_built_engine("no_layers_demoted")
    num_matmul_layers = len(fp32_matmul_layers)
    def can_run_in_fp16(layer):
        # The known-good/known-bad lists short-circuit the expensive
        # engine build + comparison.
        if layer.name in include_list:
            return True
        elif layer.name in exclude_list:
            return False
        else:
            return test_built_engine(layer.name)
    for i, layer in enumerate(fp32_matmul_layers):
        layer.precision = trt.DataType.HALF
        if can_run_in_fp16(layer):
            G_LOGGER.info("Layer {} may be run in FP16".format(layer.name))
        else:
            layer.precision = trt.DataType.FLOAT
            G_LOGGER.info("Layer {} must be run in FP32".format(layer.name))
        # Fix: the original logged `num_matmul_layers - i`, overcounting the
        # remaining layers by one (it reported "1 layers left" after the last).
        G_LOGGER.info("{} layers left to test".format(num_matmul_layers - i - 1))
demote_matmuls()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment