Skip to content

Instantly share code, notes, and snippets.

@vuiseng9
Created July 18, 2022 05:27
Show Gist options
  • Save vuiseng9/e1d0a701a50cc55c61eb700aaeace983 to your computer and use it in GitHub Desktop.
Save vuiseng9/e1d0a701a50cc55c61eb700aaeace983 to your computer and use it in GitHub Desktop.
import time
import numpy as np
import logging as log
from openvino.runtime import AsyncInferQueue, Core, PartialShape
from openvino.tools.benchmark.utils.constants import CPU_DEVICE_NAME
# Route every log.info(...) call in this script straight to print().
# NOTE: this rebinds the attribute on the shared `logging` module object, so any
# other code in the same process that calls logging.info is redirected too.
log.info = print
# Path of the model file handed to core.read_model() below. It is a
# machine-specific absolute path and must be edited before running elsewhere.
model_path="/data1/vchua/jpqd-bert/r0.010-squad-bert-b-mvmt-8bit/ir/squad-BertForQuestionAnswering.cropped.8bit.onnx"
def get_input_output_names(ports):
    """Collect the ``any_name`` of every port, keeping the given order.

    Args:
        ports: Iterable of objects that expose an ``any_name`` attribute.

    Returns:
        A new list holding one name per port.
    """
    names = []
    for port in ports:
        names.append(port.any_name)
    return names
def get_node_names(ports):
    """Collect ``port.node.friendly_name`` for every port, keeping the order.

    Args:
        ports: Iterable of objects whose ``node`` attribute exposes a
            ``friendly_name`` attribute.

    Returns:
        A new list holding one friendly name per port.
    """
    friendly_names = []
    for port in ports:
        friendly_names.append(port.node.friendly_name)
    return friendly_names
def print_inputs_and_outputs_info(model):
    """Log one descriptive line per model input, then one per model output.

    Each line reports the port's name, the type name of its element type, the
    layout of its node, and its partial shape (dimensions joined by spaces).

    Args:
        model: Object exposing ``inputs`` and ``outputs`` collections of
            ports; this script passes the compiled model.

    Returns:
        None. Everything is emitted through ``log.info``.
    """
    in_ports = model.inputs
    for name, port in zip(get_input_output_names(in_ports), in_ports):
        log.info(f"Model input '{name}' precision {port.element_type.get_type_name()}, "
                 f"dimensions ({str(port.node.layout)}): "
                 f"{' '.join(str(x) for x in port.partial_shape)}")

    out_ports = model.outputs
    for name, port in zip(get_input_output_names(out_ports), out_ports):
        log.info(f"Model output '{name}' precision {port.element_type.get_type_name()}, "
                 f"dimensions ({str(port.node.layout)}): "
                 f"{' '.join(str(x) for x in port.partial_shape)}")
log.info('\nCreating OpenVINO Runtime Core')
core = Core()

# Settings handed verbatim to the CPU device before the model is compiled.
# NOTE(review): presumably PERF_COUNT='NO' disables per-layer counters and
# NUM_STREAMS='-1' lets the plugin choose the stream count -- confirm against
# the OpenVINO CPU plugin documentation for the installed version.
device_config = {
    CPU_DEVICE_NAME: dict(
        PERF_COUNT='NO',
        PERFORMANCE_HINT='THROUGHPUT',
        NUM_STREAMS='-1',
    )
}
core.set_property(CPU_DEVICE_NAME, device_config[CPU_DEVICE_NAME])

# Dump every property the device reports, so the effective configuration is
# visible next to the benchmark numbers.
keys = core.get_property(CPU_DEVICE_NAME, 'SUPPORTED_PROPERTIES')
log.info(f'\nDEVICE: {CPU_DEVICE_NAME}')
for k in keys:
    # These entries are skipped rather than printed.
    if k in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS', 'SUPPORTED_PROPERTIES'):
        continue
    try:
        log.info(f' {k} , {core.get_property(CPU_DEVICE_NAME, k)}')
    except Exception:
        # Best effort: a property that cannot be read is skipped. Catching
        # Exception instead of a bare `except` keeps Ctrl-C (KeyboardInterrupt)
        # and SystemExit working while the dump is running.
        continue
log.info(f'\nReading the model: {model_path}\n')
model = core.read_model(model_path)

### !!! Toggle this variable
# True  -> every model input is reshaped to [1, -1] (seqlen is set to -1).
# False -> every model input is reshaped to [1, 384].
dynamic_length = True

# Both modes reshape all inputs the same way; only the second dimension differs.
seqlen = -1 if dynamic_length is True else 384
new_shape_cfg = {
    iport.any_name: PartialShape([1, seqlen])
    for iport in model.inputs
}
model.reshape(new_shape_cfg)

compiled_model = core.compile_model(model, CPU_DEVICE_NAME)
# The port names are read from the compiled model; create_input() uses them as keys.
input_port_names = [iport.any_name for iport in compiled_model.inputs]
print_inputs_and_outputs_info(compiled_model)
def create_input(seqlen):
    """Build one random sample keyed by the first three input port names.

    Relies on the module-level ``input_port_names`` list, which must hold at
    least three entries.

    Args:
        seqlen: Number of random values drawn per input.

    Returns:
        A dict mapping ``input_port_names[0..2]`` to int64 arrays of shape
        ``(1, seqlen)``. The first and third arrays hold values in
        ``[0, 999)``, the second holds values in ``[0, 2)``.
    """
    # NOTE(review): the third input also draws from [0, 999). If that port is
    # BERT's token_type_ids, the values may exceed the model's type vocabulary
    # -- confirm against the model's actual input order.
    upper_bounds = (999, 2, 999)

    def _random_row(upper):
        # Draw a 1-D vector, then add a leading axis of size 1.
        values = np.random.randint(upper, size=seqlen)
        return np.expand_dims(values, axis=0).astype('int64')

    return {
        input_port_names[idx]: _random_row(upper)
        for idx, upper in enumerate(upper_bounds)
    }
# Size of the pre-generated sample pool that the inference loops cycle through.
N_SAMPLE = 1024
if dynamic_length is True:
    # Mixed-length pool: the four lengths repeat in this order until
    # N_SAMPLE // len(sl_list) full rounds have been generated.
    sl_list = [64, 192, 256, 384]
    rounds = N_SAMPLE // len(sl_list)
    loaded_samples = [create_input(sl) for _ in range(rounds) for sl in sl_list]
else:
    # Fixed-length pool: every sample uses the static sequence length.
    loaded_samples = [create_input(seqlen) for _ in range(N_SAMPLE)]
# Pool of asynchronous infer requests. Per the OpenVINO documentation, passing
# 0 as the job count asks the runtime to choose the pool size itself.
infer_queue = AsyncInferQueue(compiled_model, 0)

# Cycle over the samples that actually exist. In dynamic mode the pool can be
# smaller than N_SAMPLE when the length list does not divide it evenly, and
# indexing with `% N_SAMPLE` would then run past the end of the list.
n_loaded = len(loaded_samples)

# warmup
for it in range(100):
    # Return value is unused; the call is kept so the submission pattern (and
    # therefore the measured behaviour) matches the original script.
    infer_queue.get_idle_request_id()
    infer_queue.start_async(inputs=loaded_samples[it % n_loaded])
infer_queue.wait_all()

niter=2500
# benchmark
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
for it in range(niter):
    infer_queue.get_idle_request_id()
    infer_queue.start_async(inputs=loaded_samples[it % n_loaded])
# Stop the clock only after every in-flight request has finished.
infer_queue.wait_all()
e2e_elapse = time.perf_counter() - start

log.info( '\nSeqLen {} | {} iter '
          '| E2E: {:.3f} s '
          '| TPT: {:6.2f} fps'.format(
              seqlen, niter, e2e_elapse, niter/e2e_elapse)
)
@vuiseng9
Copy link
Author

Toggle the boolean variable `dynamic_length` (marked `### !!! Toggle this variable` in the script) to switch between dynamic-length and fixed-length inference.

SeqLen 384 | 2500 iter | E2E: 6.383 s | TPT: 391.64 fps

# Inputs have lengths [64, 192, 256, 384] and are fed to the model in round-robin order
SeqLen -1 | 2500 iter | E2E: 9.121 s | TPT: 274.10 fps

@vuiseng9
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment