Created
          July 18, 2022 05:27 
        
      - 
      
- 
        Save vuiseng9/e1d0a701a50cc55c61eb700aaeace983 to your computer and use it in GitHub Desktop. 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
import logging as log
import time

import numpy as np
from openvino.runtime import AsyncInferQueue, Core, PartialShape
from openvino.tools.benchmark.utils.constants import CPU_DEVICE_NAME
# Route every ``log.info(...)`` call in this script straight to stdout:
# the ``info`` attribute of the logging module is rebound to ``print``.
log.info = print

# Location of the ONNX model that the benchmark reads.
model_path="/data1/vchua/jpqd-bert/r0.010-squad-bert-b-mvmt-8bit/ir/squad-BertForQuestionAnswering.cropped.8bit.onnx"
def get_input_output_names(ports):
    """Return the ``any_name`` of each port in *ports*, preserving order.

    Args:
        ports: Iterable of objects exposing an ``any_name`` attribute.

    Returns:
        A list with one name per port; empty when *ports* is empty.
    """
    return [port.any_name for port in ports]
def get_node_names(ports):
    """Return ``port.node.friendly_name`` for each port in *ports*, in order.

    Args:
        ports: Iterable of objects whose ``node`` attribute exposes a
            ``friendly_name``.

    Returns:
        A list with one friendly name per port; empty when *ports* is empty.
    """
    return [port.node.friendly_name for port in ports]
def _log_ports(kind, ports):
    """Log one line per port: its name, precision, layout and partial shape.

    Args:
        kind: Word inserted after "Model" in the message ("input"/"output").
        ports: Sequence of port objects to describe.
    """
    for name, port in zip(get_input_output_names(ports), ports):
        log.info(f"Model {kind} '{name}' precision {port.element_type.get_type_name()}, "
                 f"dimensions ({str(port.node.layout)}): "
                 f"{' '.join(str(x) for x in port.partial_shape)}")


def print_inputs_and_outputs_info(model):
    """Log a description of every input port, then every output port.

    Args:
        model: Object exposing ``inputs`` and ``outputs`` port sequences
            (the script passes the compiled model).
    """
    _log_ports("input", model.inputs)
    _log_ports("output", model.outputs)
# --- Runtime creation and CPU device configuration -------------------------
log.info('\nCreating OpenVINO Runtime Core')
core = Core()

device_config = {
    CPU_DEVICE_NAME:
        dict(
            PERF_COUNT='NO',
            PERFORMANCE_HINT='THROUGHPUT',
            NUM_STREAMS='-1'
        )
}
core.set_property(CPU_DEVICE_NAME, device_config[CPU_DEVICE_NAME])

# Dump the effective device properties so the run is self-describing.
keys = core.get_property(CPU_DEVICE_NAME, 'SUPPORTED_PROPERTIES')
log.info(f'\nDEVICE: {CPU_DEVICE_NAME}')
for k in keys:
    if k in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS', 'SUPPORTED_PROPERTIES'):
        continue
    try:
        log.info(f' {k} , {core.get_property(CPU_DEVICE_NAME, k)}')
    except Exception:
        # Best effort: a property that cannot be queried is skipped.
        # ``Exception`` (not a bare ``except``) so Ctrl-C still aborts.
        continue

# --- Model loading and reshaping --------------------------------------------
log.info(f'\nReading the model: {model_path}\n')
model = core.read_model(model_path)

### !!! Toggle this variable
dynamic_length = True

# -1 marks the second dimension as dynamic; otherwise it is fixed at 384.
seqlen = -1 if dynamic_length else 384

# Every model input gets the same [1, seqlen] shape.
new_shape_cfg = {
    iport.any_name: PartialShape([1, seqlen]) for iport in model.inputs
}
model.reshape(new_shape_cfg)

# --- Compilation --------------------------------------------------------------
compiled_model = core.compile_model(model, CPU_DEVICE_NAME)
input_port_names = [iport.any_name for iport in compiled_model.inputs]
print_inputs_and_outputs_info(compiled_model)
def create_input(seqlen):
    """Build one random sample keyed by the first three model input names.

    Each value is an ``int64`` array of shape ``(1, seqlen)``. The first and
    third inputs are drawn uniformly from ``[0, 999)``, the second from
    ``[0, 2)``.
    NOTE(review): presumably these are BERT token ids / mask / segment ids;
    confirm the port order against the model.

    Args:
        seqlen: Number of elements along the second axis of every array.

    Returns:
        Dict mapping ``input_port_names[0..2]`` to the generated arrays.

    Raises:
        IndexError: If the module-level ``input_port_names`` has fewer than
            three entries.
    """
    def _random_ids(high):
        # Draws ``seqlen`` values in [0, high), shaped as a batch of one.
        return np.random.randint(high, size=(1, seqlen)).astype('int64')

    return {
        input_port_names[0]: _random_ids(999),
        input_port_names[1]: _random_ids(2),
        input_port_names[2]: _random_ids(999),
    }
# --- Sample generation --------------------------------------------------------
N_SAMPLE = 1024
if dynamic_length:
    # Cycle through a few sequence lengths so the pool mixes input shapes.
    sl_list = [64, 192, 256, 384]
    loaded_samples = [
        create_input(sl)
        for _ in range(N_SAMPLE // len(sl_list))
        for sl in sl_list
    ]
else:
    loaded_samples = [create_input(seqlen) for _ in range(N_SAMPLE)]

# A job count of 0 leaves the number of infer requests to the runtime.
infer_queue = AsyncInferQueue(compiled_model, 0)


def _submit_and_wait(n_iter):
    """Submit *n_iter* async inferences round-robin over the samples, then drain."""
    # Index by the real pool size: it is smaller than N_SAMPLE whenever
    # N_SAMPLE is not a multiple of len(sl_list).
    n_loaded = len(loaded_samples)
    for it in range(n_iter):
        infer_queue.get_idle_request_id()
        infer_queue.start_async(inputs=loaded_samples[it % n_loaded])
    infer_queue.wait_all()


# warmup
_submit_and_wait(100)

niter=2500

# benchmark
# perf_counter is monotonic, so a wall-clock adjustment cannot skew the result.
start = time.perf_counter()
_submit_and_wait(niter)
e2e_elapse = time.perf_counter() - start

log.info( '\nSeqLen {} | {} iter '
          '| E2E: {:.3f} s '
          '| TPT: {:6.2f} fps'.format(
              seqlen, niter, e2e_elapse, niter/e2e_elapse)
)
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment
  
            
The model can be downloaded here:
https://huggingface.co/vuiseng9/r0.010-squad-bert-b-mvmt-8bit/tree/main/ir