Generic script for doing inference on an OpenVINO model
#!/usr/bin/env python
#
# Usage (-m is required; the extension library path below is the default
# location in an OpenVINO install):
#   python openvino_inference.py -m model.xml \
#       -l /opt/intel/openvino/inference_engine/lib/libcpu_extension.so
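#
# Further example invocations (the model filenames here are placeholders --
# substitute your own IR files):
#   python openvino_inference.py -m model.xml -bz 4 -number_iter 100 -d CPU
#   python openvino_inference.py -m model_fp16.xml -d MYRIAD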

import sys
import os
from argparse import ArgumentParser
import logging as log
from timeit import default_timer as timer

import numpy as np
import psutil

from openvino.inference_engine import IENetwork, IEPlugin

def load_model(model_xml):
    """
    Given the path to the model's .xml file, return the paths to
    both IR files: the .xml (graph) and the .bin (weights).
    """
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    return model_xml, model_bin

def build_argparser():
    # Use the CPU affinity of the current process to pick a sensible
    # default for the number of threads.
    p = psutil.Process()
    parser = ArgumentParser()
    parser.add_argument("-bz", "--batch_size",
                        help="Batch size", default=1, type=int)
    parser.add_argument("-number_iter", "--number_iter",
                        help="Number of iterations", default=10, type=int)
    parser.add_argument("--num_threads", default=len(p.cpu_affinity()),
                        type=int,
                        help="Number of threads to use (defaults to the "
                             "number of cores available to this process)")
    parser.add_argument("--blocktime", default=1, type=int,
                        help="KMP blocktime")
    parser.add_argument("-stats", "--stats",
                        help="Print per-layer performance statistics",
                        action="store_true")
    parser.add_argument("-l", "--cpu_extension",
                        help="MKL-DNN (CPU)-targeted custom layers. "
                             "Absolute path to a shared library with "
                             "the kernel implementations.",
                        type=str, default=None)
    parser.add_argument("-pp", "--plugin_dir",
                        help="Path to a plugin folder",
                        type=str, default=None)
    parser.add_argument("-d", "--device",
                        help="Target device for inference: CPU, GPU, FPGA, "
                             "or MYRIAD. The sample will look for a suitable "
                             "plugin for the specified device (CPU by default)",
                        default="CPU", type=str)
    parser.add_argument("-m", "--model",
                        help="Path to the OpenVINO IR .xml file",
                        required=True, type=str)
    return parser

def load_openvino_model(args):
    """
    Load the OpenVINO model (.xml for the graph, .bin for the weights)
    onto the requested device plugin.
    """
    # Plugin initialization for the specified device; load the
    # extensions library if one was given.
    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    if args.cpu_extension and "CPU" in args.device:
        plugin.add_cpu_extension(args.cpu_extension)

    # Read the IR. If using MYRIAD, the FP16 version of the model
    # must be loaded.
    model_xml, model_bin = load_model(args.model)
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in plugin.device:
        supported_layers = plugin.get_supported_layers(net)
        not_supported_layers = [layer for layer in net.layers.keys()
                                if layer not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("The following layers are not supported by the plugin "
                      "for the specified device {}:\n {}".format(
                          plugin.device, ", ".join(not_supported_layers)))
            log.error("Please specify the path to the CPU extensions library "
                      "with the -l or --cpu_extension command-line argument")
            sys.exit(1)

    log.info("The network inputs are:")
    for idx, input_layer in enumerate(net.inputs.keys()):
        log.info("{}: {}, shape = {}".format(idx, input_layer,
                                             net.inputs[input_layer].shape))

    log.info("The network outputs are:")
    for idx, output_layer in enumerate(net.outputs.keys()):
        log.info("{}: {}, shape = {}".format(idx, output_layer,
                                             net.outputs[output_layer].shape))

    net.batch_size = args.batch_size

    # Load the model onto the plugin.
    exec_net = plugin.load(network=net)
    del plugin

    return exec_net, net

def print_stats(exec_net):
    """
    Print the per-layer performance counters from the first infer request.
    """
    perf_counts = exec_net.requests[0].get_perf_counts()
    log.info("Performance counters:")
    log.info("{:<70} {:<15} {:<15} {:<15} {:<10}".format(
        "name", "layer_type", "exec_type", "status", "real_time, us"))
    for layer, stats in perf_counts.items():
        log.info("{:<70} {:<15} {:<15} {:<15} {:<10}".format(
            layer, stats["layer_type"], stats["exec_type"],
            stats["status"], stats["real_time"]))

def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # CPU-specific settings for multi-threading across cores.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Silence the AVX/SSE warnings
    os.environ["OMP_NUM_THREADS"] = str(args.num_threads)
    os.environ["KMP_BLOCKTIME"] = str(args.blocktime)
    os.environ["KMP_AFFINITY"] = "granularity=thread,compact,1,0"

    exec_net, net = load_openvino_model(args)

    # Start sync inference. The random data shape (2 channels of
    # 2048 x 2048) is specific to this model; change it to match yours.
    inputdata = np.random.random((len(net.inputs.keys()), args.batch_size,
                                  2, 2048, 2048))

    # Inference on OpenVINO looks just like a TensorFlow feed_dict.
    # There may be multiple inputs to the model, so build a dictionary
    # mapping each input layer name to its data.
    inputs_dict = {}
    for idx, input_layer in enumerate(net.inputs.keys()):
        inputs_dict.update({input_layer: inputdata[idx]})

    log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = np.zeros(args.number_iter)
    for idx in range(args.number_iter):
        start = timer()
        res = exec_net.infer(inputs=inputs_dict)
        infer_time[idx] = timer() - start
        log.info("Iteration #{}/{}: Finished inference in {:.6f} seconds.".format(
            idx + 1, args.number_iter, infer_time[idx]))

    average_inference = infer_time.mean()
    log.info("Average running time of one batch: {:.5f} seconds".format(
        average_inference))
    log.info("Standard deviation of one batch: {:.5f} seconds".format(
        infer_time.std()))
    log.info("Images per second = {:.3f}".format(
        args.batch_size / average_inference))

    # Statistics print out the layer-by-layer costs in inference time.
    if args.stats:
        print_stats(exec_net)

    # Unpack the predictions. This is where post-processing of each
    # batch element's output would go.
    for idx, output_layer in enumerate(net.outputs.keys()):
        res_out = res[output_layer]
        for batch, prediction in enumerate(res_out):
            output_data = prediction

    del exec_net

if __name__ == '__main__':
    sys.exit(main() or 0)
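
The hard-coded input shape in main() ties the script to one particular model. A minimal sketch of building the feed dictionary from whatever input shapes the network actually reports (it would replace the inputdata/inputs_dict lines above, and assumes random FP32 data is an acceptable stand-in for real inputs, e.g., for benchmarking):

    # Sketch: derive each input's shape from the network itself rather
    # than hard-coding (batch, 2, 2048, 2048). net.inputs[name].shape
    # already reflects net.batch_size at this point.
    inputs_dict = {name: np.random.random(net.inputs[name].shape).astype(np.float32)
                   for name in net.inputs.keys()}
    res = exec_net.infer(inputs=inputs_dict)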