swdee · May 4, 2025 10:54
diff --git a/inference_npu_bench.py b/inference_npu_bench.py
 # ---------------------------------------------------------------------
 # Copyright 2024 Cix Technology Group Co., Ltd.  All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 # ---------------------------------------------------------------------
 """
 This is the script of cix noe umd api for inference over npu,
 with timing / benchmarking support, and textual detection logs.
 """
 import cv2
 import numpy as np
 import argparse
 import os
 import sys
 import time
 from tqdm import tqdm

 # Determine the directory containing the current script
 script_dir = os.path.dirname(os.path.realpath(__file__))
 parent_dir = os.path.abspath(os.path.join(script_dir, ".."))
 sys.path.insert(0, parent_dir)

 from utils.tools import get_file_list
 from utils.image_process import preprocess_object_detect_method1
 from utils.object_detect_postprocess import postprocess_yolo, xywh2xyxy
 from utils.draw import draw_coco as draw
 from utils.NOE_Engine import EngineInfer


 def get_args(argv=None):
    """Parse the command-line options of the NPU inference/benchmark script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            what callers that pass no argument get.

    Returns:
        argparse.Namespace with the attributes ``image_path``, ``model_path``,
        ``output_dir``, ``conf_thr``, ``nms_thr``, ``benchmark`` and ``runs``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--image_path",
        default="test_data",
        help="path to the image file or directory of images",
    )
    parser.add_argument(
        "--model_path",
        default="yolov8_l.cix",
        help="path to the quant model file",
    )
    parser.add_argument(
        "--output_dir", default="./output_npu", help="path to the result output"
    )
    parser.add_argument(
        "--conf_thr",
        type=float,
        default=0.3,
        help="Score threshold to filter the result.",
    )
    parser.add_argument(
        "--nms_thr", type=float, default=0.45, help="NMS threshold for detections."
    )
    parser.add_argument(
        "--benchmark",
        action="store_true",
        help="Enable COCO metric evaluation on val set.",
    )
    # The script performs one extra, untimed warm-up pass per image before
    # these timed passes, so the warm-up is NOT counted in --runs.
    parser.add_argument(
        "--runs",
        type=int,
        default=10,
        help="Number of timed runs per image "
        "(an additional untimed warm-up run is always done first).",
    )
    return parser.parse_args(argv)


 if __name__ == "__main__":
    # Script entry point: run the model on every input image, time the
    # forward passes, print the detections, then either feed them to the COCO
    # evaluator (--benchmark) or save an annotated copy of each image.
    args = get_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Build the work list: COCO image ids in benchmark mode, file paths otherwise.
    if args.benchmark:
        from utils.evaluate.coco_metric import COCO_Metric

        save_pred_json = "pred_yolov8_l_npu.json"
        coco_metric = COCO_Metric(saved_json_path=save_pred_json)
        image_list = coco_metric.get_image_ids()
    else:
        image_list = get_file_list(args.image_path)

    model = EngineInfer(args.model_path)

    # Latency in milliseconds of every timed forward pass, across all images.
    all_times = []

    # try/finally so the engine is released even when an image fails mid-run.
    try:
        for img_entry in tqdm(image_list, desc="Images"):
            # In benchmark mode the entry is an image id that must be mapped
            # to a path; otherwise the entry already is the path.
            if args.benchmark:
                img_id = img_entry
                img_name = coco_metric.get_image_path(img_id)
            else:
                img_name = img_entry

            # Preprocess once; the same tensor is reused for every pass.
            src_shape, new_shape, show_image, data = preprocess_object_detect_method1(
                img_name, target_size=(640, 640), mode="BGR"
            )
            data = data.astype(np.float32)

            # Warm-up run (not timed). Its output is kept so post-processing
            # still has a prediction for THIS image when --runs is 0 or
            # negative and the timed loop below never executes.
            out = model.forward(data)[0]

            # Timed runs: only the forward call sits inside the timer.
            for _ in range(args.runs):
                t0 = time.perf_counter()
                out = model.forward(data)[0]
                all_times.append((time.perf_counter() - t0) * 1000.0)  # ms

            # View the most recent output as 84 x 8400, then transpose to
            # 8400 x 84 for post-processing.
            # NOTE(review): presumably 4 box values + 80 class scores per
            # candidate -- confirm against the exported model.
            pred = out.reshape(84, 8400).transpose(1, 0)
            results = postprocess_yolo(pred, args.conf_thr, args.nms_thr)
            # len() rather than truthiness: results is sliced like an array
            # below, and array truthiness would be ambiguous.
            has_detections = len(results) > 0

            # --- Textual output of detections ---
            print(f"\n=== Detections for {os.path.basename(img_name)} ===")
            if not has_detections:
                print("  No objects detected.")
            else:
                # Columns 0-3 are xywh boxes; convert to xyxy and rescale
                # from the network input size back to the source image size.
                bbox_xywh = results[:, :4]
                bbox_xyxy = xywh2xyxy(bbox_xywh)
                x_scale = src_shape[1] / new_shape[1]
                y_scale = src_shape[0] / new_shape[0]
                bbox_xyxy *= (x_scale, y_scale, x_scale, y_scale)

                for idx, row in enumerate(results):
                    cls_id = int(row[5])
                    conf = float(row[4])
                    x1, y1, x2, y2 = bbox_xyxy[idx]
                    print(f"  [{idx}] class={cls_id}, conf={conf:.3f}, "
                          f"bbox=({x1:.1f},{y1:.1f},{x2:.1f},{y2:.1f})")

            if args.benchmark:
                # Only images with detections contribute prediction entries.
                if has_detections:
                    coco_metric.append_bboxes(
                        img_id, bbox_xyxy, results[:, 5], results[:, 4]
                    )
            else:
                # Draw boxes when there are any; otherwise save the image as is.
                if has_detections:
                    ret_img = draw(show_image, bbox_xyxy, results[:, 5], results[:, 4])
                else:
                    ret_img = show_image

                out_path = os.path.join(args.output_dir, os.path.basename(img_name))
                # cv2.imwrite reports failure through its return value, so
                # surface it instead of silently losing the result image.
                if not cv2.imwrite(out_path, ret_img):
                    print(f"Warning: failed to write {out_path}", file=sys.stderr)

        # Final COCO metric evaluation over everything appended above.
        if args.benchmark:
            coco_metric.saved_json()
            coco_metric.evaluate()

        # Timing summary; skipped when no timed run was recorded.
        if all_times:
            avg = sum(all_times) / len(all_times)
            mn = min(all_times)
            mx = max(all_times)
            print(f"\nInference over {len(all_times)} runs:")
            print(f"  avg = {avg:.2f} ms   min = {mn:.2f} ms   max = {mx:.2f} ms")
    finally:
        model.clean()
	# ---------------------------------------------------------------------
	# Copyright 2024 Cix Technology Group Co., Ltd. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0
	# ---------------------------------------------------------------------
	"""
	This is the script of cix noe umd api for inference over npu,
	with timing / benchmarking support, and textual detection logs.
	"""
	import cv2
	import numpy as np
	import argparse
	import os
	import sys
	import time
	from tqdm import tqdm

	# Determine the directory containing the current script
	script_dir = os.path.dirname(os.path.realpath(__file__))
	parent_dir = os.path.abspath(os.path.join(script_dir, ".."))
	sys.path.insert(0, parent_dir)

	from utils.tools import get_file_list
	from utils.image_process import preprocess_object_detect_method1
	from utils.object_detect_postprocess import postprocess_yolo, xywh2xyxy
	from utils.draw import draw_coco as draw
	from utils.NOE_Engine import EngineInfer


	def get_args(argv=None):
	    """Parse the command-line options of the NPU inference/benchmark script.

	    Args:
	        argv: Optional list of argument strings to parse. ``None`` (the
	            default) makes argparse read the process command line, which is
	            what callers that pass no argument get.

	    Returns:
	        argparse.Namespace with the attributes ``image_path``, ``model_path``,
	        ``output_dir``, ``conf_thr``, ``nms_thr``, ``benchmark`` and ``runs``.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "--image_path",
	        default="test_data",
	        help="path to the image file or directory of images",
	    )
	    parser.add_argument(
	        "--model_path",
	        default="yolov8_l.cix",
	        help="path to the quant model file",
	    )
	    parser.add_argument(
	        "--output_dir", default="./output_npu", help="path to the result output"
	    )
	    parser.add_argument(
	        "--conf_thr",
	        type=float,
	        default=0.3,
	        help="Score threshold to filter the result.",
	    )
	    parser.add_argument(
	        "--nms_thr", type=float, default=0.45, help="NMS threshold for detections."
	    )
	    parser.add_argument(
	        "--benchmark",
	        action="store_true",
	        help="Enable COCO metric evaluation on val set.",
	    )
	    # The script performs one extra, untimed warm-up pass per image before
	    # these timed passes, so the warm-up is NOT counted in --runs.
	    parser.add_argument(
	        "--runs",
	        type=int,
	        default=10,
	        help="Number of timed runs per image "
	        "(an additional untimed warm-up run is always done first).",
	    )
	    return parser.parse_args(argv)


	if __name__ == "__main__":
	    # Script entry point: run the model on every input image, time the
	    # forward passes, print the detections, then either feed them to the COCO
	    # evaluator (--benchmark) or save an annotated copy of each image.
	    args = get_args()
	    os.makedirs(args.output_dir, exist_ok=True)

	    # Build the work list: COCO image ids in benchmark mode, file paths otherwise.
	    if args.benchmark:
	        from utils.evaluate.coco_metric import COCO_Metric

	        save_pred_json = "pred_yolov8_l_npu.json"
	        coco_metric = COCO_Metric(saved_json_path=save_pred_json)
	        image_list = coco_metric.get_image_ids()
	    else:
	        image_list = get_file_list(args.image_path)

	    model = EngineInfer(args.model_path)

	    # Latency in milliseconds of every timed forward pass, across all images.
	    all_times = []

	    # try/finally so the engine is released even when an image fails mid-run.
	    try:
	        for img_entry in tqdm(image_list, desc="Images"):
	            # In benchmark mode the entry is an image id that must be mapped
	            # to a path; otherwise the entry already is the path.
	            if args.benchmark:
	                img_id = img_entry
	                img_name = coco_metric.get_image_path(img_id)
	            else:
	                img_name = img_entry

	            # Preprocess once; the same tensor is reused for every pass.
	            src_shape, new_shape, show_image, data = preprocess_object_detect_method1(
	                img_name, target_size=(640, 640), mode="BGR"
	            )
	            data = data.astype(np.float32)

	            # Warm-up run (not timed). Its output is kept so post-processing
	            # still has a prediction for THIS image when --runs is 0 or
	            # negative and the timed loop below never executes.
	            out = model.forward(data)[0]

	            # Timed runs: only the forward call sits inside the timer.
	            for _ in range(args.runs):
	                t0 = time.perf_counter()
	                out = model.forward(data)[0]
	                all_times.append((time.perf_counter() - t0) * 1000.0)  # ms

	            # View the most recent output as 84 x 8400, then transpose to
	            # 8400 x 84 for post-processing.
	            # NOTE(review): presumably 4 box values + 80 class scores per
	            # candidate -- confirm against the exported model.
	            pred = out.reshape(84, 8400).transpose(1, 0)
	            results = postprocess_yolo(pred, args.conf_thr, args.nms_thr)
	            # len() rather than truthiness: results is sliced like an array
	            # below, and array truthiness would be ambiguous.
	            has_detections = len(results) > 0

	            # --- Textual output of detections ---
	            print(f"\n=== Detections for {os.path.basename(img_name)} ===")
	            if not has_detections:
	                print(" No objects detected.")
	            else:
	                # Columns 0-3 are xywh boxes; convert to xyxy and rescale
	                # from the network input size back to the source image size.
	                bbox_xywh = results[:, :4]
	                bbox_xyxy = xywh2xyxy(bbox_xywh)
	                x_scale = src_shape[1] / new_shape[1]
	                y_scale = src_shape[0] / new_shape[0]
	                bbox_xyxy *= (x_scale, y_scale, x_scale, y_scale)

	                for idx, row in enumerate(results):
	                    cls_id = int(row[5])
	                    conf = float(row[4])
	                    x1, y1, x2, y2 = bbox_xyxy[idx]
	                    print(f" [{idx}] class={cls_id}, conf={conf:.3f}, "
	                          f"bbox=({x1:.1f},{y1:.1f},{x2:.1f},{y2:.1f})")

	            if args.benchmark:
	                # Only images with detections contribute prediction entries.
	                if has_detections:
	                    coco_metric.append_bboxes(
	                        img_id, bbox_xyxy, results[:, 5], results[:, 4]
	                    )
	            else:
	                # Draw boxes when there are any; otherwise save the image as is.
	                if has_detections:
	                    ret_img = draw(show_image, bbox_xyxy, results[:, 5], results[:, 4])
	                else:
	                    ret_img = show_image

	                out_path = os.path.join(args.output_dir, os.path.basename(img_name))
	                # cv2.imwrite reports failure through its return value, so
	                # surface it instead of silently losing the result image.
	                if not cv2.imwrite(out_path, ret_img):
	                    print(f"Warning: failed to write {out_path}", file=sys.stderr)

	        # Final COCO metric evaluation over everything appended above.
	        if args.benchmark:
	            coco_metric.saved_json()
	            coco_metric.evaluate()

	        # Timing summary; skipped when no timed run was recorded.
	        if all_times:
	            avg = sum(all_times) / len(all_times)
	            mn = min(all_times)
	            mx = max(all_times)
	            print(f"\nInference over {len(all_times)} runs:")
	            print(f" avg = {avg:.2f} ms min = {mn:.2f} ms max = {mx:.2f} ms")
	    finally:
	        model.clean()