Gist lix19937/48c75aaf665839d6692a7c2f13c57066 — "horizon_j6 infer cpp"
Created November 15, 2024 02:04
Horizon J6 (hobot DNN / UCP) C++ classification inference sample.
Note: GitHub warns this file may contain bidirectional Unicode text that could
be interpreted or compiled differently than it appears; review it in an editor
that reveals hidden Unicode characters.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
// hori
#include "hlog/logging.h"
#include "hobot/dnn/hb_dnn.h"
#include "hobot/hb_ucp.h"
#include "hobot/hb_ucp_sys.h"
// opencv
#include "opencv2/core/mat.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#define EMPTY "" | |
DEFINE_string(model_file, EMPTY, "model file path"); | |
DEFINE_string(image_file, EMPTY, "Test image path"); | |
DEFINE_int32(top_k, 5, "Top k classes, 5 by default"); | |
#define MOUDULE_NAME "DNN_BASIC_SAMPLE" | |
#define LOGD(err_msg, ...) HFLOGM_D(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGI(err_msg, ...) HFLOGM_I(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGE(err_msg, ...) HFLOGM_E(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGW(err_msg, ...) HFLOGM_W(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define HB_CHECK_SUCCESS(value, errmsg) \ | |
do \ | |
{ \ | |
/*value can be call of function*/ \ | |
auto ret_code = value; \ | |
if (ret_code != 0) \ | |
{ \ | |
LOGE("{}, error code: {}", errmsg, ret_code); \ | |
return ret_code; \ | |
} \ | |
} while (0); | |
typedef struct Classification | |
{ | |
int id; | |
float score; | |
const char *class_name; | |
Classification() : class_name(0), id(0), score(0.0) {} | |
Classification(int id, float score, const char *class_name) | |
: id(id), score(score), class_name(class_name) {} | |
friend bool operator>(const Classification &lhs, const Classification &rhs) | |
{ | |
return (lhs.score > rhs.score); | |
} | |
~Classification() {} | |
} Classification; | |
int prepare_tensor(hbDNNTensor *input_tensor, hbDNNTensor *output_tensor, | |
hbDNNHandle_t dnn_handle); | |
int32_t read_image_2_tensor_as_nv12(std::string &image_file, | |
hbDNNTensor *input_tensor); | |
void get_topk_result(hbDNNTensor *tensor, | |
std::vector<Classification> &top_k_cls, int top_k); | |
/** | |
* Step1: get model handle | |
* Step2: prepare input and output tensor | |
* Step3: set input data to input tensor | |
* Step4: run inference | |
* Step5: do postprocess with output data | |
* Step6: release resources | |
*/ | |
/* | |
./run_resnet \ | |
--model_file=../model/resnet50_224x224_nv12.hbm \ | |
--image_file=../data/cls_images/zebra_cls.jpg \ | |
--top_k=5 | |
*/ | |
int main(int argc, char **argv) | |
{ | |
gflags::SetUsageMessage(argv[0]); | |
gflags::ParseCommandLineFlags(&argc, &argv, true); | |
std::cout << gflags::GetArgv() << std::endl; | |
// Init logging | |
hobot::hlog::HobotLog::Instance()->SetLogLevel("DNN_BASIC_SAMPLE", hobot::hlog::LogLevel::log_info); | |
//////////////////////----------------------------------------- // Step1: get model handle | |
hbDNNPackedHandle_t packed_dnn_handle; | |
hbDNNHandle_t dnn_handle; | |
const char **model_name_list; | |
auto modelFileName = FLAGS_model_file.c_str(); | |
int model_count = 0; | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNInitializeFromFiles(&packed_dnn_handle, &modelFileName, 1), | |
"hbDNNInitializeFromFiles failed"); | |
HB_CHECK_SUCCESS(hbDNNGetModelNameList(&model_name_list, &model_count, | |
packed_dnn_handle), | |
"hbDNNGetModelNameList failed"); | |
HB_CHECK_SUCCESS( | |
hbDNNGetModelHandle(&dnn_handle, packed_dnn_handle, model_name_list[0]), | |
"hbDNNGetModelHandle failed"); | |
} | |
//////////////////////----------------------------------------- // Step2: prepare input and output tensor | |
std::vector<hbDNNTensor> input_tensors, output_tensors; | |
int input_count = 0, output_count = 0; | |
{ | |
HB_CHECK_SUCCESS(hbDNNGetInputCount(&input_count, dnn_handle), | |
"hbDNNGetInputCount failed"); | |
HB_CHECK_SUCCESS(hbDNNGetOutputCount(&output_count, dnn_handle), | |
"hbDNNGetOutputCount failed"); | |
input_tensors.resize(input_count); | |
output_tensors.resize(output_count); | |
prepare_tensor(input_tensors.data(), output_tensors.data(), dnn_handle); | |
} | |
//////////////////////----------------------------------------- // Step3: set input data to input tensor | |
{ | |
// read a single picture for input_tensor[0], for multi_input model, you | |
// should set other input data according to model input properties. !!! will changed depends on models | |
HB_CHECK_SUCCESS( | |
read_image_2_tensor_as_nv12(FLAGS_image_file, input_tensors.data()), | |
"read_image_2_tensor_as_nv12 failed"); | |
LOGI("read image to tensor as nv12 success"); | |
} | |
//////////////////////----------------------------------------- // Step4: run inference | |
hbUCPTaskHandle_t task_handle{nullptr}; | |
hbDNNTensor *output = output_tensors.data(); | |
{ | |
// make sure memory data is flushed to DDR before inference | |
for (int i = 0; i < input_count; i++) | |
{ | |
hbUCPMemFlush(&input_tensors[i].sysMem[0], HB_SYS_MEM_CACHE_CLEAN); | |
} | |
// generate task handle | |
HB_CHECK_SUCCESS( | |
hbDNNInferV2(&task_handle, output, input_tensors.data(), dnn_handle), | |
"hbDNNInferV2 failed"); | |
// submit task | |
hbUCPSchedParam ctrl_param; | |
HB_UCP_INITIALIZE_SCHED_PARAM(&ctrl_param); | |
ctrl_param.backend = HB_UCP_BPU_CORE_ANY; | |
HB_CHECK_SUCCESS(hbUCPSubmitTask(task_handle, &ctrl_param), | |
"hbUCPSubmitTask failed"); | |
// wait task done | |
HB_CHECK_SUCCESS(hbUCPWaitTaskDone(task_handle, 0), | |
"hbUCPWaitTaskDone failed"); | |
} | |
// Step5: do postprocess with output data | |
std::vector<Classification> top_k_cls; | |
{ | |
// make sure CPU read data from DDR before using output tensor data | |
for (int i = 0; i < output_count; i++) | |
{ | |
hbUCPMemFlush(&output_tensors[i].sysMem[0], HB_SYS_MEM_CACHE_INVALIDATE); | |
} | |
get_topk_result(output, top_k_cls, FLAGS_top_k); | |
for (int i = 0; i < FLAGS_top_k; i++) | |
{ | |
LOGI("TOP {} result id: {}", i, top_k_cls[i].id); | |
} | |
} | |
// Step6: release resources | |
{ | |
// release task handle | |
HB_CHECK_SUCCESS(hbUCPReleaseTask(task_handle), "hbUCPReleaseTask failed"); | |
// free input mem | |
for (int i = 0; i < input_count; i++) | |
{ | |
HB_CHECK_SUCCESS(hbUCPFree(&(input_tensors[i].sysMem[0])), | |
"hbUCPFree failed"); | |
} | |
// free output mem | |
for (int i = 0; i < output_count; i++) | |
{ | |
HB_CHECK_SUCCESS(hbUCPFree(&(output_tensors[i].sysMem[0])), | |
"hbUCPFree failed"); | |
} | |
// release model | |
HB_CHECK_SUCCESS(hbDNNRelease(packed_dnn_handle), "hbDNNRelease failed"); | |
} | |
return 0; | |
} | |
#define ALIGN(value, alignment) (((value) + ((alignment) - 1)) & ~((alignment) - 1)) | |
#define ALIGN_32(value) ALIGN(value, 32) | |
int prepare_tensor(hbDNNTensor *input_tensor, hbDNNTensor *output_tensor, | |
hbDNNHandle_t dnn_handle) | |
{ | |
int input_count = 0; | |
int output_count = 0; | |
hbDNNGetInputCount(&input_count, dnn_handle); | |
hbDNNGetOutputCount(&output_count, dnn_handle); | |
/** Tips: | |
* For input memory size in most cases: | |
* * input_memSize = input[i].properties.alignedByteSize | |
* but here for dynamic stride of y and uv,alignedByteSize is not fixed | |
* For output memory size: | |
* * output_memSize = output[i].properties.alignedByteSize | |
*/ | |
hbDNNTensor *input = input_tensor; | |
for (int i = 0; i < input_count; i++) | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNGetInputTensorProperties(&input[i].properties, dnn_handle, i), | |
"hbDNNGetInputTensorProperties failed"); | |
/** Tips: | |
* For input tensor, usually need to pad the input data according to stride obtained from properties. | |
* but here for dynamic stride of y and uv,user needs to specify a value which should be 32 bytes aligned for the -1 position in stride. | |
* */ | |
auto dim_len = input[i].properties.validShape.numDimensions; | |
for (int32_t dim_i = dim_len - 1; dim_i >= 0; --dim_i) | |
{ | |
if (input[i].properties.stride[dim_i] == -1) | |
{ | |
auto cur_stride = | |
input[i].properties.stride[dim_i + 1] * | |
input[i].properties.validShape.dimensionSize[dim_i + 1]; | |
input[i].properties.stride[dim_i] = ALIGN_32(cur_stride); | |
} | |
} | |
int input_memSize = input[i].properties.stride[0] * | |
input[i].properties.validShape.dimensionSize[0]; | |
HB_CHECK_SUCCESS(hbUCPMallocCached(&input[i].sysMem[0], input_memSize, 0), | |
"hbUCPMallocCached failed"); | |
// Show how to get input name | |
const char *input_name; | |
HB_CHECK_SUCCESS(hbDNNGetInputName(&input_name, dnn_handle, i), | |
"hbDNNGetInputName failed"); | |
LOGI("input[{}] name is {}", i, input_name); | |
} | |
hbDNNTensor *output = output_tensor; | |
for (int i = 0; i < output_count; i++) | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNGetOutputTensorProperties(&output[i].properties, dnn_handle, i), | |
"hbDNNGetOutputTensorProperties failed"); | |
int output_memSize = output[i].properties.alignedByteSize; | |
HB_CHECK_SUCCESS(hbUCPMallocCached(&output[i].sysMem[0], output_memSize, 0), | |
"hbUCPMallocCached failed"); | |
// Show how to get output name | |
const char *output_name; | |
HB_CHECK_SUCCESS(hbDNNGetOutputName(&output_name, dnn_handle, i), | |
"hbDNNGetOutputName failed"); | |
LOGI("output[{}] name is {}", i, output_name); | |
} | |
return 0; | |
} | |
/** You can define read_image_2_tensor_as_other_type to prepare your data **/ | |
int32_t read_image_2_tensor_as_nv12(std::string &image_file, | |
hbDNNTensor *input_tensor) | |
{ | |
// the struct of input shape is NHWC | |
int input_h = input_tensor[0].properties.validShape.dimensionSize[1]; | |
int input_w = input_tensor[0].properties.validShape.dimensionSize[2]; | |
cv::Mat bgr_mat = cv::imread(image_file, cv::IMREAD_COLOR); | |
if (bgr_mat.empty()) | |
{ | |
LOGE("image file not exist!"); | |
return -1; | |
} | |
// resize | |
cv::Mat mat; | |
mat.create(input_h, input_w, bgr_mat.type()); | |
cv::resize(bgr_mat, mat, mat.size(), 0, 0); | |
// convert to YUV420 | |
if (input_h % 2 || input_w % 2) | |
{ | |
LOGE("input img height and width must aligned by 2!"); | |
return -1; | |
} | |
cv::Mat yuv_mat; | |
cv::cvtColor(mat, yuv_mat, cv::COLOR_BGR2YUV_I420); | |
uint8_t *yuv_data = yuv_mat.ptr<uint8_t>(); | |
uint8_t *y_data_src = yuv_data; | |
// copy y data | |
uint8_t *y_data_dst = | |
reinterpret_cast<uint8_t *>(input_tensor[0].sysMem[0].virAddr); | |
for (int32_t h = 0; h < input_h; ++h) | |
{ | |
memcpy(y_data_dst, y_data_src, input_w); | |
y_data_src += input_w; | |
// add padding | |
y_data_dst += input_tensor[0].properties.stride[1]; | |
} | |
// copy uv data | |
int32_t uv_height = input_tensor[1].properties.validShape.dimensionSize[1]; | |
int32_t uv_width = input_tensor[1].properties.validShape.dimensionSize[2]; | |
uint8_t *uv_data_dst = | |
reinterpret_cast<uint8_t *>(input_tensor[1].sysMem[0].virAddr); | |
uint8_t *u_data_src = yuv_data + input_h * input_w; | |
uint8_t *v_data_src = u_data_src + uv_height * uv_width; | |
for (int32_t h = 0; h < uv_height; ++h) | |
{ | |
auto *cur_data = uv_data_dst; | |
for (int32_t w = 0; w < uv_width; ++w) | |
{ | |
*cur_data++ = *u_data_src++; | |
*cur_data++ = *v_data_src++; | |
} | |
// add padding | |
uv_data_dst += input_tensor[1].properties.stride[1]; | |
} | |
return 0; | |
} | |
void get_topk_result(hbDNNTensor *tensor, | |
std::vector<Classification> &top_k_cls, int top_k) | |
{ | |
hbUCPMemFlush(&(tensor->sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE); | |
std::priority_queue<Classification, std::vector<Classification>, | |
std::greater<Classification>> | |
queue; | |
// The type reinterpret_cast should be determined according to the output type | |
// For example: HB_DNN_TENSOR_TYPE_F32 is float | |
auto data = reinterpret_cast<float *>(tensor->sysMem[0].virAddr); | |
auto quanti_type{tensor->properties.quantiType}; | |
// For example model, quantiType is NONE and no dequantize processing is required. | |
if (quanti_type != hbDNNQuantiType::NONE) | |
{ | |
LOGE("quanti_type is not NONE, and the output needs to be dequantized!"); | |
} | |
// 1000 classification score values | |
int tensor_len = 1000; | |
for (auto i = 0; i < tensor_len; i++) | |
{ | |
float score = data[i]; | |
queue.push(Classification(i, score, "")); | |
if (queue.size() > top_k) | |
{ | |
queue.pop(); | |
} | |
} | |
while (!queue.empty()) | |
{ | |
top_k_cls.emplace_back(queue.top()); | |
queue.pop(); | |
} | |
std::reverse(top_k_cls.begin(), top_k_cls.end()); | |
} | |
/* | |
root@9ed8cd874b9d:/open_explorer/J6_start/nni_ucp/build# ./run_resnet \ | |
--model_file=../model/resnet50_224x224_nv12.hbm \ | |
--image_file=../data/cls_images/zebra_cls.jpg \ | |
--top_k=5 | |
[UCP]: log level = 3 | |
[UCP]: UCP version = 3.1.2 | |
[VP]: log level = 3 | |
[DNN]: log level = 3 | |
[HPL]: log level = 3 | |
[UCPT]: log level = 6 | |
./run_resnet --model_file=../model/resnet50_224x224_nv12.hbm --image_file=../data/cls_images/zebra_cls.jpg --top_k=5 | |
[I][26367][11-12][08:35:50:282][main.cc:267][run_resnet][DNN_BASIC_SAMPLE] input[0] name is input_y | |
[I][26367][11-12][08:35:50:282][main.cc:267][run_resnet][DNN_BASIC_SAMPLE] input[1] name is input_uv | |
[I][26367][11-12][08:35:50:282][main.cc:284][run_resnet][DNN_BASIC_SAMPLE] output[0] name is output | |
[I][26367][11-12][08:35:50:356][main.cc:148][run_resnet][DNN_BASIC_SAMPLE] read image to tensor as nv12 success | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[I][26367][11-12][08:35:56:401][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 0 result id: 340 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 1 result id: 292 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 2 result id: 9 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 3 result id: 353 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 4 result id: 343 | |
*/ |
(GitHub footer: sign up for free to join this conversation on GitHub; already
have an account? Sign in to comment.)