Gist lix19937/48c75aaf665839d6692a7c2f13c57066 — "horizon_j6 infer cpp"
Created November 15, 2024 02:04
Horizon J6 (hobot DNN / UCP) C++ classification inference sample.
Note: GitHub warns this file may contain bidirectional Unicode text that could
be interpreted or compiled differently than it appears; review it in an editor
that reveals hidden Unicode characters.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
// hori
#include "hlog/logging.h"
#include "hobot/dnn/hb_dnn.h"
#include "hobot/hb_ucp.h"
#include "hobot/hb_ucp_sys.h"
// opencv
#include "opencv2/core/mat.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#define EMPTY "" | |
DEFINE_string(model_file, EMPTY, "model file path"); | |
DEFINE_string(image_file, EMPTY, "Test image path"); | |
DEFINE_int32(top_k, 5, "Top k classes, 5 by default"); | |
#define MOUDULE_NAME "DNN_BASIC_SAMPLE" | |
#define LOGD(err_msg, ...) HFLOGM_D(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGI(err_msg, ...) HFLOGM_I(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGE(err_msg, ...) HFLOGM_E(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define LOGW(err_msg, ...) HFLOGM_W(MOUDULE_NAME, err_msg, ##__VA_ARGS__) | |
#define HB_CHECK_SUCCESS(value, errmsg) \ | |
do \ | |
{ \ | |
/*value can be call of function*/ \ | |
auto ret_code = value; \ | |
if (ret_code != 0) \ | |
{ \ | |
LOGE("{}, error code: {}", errmsg, ret_code); \ | |
return ret_code; \ | |
} \ | |
} while (0); | |
typedef struct Classification | |
{ | |
int id; | |
float score; | |
const char *class_name; | |
Classification() : class_name(0), id(0), score(0.0) {} | |
Classification(int id, float score, const char *class_name) | |
: id(id), score(score), class_name(class_name) {} | |
friend bool operator>(const Classification &lhs, const Classification &rhs) | |
{ | |
return (lhs.score > rhs.score); | |
} | |
~Classification() {} | |
} Classification; | |
int prepare_tensor(hbDNNTensor *input_tensor, hbDNNTensor *output_tensor, | |
hbDNNHandle_t dnn_handle); | |
int32_t read_image_2_tensor_as_nv12(std::string &image_file, | |
hbDNNTensor *input_tensor); | |
void get_topk_result(hbDNNTensor *tensor, | |
std::vector<Classification> &top_k_cls, int top_k); | |
/** | |
* Step1: get model handle | |
* Step2: prepare input and output tensor | |
* Step3: set input data to input tensor | |
* Step4: run inference | |
* Step5: do postprocess with output data | |
* Step6: release resources | |
*/ | |
/* | |
./run_resnet \ | |
--model_file=../model/resnet50_224x224_nv12.hbm \ | |
--image_file=../data/cls_images/zebra_cls.jpg \ | |
--top_k=5 | |
*/ | |
int main(int argc, char **argv) | |
{ | |
gflags::SetUsageMessage(argv[0]); | |
gflags::ParseCommandLineFlags(&argc, &argv, true); | |
std::cout << gflags::GetArgv() << std::endl; | |
// Init logging | |
hobot::hlog::HobotLog::Instance()->SetLogLevel("DNN_BASIC_SAMPLE", hobot::hlog::LogLevel::log_info); | |
//////////////////////----------------------------------------- // Step1: get model handle | |
hbDNNPackedHandle_t packed_dnn_handle; | |
hbDNNHandle_t dnn_handle; | |
const char **model_name_list; | |
auto modelFileName = FLAGS_model_file.c_str(); | |
int model_count = 0; | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNInitializeFromFiles(&packed_dnn_handle, &modelFileName, 1), | |
"hbDNNInitializeFromFiles failed"); | |
HB_CHECK_SUCCESS(hbDNNGetModelNameList(&model_name_list, &model_count, | |
packed_dnn_handle), | |
"hbDNNGetModelNameList failed"); | |
HB_CHECK_SUCCESS( | |
hbDNNGetModelHandle(&dnn_handle, packed_dnn_handle, model_name_list[0]), | |
"hbDNNGetModelHandle failed"); | |
} | |
//////////////////////----------------------------------------- // Step2: prepare input and output tensor | |
std::vector<hbDNNTensor> input_tensors, output_tensors; | |
int input_count = 0, output_count = 0; | |
{ | |
HB_CHECK_SUCCESS(hbDNNGetInputCount(&input_count, dnn_handle), | |
"hbDNNGetInputCount failed"); | |
HB_CHECK_SUCCESS(hbDNNGetOutputCount(&output_count, dnn_handle), | |
"hbDNNGetOutputCount failed"); | |
input_tensors.resize(input_count); | |
output_tensors.resize(output_count); | |
prepare_tensor(input_tensors.data(), output_tensors.data(), dnn_handle); | |
} | |
//////////////////////----------------------------------------- // Step3: set input data to input tensor | |
{ | |
// read a single picture for input_tensor[0], for multi_input model, you | |
// should set other input data according to model input properties. !!! will changed depends on models | |
HB_CHECK_SUCCESS( | |
read_image_2_tensor_as_nv12(FLAGS_image_file, input_tensors.data()), | |
"read_image_2_tensor_as_nv12 failed"); | |
LOGI("read image to tensor as nv12 success"); | |
} | |
//////////////////////----------------------------------------- // Step4: run inference | |
hbUCPTaskHandle_t task_handle{nullptr}; | |
hbDNNTensor *output = output_tensors.data(); | |
{ | |
// make sure memory data is flushed to DDR before inference | |
for (int i = 0; i < input_count; i++) | |
{ | |
hbUCPMemFlush(&input_tensors[i].sysMem[0], HB_SYS_MEM_CACHE_CLEAN); | |
} | |
// generate task handle | |
HB_CHECK_SUCCESS( | |
hbDNNInferV2(&task_handle, output, input_tensors.data(), dnn_handle), | |
"hbDNNInferV2 failed"); | |
// submit task | |
hbUCPSchedParam ctrl_param; | |
HB_UCP_INITIALIZE_SCHED_PARAM(&ctrl_param); | |
ctrl_param.backend = HB_UCP_BPU_CORE_ANY; | |
HB_CHECK_SUCCESS(hbUCPSubmitTask(task_handle, &ctrl_param), | |
"hbUCPSubmitTask failed"); | |
// wait task done | |
HB_CHECK_SUCCESS(hbUCPWaitTaskDone(task_handle, 0), | |
"hbUCPWaitTaskDone failed"); | |
} | |
// Step5: do postprocess with output data | |
std::vector<Classification> top_k_cls; | |
{ | |
// make sure CPU read data from DDR before using output tensor data | |
for (int i = 0; i < output_count; i++) | |
{ | |
hbUCPMemFlush(&output_tensors[i].sysMem[0], HB_SYS_MEM_CACHE_INVALIDATE); | |
} | |
get_topk_result(output, top_k_cls, FLAGS_top_k); | |
for (int i = 0; i < FLAGS_top_k; i++) | |
{ | |
LOGI("TOP {} result id: {}", i, top_k_cls[i].id); | |
} | |
} | |
// Step6: release resources | |
{ | |
// release task handle | |
HB_CHECK_SUCCESS(hbUCPReleaseTask(task_handle), "hbUCPReleaseTask failed"); | |
// free input mem | |
for (int i = 0; i < input_count; i++) | |
{ | |
HB_CHECK_SUCCESS(hbUCPFree(&(input_tensors[i].sysMem[0])), | |
"hbUCPFree failed"); | |
} | |
// free output mem | |
for (int i = 0; i < output_count; i++) | |
{ | |
HB_CHECK_SUCCESS(hbUCPFree(&(output_tensors[i].sysMem[0])), | |
"hbUCPFree failed"); | |
} | |
// release model | |
HB_CHECK_SUCCESS(hbDNNRelease(packed_dnn_handle), "hbDNNRelease failed"); | |
} | |
return 0; | |
} | |
#define ALIGN(value, alignment) (((value) + ((alignment) - 1)) & ~((alignment) - 1)) | |
#define ALIGN_32(value) ALIGN(value, 32) | |
int prepare_tensor(hbDNNTensor *input_tensor, hbDNNTensor *output_tensor, | |
hbDNNHandle_t dnn_handle) | |
{ | |
int input_count = 0; | |
int output_count = 0; | |
hbDNNGetInputCount(&input_count, dnn_handle); | |
hbDNNGetOutputCount(&output_count, dnn_handle); | |
/** Tips: | |
* For input memory size in most cases: | |
* * input_memSize = input[i].properties.alignedByteSize | |
* but here for dynamic stride of y and uv,alignedByteSize is not fixed | |
* For output memory size: | |
* * output_memSize = output[i].properties.alignedByteSize | |
*/ | |
hbDNNTensor *input = input_tensor; | |
for (int i = 0; i < input_count; i++) | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNGetInputTensorProperties(&input[i].properties, dnn_handle, i), | |
"hbDNNGetInputTensorProperties failed"); | |
/** Tips: | |
* For input tensor, usually need to pad the input data according to stride obtained from properties. | |
* but here for dynamic stride of y and uv,user needs to specify a value which should be 32 bytes aligned for the -1 position in stride. | |
* */ | |
auto dim_len = input[i].properties.validShape.numDimensions; | |
for (int32_t dim_i = dim_len - 1; dim_i >= 0; --dim_i) | |
{ | |
if (input[i].properties.stride[dim_i] == -1) | |
{ | |
auto cur_stride = | |
input[i].properties.stride[dim_i + 1] * | |
input[i].properties.validShape.dimensionSize[dim_i + 1]; | |
input[i].properties.stride[dim_i] = ALIGN_32(cur_stride); | |
} | |
} | |
int input_memSize = input[i].properties.stride[0] * | |
input[i].properties.validShape.dimensionSize[0]; | |
HB_CHECK_SUCCESS(hbUCPMallocCached(&input[i].sysMem[0], input_memSize, 0), | |
"hbUCPMallocCached failed"); | |
// Show how to get input name | |
const char *input_name; | |
HB_CHECK_SUCCESS(hbDNNGetInputName(&input_name, dnn_handle, i), | |
"hbDNNGetInputName failed"); | |
LOGI("input[{}] name is {}", i, input_name); | |
} | |
hbDNNTensor *output = output_tensor; | |
for (int i = 0; i < output_count; i++) | |
{ | |
HB_CHECK_SUCCESS( | |
hbDNNGetOutputTensorProperties(&output[i].properties, dnn_handle, i), | |
"hbDNNGetOutputTensorProperties failed"); | |
int output_memSize = output[i].properties.alignedByteSize; | |
HB_CHECK_SUCCESS(hbUCPMallocCached(&output[i].sysMem[0], output_memSize, 0), | |
"hbUCPMallocCached failed"); | |
// Show how to get output name | |
const char *output_name; | |
HB_CHECK_SUCCESS(hbDNNGetOutputName(&output_name, dnn_handle, i), | |
"hbDNNGetOutputName failed"); | |
LOGI("output[{}] name is {}", i, output_name); | |
} | |
return 0; | |
} | |
/** You can define read_image_2_tensor_as_other_type to prepare your data **/ | |
int32_t read_image_2_tensor_as_nv12(std::string &image_file, | |
hbDNNTensor *input_tensor) | |
{ | |
// the struct of input shape is NHWC | |
int input_h = input_tensor[0].properties.validShape.dimensionSize[1]; | |
int input_w = input_tensor[0].properties.validShape.dimensionSize[2]; | |
cv::Mat bgr_mat = cv::imread(image_file, cv::IMREAD_COLOR); | |
if (bgr_mat.empty()) | |
{ | |
LOGE("image file not exist!"); | |
return -1; | |
} | |
// resize | |
cv::Mat mat; | |
mat.create(input_h, input_w, bgr_mat.type()); | |
cv::resize(bgr_mat, mat, mat.size(), 0, 0); | |
// convert to YUV420 | |
if (input_h % 2 || input_w % 2) | |
{ | |
LOGE("input img height and width must aligned by 2!"); | |
return -1; | |
} | |
cv::Mat yuv_mat; | |
cv::cvtColor(mat, yuv_mat, cv::COLOR_BGR2YUV_I420); | |
uint8_t *yuv_data = yuv_mat.ptr<uint8_t>(); | |
uint8_t *y_data_src = yuv_data; | |
// copy y data | |
uint8_t *y_data_dst = | |
reinterpret_cast<uint8_t *>(input_tensor[0].sysMem[0].virAddr); | |
for (int32_t h = 0; h < input_h; ++h) | |
{ | |
memcpy(y_data_dst, y_data_src, input_w); | |
y_data_src += input_w; | |
// add padding | |
y_data_dst += input_tensor[0].properties.stride[1]; | |
} | |
// copy uv data | |
int32_t uv_height = input_tensor[1].properties.validShape.dimensionSize[1]; | |
int32_t uv_width = input_tensor[1].properties.validShape.dimensionSize[2]; | |
uint8_t *uv_data_dst = | |
reinterpret_cast<uint8_t *>(input_tensor[1].sysMem[0].virAddr); | |
uint8_t *u_data_src = yuv_data + input_h * input_w; | |
uint8_t *v_data_src = u_data_src + uv_height * uv_width; | |
for (int32_t h = 0; h < uv_height; ++h) | |
{ | |
auto *cur_data = uv_data_dst; | |
for (int32_t w = 0; w < uv_width; ++w) | |
{ | |
*cur_data++ = *u_data_src++; | |
*cur_data++ = *v_data_src++; | |
} | |
// add padding | |
uv_data_dst += input_tensor[1].properties.stride[1]; | |
} | |
return 0; | |
} | |
void get_topk_result(hbDNNTensor *tensor, | |
std::vector<Classification> &top_k_cls, int top_k) | |
{ | |
hbUCPMemFlush(&(tensor->sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE); | |
std::priority_queue<Classification, std::vector<Classification>, | |
std::greater<Classification>> | |
queue; | |
// The type reinterpret_cast should be determined according to the output type | |
// For example: HB_DNN_TENSOR_TYPE_F32 is float | |
auto data = reinterpret_cast<float *>(tensor->sysMem[0].virAddr); | |
auto quanti_type{tensor->properties.quantiType}; | |
// For example model, quantiType is NONE and no dequantize processing is required. | |
if (quanti_type != hbDNNQuantiType::NONE) | |
{ | |
LOGE("quanti_type is not NONE, and the output needs to be dequantized!"); | |
} | |
// 1000 classification score values | |
int tensor_len = 1000; | |
for (auto i = 0; i < tensor_len; i++) | |
{ | |
float score = data[i]; | |
queue.push(Classification(i, score, "")); | |
if (queue.size() > top_k) | |
{ | |
queue.pop(); | |
} | |
} | |
while (!queue.empty()) | |
{ | |
top_k_cls.emplace_back(queue.top()); | |
queue.pop(); | |
} | |
std::reverse(top_k_cls.begin(), top_k_cls.end()); | |
} | |
/* | |
root@9ed8cd874b9d:/open_explorer/J6_start/nni_ucp/build# ./run_resnet \ | |
--model_file=../model/resnet50_224x224_nv12.hbm \ | |
--image_file=../data/cls_images/zebra_cls.jpg \ | |
--top_k=5 | |
[UCP]: log level = 3 | |
[UCP]: UCP version = 3.1.2 | |
[VP]: log level = 3 | |
[DNN]: log level = 3 | |
[HPL]: log level = 3 | |
[UCPT]: log level = 6 | |
./run_resnet --model_file=../model/resnet50_224x224_nv12.hbm --image_file=../data/cls_images/zebra_cls.jpg --top_k=5 | |
[I][26367][11-12][08:35:50:282][main.cc:267][run_resnet][DNN_BASIC_SAMPLE] input[0] name is input_y | |
[I][26367][11-12][08:35:50:282][main.cc:267][run_resnet][DNN_BASIC_SAMPLE] input[1] name is input_uv | |
[I][26367][11-12][08:35:50:282][main.cc:284][run_resnet][DNN_BASIC_SAMPLE] output[0] name is output | |
[I][26367][11-12][08:35:50:356][main.cc:148][run_resnet][DNN_BASIC_SAMPLE] read image to tensor as nv12 success | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[BPU][[BPU_DEV]][INFO]bpu_core_get_est_time not implemented in simulator | |
[I][26367][11-12][08:35:56:401][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 0 result id: 340 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 1 result id: 292 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 2 result id: 9 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 3 result id: 353 | |
[I][26367][11-12][08:35:56:402][main.cc:190][run_resnet][DNN_BASIC_SAMPLE] TOP 4 result id: 343 | |
*/ |
(GitHub footer: sign up for free to join this conversation on GitHub; already
have an account? Sign in to comment.)