Created
May 18, 2025 11:01
-
-
Save swdee/8ffdf5bffc2f9ea30d17d0452b1cb3d0 to your computer and use it in GitHub Desktop.
Preliminary code trying to get yolov8 inference working on NPU with C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// yolov8 | |
// C system headers
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

// C++ standard library
#include <algorithm>
#include <chrono>
#include <cmath>      // std::round — needed by the input quantization loop
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <vector>

// Third-party libraries
#include <opencv2/opencv.hpp>

// Project headers
#include "cix_noe_standard_api.h"
/// Returns a printable name for an NOE tensor data type, or "UNKNOWN"
/// for any value not in the known set.
const char* dataTypeToString(noe_data_type_t dt) {
    // Table of known type/name pairs; a linear scan is plenty for 14 entries.
    static const struct Entry {
        noe_data_type_t type;
        const char*     name;
    } kTypeNames[] = {
        {NOE_DATA_TYPE_NONE, "NONE"}, {NOE_DATA_TYPE_BOOL, "BOOL"},
        {NOE_DATA_TYPE_U8,   "U8"},   {NOE_DATA_TYPE_S8,   "S8"},
        {NOE_DATA_TYPE_U16,  "U16"},  {NOE_DATA_TYPE_S16,  "S16"},
        {NOE_DATA_TYPE_U32,  "U32"},  {NOE_DATA_TYPE_S32,  "S32"},
        {NOE_DATA_TYPE_U64,  "U64"},  {NOE_DATA_TYPE_S64,  "S64"},
        {NOE_DATA_TYPE_F16,  "F16"},  {NOE_DATA_TYPE_F32,  "F32"},
        {NOE_DATA_TYPE_F64,  "F64"},  {NOE_DATA_TYPE_BF16, "BF16"},
    };
    for (const Entry& e : kTypeNames) {
        if (e.type == dt) return e.name;
    }
    return "UNKNOWN";
}
int main(int argc, char** argv) { | |
if (argc != 5) { | |
std::cerr << "Usage: " << argv[0] << " model.cix image.jpg confThreshold nmsThreshold\n"; | |
return 1; | |
} | |
const char* model_path = argv[1]; | |
const char* image_path = argv[2]; | |
float confThreshold = std::stof(argv[3]); | |
float nmsThreshold = std::stof(argv[4]); | |
// Initialize context and load graph | |
context_handler_t* ctx = nullptr; | |
noe_status_t status; | |
const char* status_msg = nullptr; | |
if ((status = noe_init_context(&ctx)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_init_context failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
return 1; | |
} | |
std::cout << "NOE context initialized" << std::endl; | |
// load Model/graph | |
uint64_t graph_id = 0; | |
if ((status = noe_load_graph(ctx, model_path, &graph_id)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_load_graph failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
std::cout << "Model/Graph loaded" << std::endl; | |
// Query tensor input/count counts | |
uint32_t in_cnt=0, out_cnt=0; | |
noe_get_tensor_count(ctx, graph_id, NOE_TENSOR_TYPE_INPUT, &in_cnt); | |
noe_get_tensor_count(ctx, graph_id, NOE_TENSOR_TYPE_OUTPUT, &out_cnt); | |
std::cout << "Tensor Counts, Input=" | |
<< in_cnt << ", Output=" << out_cnt | |
<< std::endl; | |
// Print Input tensor | |
tensor_desc_t in_desc; | |
noe_get_tensor_descriptor(ctx, graph_id, NOE_TENSOR_TYPE_INPUT, 0, &in_desc); | |
std::cout << "Input tensor descriptor:\n" | |
<< " id: " << in_desc.id << "\n" | |
<< " size: " << in_desc.size << "\n" | |
<< " scale: " << in_desc.scale << "\n" | |
<< " zero_point: " << in_desc.zero_point << "\n" | |
<< " data_type: " << dataTypeToString(in_desc.data_type) << "\n"; | |
// Print Output tensor | |
tensor_desc_t out_desc; | |
noe_get_tensor_descriptor(ctx, graph_id, NOE_TENSOR_TYPE_OUTPUT, 0, &out_desc); | |
std::cout << "Output tensor descriptor:\n" | |
<< " id: " << out_desc.id << "\n" | |
<< " size: " << out_desc.size << "\n" | |
<< " scale: " << out_desc.scale << "\n" | |
<< " zero_point: " << out_desc.zero_point << "\n" | |
<< " data_type: " << dataTypeToString(out_desc.data_type) << "\n"; | |
// Create job | |
noe_dynshape_param_t dynshape = {0, 0}; | |
job_config_npu_t job_cfg_npu{}; | |
job_cfg_npu.partition_id = 0; // defalut 0, in partition-0, only for cix npu | |
job_cfg_npu.dbg_dispatch = 0; // debug dispatch flag, set 1 to indicate specify job to debug core to run | |
job_cfg_npu.dbg_core_id = 0; // specify debug core id, [0, max_core_id in cluster] | |
job_cfg_npu.qos_level = 0; // default 0, low priority, only for cix npu | |
job_cfg_npu.fm_mem_region = 0; // default 0, feature map buffer memory region | |
job_cfg_npu.wt_mem_region = 0; // default 0, weight buffer memory region | |
job_cfg_npu.fm_idxes = nullptr; // specify feature maps allocated from 'fm_mem_region' | |
job_cfg_npu.fm_idxes_cnt = 0; // the element number in fm_idxes | |
job_cfg_npu.dynshape = &dynshape; | |
job_config_t job_cfg{ &job_cfg_npu }; | |
uint64_t job_id = 0; | |
if ((status = noe_create_job(ctx, graph_id, &job_id, &job_cfg)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_create_job failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_unload_graph(ctx, graph_id); | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
std::cout << "Created Job: " << job_id << std::endl; | |
// load input tensor data exported from python file | |
constexpr uint32_t ELEMENT_COUNT = 1228800; // 1 × 3 × 640 × 640 | |
constexpr uint32_t BYTE_SIZE = ELEMENT_COUNT * sizeof(float); | |
// Read exactly that many bytes into a float buffer | |
std::vector<float> tmp_buf(ELEMENT_COUNT); | |
std::ifstream in{"/home/radxa/devel/onnx_yolov8_l/input_tensor.bin", std::ios::binary}; | |
if (!in.read(reinterpret_cast<char*>(tmp_buf.data()), BYTE_SIZE)) { | |
std::cerr << "Failed to read input_tensor.bin\n"; | |
return 1; | |
} | |
// quantize float32 to uint8 | |
std::vector<uint8_t> input_buf(ELEMENT_COUNT); | |
for (size_t i = 0; i < ELEMENT_COUNT; ++i) { | |
int q = static_cast<int>(std::round(tmp_buf[i] / in_desc.scale) | |
+ in_desc.zero_point); | |
input_buf[i] = static_cast<uint8_t>(std::max(0, std::min(255, q))); | |
} | |
// load tensor into NPU | |
auto t0 = std::chrono::high_resolution_clock::now(); | |
if ((status = noe_load_tensor(ctx, job_id, 0, input_buf.data())) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_load_tensor failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
return 1; | |
} | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
double prep_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); | |
std::cout << "Tensor load time: " << prep_ms << " ms" << std::endl; | |
// Run job | |
int32_t timeout_ms = 5000; | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
if ((status = noe_job_infer_sync(ctx, job_id, timeout_ms)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_job_infer_sync failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_clean_job(ctx, job_id); | |
noe_unload_graph(ctx, graph_id); | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
auto t3 = std::chrono::high_resolution_clock::now(); | |
double infer_ms = std::chrono::duration<double, std::milli>(t3 - t2).count(); | |
std::cout << "Inference sync time: " << infer_ms << " ms" << std::endl; | |
std::cout << "Ran job inference sync" << std::endl; | |
// Get tensor output results | |
auto t4 = std::chrono::high_resolution_clock::now(); | |
size_t out_size = out_desc.size; // / sizeof(float) | |
std::vector<int16_t> raw_buf(out_size); | |
if ((status = noe_get_tensor(ctx, job_id, NOE_TENSOR_TYPE_OUTPUT, 0, raw_buf.data())) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_get_tensor failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_clean_job(ctx, job_id); | |
noe_unload_graph(ctx, graph_id); | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
auto t5 = std::chrono::high_resolution_clock::now(); | |
double fetch_ms = std::chrono::duration<double, std::milli>(t5 - t4).count(); | |
std::cout << "Fetch outputs time: " << fetch_ms << " ms" << std::endl; | |
//Dequantize into floats | |
std::vector<float> output_buf(out_size); | |
for (size_t i = 0; i < out_size; ++i) { | |
output_buf[i] = (static_cast<int32_t>(raw_buf[i]) - out_desc.zero_point) | |
* out_desc.scale; | |
} | |
// show first few bytes of output tensor | |
constexpr size_t DUMP_COUNT = 100; | |
size_t to_dump = std::min(output_buf.size(), DUMP_COUNT); | |
std::cout << "First " << to_dump << " output floats:" << std::endl; | |
std::cout << std::fixed << std::setprecision(6); | |
for (size_t i = 0; i < to_dump; ++i) { | |
std::cout << output_buf[i] | |
<< (i + 1 < to_dump ? ", " : "\n"); | |
} | |
std::cout << std::endl; | |
/* | |
* Deinit / Cleanup below | |
*/ | |
// unload job | |
if ((status = noe_clean_job(ctx, job_id)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_clean_job failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_unload_graph(ctx, graph_id); | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
std::cout << "Job unloaded" << std::endl; | |
// unload model/graph | |
if ((status = noe_unload_graph(ctx, graph_id)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_unload_graph failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
noe_deinit_context(ctx); | |
return 1; | |
} | |
std::cout << "Model/Graph unloaded" << std::endl; | |
// clean up NOE context | |
if ((status = noe_deinit_context(ctx)) != NOE_STATUS_SUCCESS) { | |
noe_get_error_message(ctx, status, &status_msg); | |
std::cerr | |
<< "noe_deinit_context failed (code " << status << "): " | |
<< (status_msg ? status_msg : "Unknown error") | |
<< std::endl; | |
return 1; | |
} | |
std::cout << "NOE context cleaned up" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment