Skip to content

Instantly share code, notes, and snippets.

@swdee
Created May 18, 2025 11:01
Show Gist options
  • Save swdee/8ffdf5bffc2f9ea30d17d0452b1cb3d0 to your computer and use it in GitHub Desktop.
Save swdee/8ffdf5bffc2f9ea30d17d0452b1cb3d0 to your computer and use it in GitHub Desktop.
Preliminary code trying to get yolov8 inference working on NPU with C++
// yolov8
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

#include "cix_noe_standard_api.h"
// Translates an NOE tensor data-type enumerator into a short printable label.
// Enumerators without a dedicated case fall back to "UNKNOWN".
const char* dataTypeToString(noe_data_type_t dt) {
    const char* label = "UNKNOWN";
    switch (dt) {
        case NOE_DATA_TYPE_NONE: label = "NONE"; break;
        case NOE_DATA_TYPE_BOOL: label = "BOOL"; break;
        case NOE_DATA_TYPE_U8:   label = "U8";   break;
        case NOE_DATA_TYPE_S8:   label = "S8";   break;
        case NOE_DATA_TYPE_U16:  label = "U16";  break;
        case NOE_DATA_TYPE_S16:  label = "S16";  break;
        case NOE_DATA_TYPE_U32:  label = "U32";  break;
        case NOE_DATA_TYPE_S32:  label = "S32";  break;
        case NOE_DATA_TYPE_U64:  label = "U64";  break;
        case NOE_DATA_TYPE_S64:  label = "S64";  break;
        case NOE_DATA_TYPE_F16:  label = "F16";  break;
        case NOE_DATA_TYPE_F32:  label = "F32";  break;
        case NOE_DATA_TYPE_F64:  label = "F64";  break;
        case NOE_DATA_TYPE_BF16: label = "BF16"; break;
        default: break;
    }
    return label;
}
int main(int argc, char** argv) {
if (argc != 5) {
std::cerr << "Usage: " << argv[0] << " model.cix image.jpg confThreshold nmsThreshold\n";
return 1;
}
const char* model_path = argv[1];
const char* image_path = argv[2];
float confThreshold = std::stof(argv[3]);
float nmsThreshold = std::stof(argv[4]);
// Initialize context and load graph
context_handler_t* ctx = nullptr;
noe_status_t status;
const char* status_msg = nullptr;
if ((status = noe_init_context(&ctx)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_init_context failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
return 1;
}
std::cout << "NOE context initialized" << std::endl;
// load Model/graph
uint64_t graph_id = 0;
if ((status = noe_load_graph(ctx, model_path, &graph_id)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_load_graph failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_deinit_context(ctx);
return 1;
}
std::cout << "Model/Graph loaded" << std::endl;
// Query tensor input/count counts
uint32_t in_cnt=0, out_cnt=0;
noe_get_tensor_count(ctx, graph_id, NOE_TENSOR_TYPE_INPUT, &in_cnt);
noe_get_tensor_count(ctx, graph_id, NOE_TENSOR_TYPE_OUTPUT, &out_cnt);
std::cout << "Tensor Counts, Input="
<< in_cnt << ", Output=" << out_cnt
<< std::endl;
// Print Input tensor
tensor_desc_t in_desc;
noe_get_tensor_descriptor(ctx, graph_id, NOE_TENSOR_TYPE_INPUT, 0, &in_desc);
std::cout << "Input tensor descriptor:\n"
<< " id: " << in_desc.id << "\n"
<< " size: " << in_desc.size << "\n"
<< " scale: " << in_desc.scale << "\n"
<< " zero_point: " << in_desc.zero_point << "\n"
<< " data_type: " << dataTypeToString(in_desc.data_type) << "\n";
// Print Output tensor
tensor_desc_t out_desc;
noe_get_tensor_descriptor(ctx, graph_id, NOE_TENSOR_TYPE_OUTPUT, 0, &out_desc);
std::cout << "Output tensor descriptor:\n"
<< " id: " << out_desc.id << "\n"
<< " size: " << out_desc.size << "\n"
<< " scale: " << out_desc.scale << "\n"
<< " zero_point: " << out_desc.zero_point << "\n"
<< " data_type: " << dataTypeToString(out_desc.data_type) << "\n";
// Create job
noe_dynshape_param_t dynshape = {0, 0};
job_config_npu_t job_cfg_npu{};
job_cfg_npu.partition_id = 0; // defalut 0, in partition-0, only for cix npu
job_cfg_npu.dbg_dispatch = 0; // debug dispatch flag, set 1 to indicate specify job to debug core to run
job_cfg_npu.dbg_core_id = 0; // specify debug core id, [0, max_core_id in cluster]
job_cfg_npu.qos_level = 0; // default 0, low priority, only for cix npu
job_cfg_npu.fm_mem_region = 0; // default 0, feature map buffer memory region
job_cfg_npu.wt_mem_region = 0; // default 0, weight buffer memory region
job_cfg_npu.fm_idxes = nullptr; // specify feature maps allocated from 'fm_mem_region'
job_cfg_npu.fm_idxes_cnt = 0; // the element number in fm_idxes
job_cfg_npu.dynshape = &dynshape;
job_config_t job_cfg{ &job_cfg_npu };
uint64_t job_id = 0;
if ((status = noe_create_job(ctx, graph_id, &job_id, &job_cfg)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_create_job failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_unload_graph(ctx, graph_id);
noe_deinit_context(ctx);
return 1;
}
std::cout << "Created Job: " << job_id << std::endl;
// load input tensor data exported from python file
constexpr uint32_t ELEMENT_COUNT = 1228800; // 1 × 3 × 640 × 640
constexpr uint32_t BYTE_SIZE = ELEMENT_COUNT * sizeof(float);
// Read exactly that many bytes into a float buffer
std::vector<float> tmp_buf(ELEMENT_COUNT);
std::ifstream in{"/home/radxa/devel/onnx_yolov8_l/input_tensor.bin", std::ios::binary};
if (!in.read(reinterpret_cast<char*>(tmp_buf.data()), BYTE_SIZE)) {
std::cerr << "Failed to read input_tensor.bin\n";
return 1;
}
// quantize float32 to uint8
std::vector<uint8_t> input_buf(ELEMENT_COUNT);
for (size_t i = 0; i < ELEMENT_COUNT; ++i) {
int q = static_cast<int>(std::round(tmp_buf[i] / in_desc.scale)
+ in_desc.zero_point);
input_buf[i] = static_cast<uint8_t>(std::max(0, std::min(255, q)));
}
// load tensor into NPU
auto t0 = std::chrono::high_resolution_clock::now();
if ((status = noe_load_tensor(ctx, job_id, 0, input_buf.data())) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_load_tensor failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
return 1;
}
auto t1 = std::chrono::high_resolution_clock::now();
double prep_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
std::cout << "Tensor load time: " << prep_ms << " ms" << std::endl;
// Run job
int32_t timeout_ms = 5000;
auto t2 = std::chrono::high_resolution_clock::now();
if ((status = noe_job_infer_sync(ctx, job_id, timeout_ms)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_job_infer_sync failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_clean_job(ctx, job_id);
noe_unload_graph(ctx, graph_id);
noe_deinit_context(ctx);
return 1;
}
auto t3 = std::chrono::high_resolution_clock::now();
double infer_ms = std::chrono::duration<double, std::milli>(t3 - t2).count();
std::cout << "Inference sync time: " << infer_ms << " ms" << std::endl;
std::cout << "Ran job inference sync" << std::endl;
// Get tensor output results
auto t4 = std::chrono::high_resolution_clock::now();
size_t out_size = out_desc.size; // / sizeof(float)
std::vector<int16_t> raw_buf(out_size);
if ((status = noe_get_tensor(ctx, job_id, NOE_TENSOR_TYPE_OUTPUT, 0, raw_buf.data())) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_get_tensor failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_clean_job(ctx, job_id);
noe_unload_graph(ctx, graph_id);
noe_deinit_context(ctx);
return 1;
}
auto t5 = std::chrono::high_resolution_clock::now();
double fetch_ms = std::chrono::duration<double, std::milli>(t5 - t4).count();
std::cout << "Fetch outputs time: " << fetch_ms << " ms" << std::endl;
//Dequantize into floats
std::vector<float> output_buf(out_size);
for (size_t i = 0; i < out_size; ++i) {
output_buf[i] = (static_cast<int32_t>(raw_buf[i]) - out_desc.zero_point)
* out_desc.scale;
}
// show first few bytes of output tensor
constexpr size_t DUMP_COUNT = 100;
size_t to_dump = std::min(output_buf.size(), DUMP_COUNT);
std::cout << "First " << to_dump << " output floats:" << std::endl;
std::cout << std::fixed << std::setprecision(6);
for (size_t i = 0; i < to_dump; ++i) {
std::cout << output_buf[i]
<< (i + 1 < to_dump ? ", " : "\n");
}
std::cout << std::endl;
/*
* Deinit / Cleanup below
*/
// unload job
if ((status = noe_clean_job(ctx, job_id)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_clean_job failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_unload_graph(ctx, graph_id);
noe_deinit_context(ctx);
return 1;
}
std::cout << "Job unloaded" << std::endl;
// unload model/graph
if ((status = noe_unload_graph(ctx, graph_id)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_unload_graph failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
noe_deinit_context(ctx);
return 1;
}
std::cout << "Model/Graph unloaded" << std::endl;
// clean up NOE context
if ((status = noe_deinit_context(ctx)) != NOE_STATUS_SUCCESS) {
noe_get_error_message(ctx, status, &status_msg);
std::cerr
<< "noe_deinit_context failed (code " << status << "): "
<< (status_msg ? status_msg : "Unknown error")
<< std::endl;
return 1;
}
std::cout << "NOE context cleaned up" << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment