Hailo RT SDK example
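
A single-header C++ wrapper around the HailoRT asynchronous inference API: it creates a VDevice, loads a compiled HEF model, binds 4096-byte-aligned input/output buffers, and runs inference via run_async.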
#pragma once

#include <hailo/hailort.hpp>

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <vector>

#ifndef DEBUG_LOG
#define DEBUG_LOG 1
#endif
#if DEBUG_LOG
#define LOG(...)                                                                                         \
    do {                                                                                                 \
        printf("[%lld] ", static_cast<long long>(std::chrono::duration_cast<std::chrono::milliseconds>( \
                                                     std::chrono::system_clock::now().time_since_epoch())\
                                                     .count()));                                         \
        printf(__VA_ARGS__);                                                                             \
        printf("\n");                                                                                    \
    } while (0)
#else
#define LOG(...)
#endif
namespace ns {

using namespace std;
using namespace std::chrono_literals;
using namespace hailort;

class Engine {
public:
    Engine() {
        LOG("Engine constructor");
        auto vdevice = VDevice::create();
        if (!vdevice) {
            LOG("Failed to create VDevice %d", vdevice.status());
            return;
        }
        _vdevice = move(vdevice.value());
    }

    ~Engine() { LOG("Engine destructor"); }

    bool load(const string& hef_file_path) {
        LOG("Engine load %s", hef_file_path.c_str());
        configureNetworkGroup(hef_file_path);
        return _bindings != nullptr;
    }
    struct Tensor {
        enum class Type { AUTO, UINT8, UINT16, FLOAT32 };

        static size_t bytesOfType(Type type) {
            switch (type) {
                case Type::UINT8:
                    return 1;
                case Type::UINT16:
                    return 2;
                case Type::FLOAT32:
                    return 4;
                default:
                    return 1;
            }
        }

        explicit Tensor(Type type_, vector<size_t> shape_, size_t size_) : type(type_), shape(move(shape_)), size(size_), data(nullptr) {
            // aligned_alloc requires the allocation size to be a multiple of the alignment
            const size_t aligned_size = (size + 4095) & ~static_cast<size_t>(4095);
            data = static_cast<uint8_t*>(aligned_alloc(4096, aligned_size));
            if (!data) {
                throw runtime_error("Failed to allocate memory");
            }
            LOG("Allocated %zu bytes at %p", size, static_cast<void*>(data));
            view = MemoryView(data, size);
        }

        // The destructor frees the raw buffer, so copying would double-free
        Tensor(const Tensor&) = delete;
        Tensor& operator=(const Tensor&) = delete;

        ~Tensor() {
            if (data) {
                free(data);
                data = nullptr;
            }
        }

        template <typename T>
        T* dataAs() {
            if (sizeof(T) != bytesOfType(type)) {
                throw runtime_error("Invalid type");
            }
            return reinterpret_cast<T*>(data);
        }

        const Type type;
        const vector<size_t> shape;
        const size_t size;
        uint8_t* data;
        MemoryView view;
    };
    decltype(auto) getRefInputs() {
        vector<Tensor*> ref_tensors;
        if (!_model || !_bindings) {
            return ref_tensors;
        }
        auto inputs = _model->inputs();
        for (auto& tsr : inputs) {
            auto name = tsr.name();
            // Skip streams that already have a buffer bound
            if (_io_buffers.find(name) != _io_buffers.end()) {
                continue;
            }
            auto shape = tsr.shape();
            auto size = tsr.get_frame_size();
            auto type = tsr.format().type;
            auto tensor = new Tensor(_tsr_type_map[type], {1, shape.height, shape.width, shape.features}, size);
            auto sta = _bindings->input(name)->set_buffer(tensor->view);
            if (sta != HAILO_SUCCESS) {
                LOG("Failed to set buffer %d", sta);
                delete tensor;
                continue;
            }
            _io_buffers[name] = shared_ptr<Tensor>(tensor, [](Tensor* ptr) {
                LOG("Deleting Tensor");
                delete ptr;
            });
            ref_tensors.push_back(tensor);
        }
        return ref_tensors;
    }
    decltype(auto) getRefOutputs() {
        vector<Tensor*> ref_tensors;
        if (!_model || !_bindings) {
            return ref_tensors;
        }
        auto outputs = _model->outputs();
        for (auto& tsr : outputs) {
            auto name = tsr.name();
            // Skip streams that already have a buffer bound
            if (_io_buffers.find(name) != _io_buffers.end()) {
                continue;
            }
            auto shape = tsr.shape();
            auto size = tsr.get_frame_size();
            auto type = tsr.format().type;
            auto tensor = new Tensor(_tsr_type_map[type], {1, shape.height, shape.width, shape.features}, size);
            auto sta = _bindings->output(name)->set_buffer(tensor->view);
            if (sta != HAILO_SUCCESS) {
                LOG("Failed to set buffer %d", sta);
                delete tensor;
                continue;
            }
            _io_buffers[name] = shared_ptr<Tensor>(tensor, [](Tensor* ptr) {
                LOG("Deleting Tensor");
                delete ptr;
            });
            ref_tensors.push_back(tensor);
        }
        return ref_tensors;
    }
    bool infer() {
        LOG("Engine infer");
        if (!_configured_model || !_bindings) {
            return false;
        }
        auto sta = _configured_model->wait_for_async_ready(1000ms);
        if (sta != HAILO_SUCCESS) {
            LOG("Failed to wait for async ready %d", sta);
            return false;
        }
        auto job = _configured_model->run_async(*_bindings, [](const AsyncInferCompletionInfo& info) { LOG("AsyncInferCompletionInfo %d", info.status); });
        if (!job) {
            LOG("Failed to run async %d", job.status());
            return false;
        }
        // Block until the asynchronous job completes
        do {
            this_thread::yield();
        } while (job->wait(1000ms) != HAILO_SUCCESS);
        return true;
    }

private:
    void configureNetworkGroup(const string& hef_file_path) {
        LOG("configureNetworkGroup %s", hef_file_path.c_str());
        // Parse the HEF first so an invalid file fails early
        auto hef = Hef::create(hef_file_path);
        if (!hef) {
            LOG("Failed to create Hef %d", hef.status());
            return;
        }
        if (!_vdevice) {
            LOG("VDevice is not created");
            return;
        }
        auto model = _vdevice->create_infer_model(hef_file_path);
        if (!model) {
            LOG("Failed to create model %d", model.status());
            return;
        }
        _model = move(model.value());
        _model->set_hw_latency_measurement_flags(HAILO_LATENCY_MEASURE);
        static auto type2Str = [](hailo_format_type_t v) -> string {
            switch (v) {
                case HAILO_FORMAT_TYPE_AUTO:
                    return "AUTO";
                case HAILO_FORMAT_TYPE_UINT8:
                    return "UINT8";
                case HAILO_FORMAT_TYPE_UINT16:
                    return "UINT16";
                case HAILO_FORMAT_TYPE_FLOAT32:
                    return "FLOAT32";
                case HAILO_FORMAT_TYPE_MAX_ENUM:
                    return "MAX_ENUM";
                default:
                    return to_string(v);
            }
        };
        static auto order2Str = [](hailo_format_order_t v) -> string {
            switch (v) {
                case HAILO_FORMAT_ORDER_AUTO:
                    return "AUTO";
                case HAILO_FORMAT_ORDER_NHWC:
                    return "NHWC";
                case HAILO_FORMAT_ORDER_NCHW:
                    return "NCHW";
                case HAILO_FORMAT_ORDER_FCR:
                    return "FCR";
                default:
                    return to_string(v);
            }
        };
        static auto flags2Str = [](hailo_format_flags_t v) -> string {
            switch (v) {
                case HAILO_FORMAT_FLAGS_NONE:
                    return "NONE";
                case HAILO_FORMAT_FLAGS_QUANTIZED:
                    return "QUANTIZED";
                case HAILO_FORMAT_FLAGS_TRANSPOSED:
                    return "TRANSPOSED";
                case HAILO_FORMAT_FLAGS_MAX_ENUM:
                    return "MAX_ENUM";
                default:
                    return to_string(v);
            }
        };
        // Dump name, shape, format and quantization info for each stream
        static auto dumpTsrs = [](const string& title, const auto& tsrs) {
            LOG("%s %zu:", title.c_str(), tsrs.size());
            for (auto& tsr : tsrs) {
                auto name = tsr.name();
                auto shape = tsr.shape();
                auto format = tsr.format();
                auto quant_infos = tsr.get_quant_infos();
                LOG("  %s, (%u, %u, %u)", name.c_str(), shape.height, shape.width, shape.features);
                auto [t, o, f] = tuple(type2Str(format.type), order2Str(format.order), flags2Str(format.flags));
                LOG("  format type %s, order %s, flags %s, quant_info %zu", t.c_str(), o.c_str(), f.c_str(), quant_infos.size());
                for (auto& info : quant_infos) {
                    LOG("    scale %f zp %d", info.qp_scale, static_cast<int32_t>(info.qp_zp));
                }
            }
        };
        const auto inputs = _model->inputs();
        dumpTsrs("Inputs", inputs);
        const auto outputs = _model->outputs();
        dumpTsrs("Outputs", outputs);

        auto configured_model = _model->configure();
        if (!configured_model) {
            LOG("Failed to configure model %d", configured_model.status());
            return;
        }
        {
            // Capture _model in the deleter so it outlives the configured model;
            // the capture is released automatically when the deleter is destroyed
            auto shared = new ConfiguredInferModel(configured_model.value());
            _configured_model = shared_ptr<ConfiguredInferModel>(shared, [dep_ref = _model](ConfiguredInferModel* ptr) {
                LOG("Deleting ConfiguredInferModel");
                delete ptr;
                (void)dep_ref;
            });
        }
        auto bindings = configured_model.value().create_bindings();
        if (!bindings) {
            LOG("Failed to create bindings %d", bindings.status());
            return;
        }
        {
            // Same pattern: the bindings keep the configured model alive
            auto shared = new ConfiguredInferModel::Bindings(bindings.value());
            _bindings = shared_ptr<ConfiguredInferModel::Bindings>(shared, [dep_ref = _configured_model](ConfiguredInferModel::Bindings* ptr) {
                LOG("Deleting ConfiguredInferModel::Bindings");
                delete ptr;
                (void)dep_ref;
            });
        }
        LOG("Model use count %ld", _model.use_count());
        LOG("ConfiguredInferModel use count %ld", _configured_model.use_count());
        LOG("Bindings use count %ld", _bindings.use_count());
    }

private:
    unique_ptr<VDevice> _vdevice = nullptr;
    shared_ptr<InferModel> _model = nullptr;
    shared_ptr<ConfiguredInferModel> _configured_model = nullptr;
    shared_ptr<ConfiguredInferModel::Bindings> _bindings = nullptr;
    unordered_map<string, shared_ptr<Tensor>> _io_buffers;
    unordered_map<hailo_format_type_t, Tensor::Type> _tsr_type_map = {
        {HAILO_FORMAT_TYPE_AUTO, Tensor::Type::AUTO},
        {HAILO_FORMAT_TYPE_UINT8, Tensor::Type::UINT8},
        {HAILO_FORMAT_TYPE_UINT16, Tensor::Type::UINT16},
        {HAILO_FORMAT_TYPE_FLOAT32, Tensor::Type::FLOAT32},
    };
};

} // namespace ns
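
A minimal usage sketch. It assumes the header above is saved as engine.hpp and that a compiled model exists at the hypothetical path model.hef; the memset fill is a placeholder for real input data and assumes UINT8 input tensors.

// example_usage.cpp - drive ns::Engine end to end (sketch, assumptions above)
#include "engine.hpp"  // assumed filename for the header above

#include <cstring>

int main() {
    ns::Engine engine;
    if (!engine.load("model.hef")) {  // hypothetical HEF path
        return 1;
    }
    // getRef*() allocates and binds each buffer once; the returned pointers
    // stay valid for the lifetime of the engine, so fetch them a single time
    auto inputs  = engine.getRefInputs();
    auto outputs = engine.getRefOutputs();
    // Placeholder fill; dataAs<uint8_t>() throws if the tensor is not UINT8
    for (auto* input : inputs) {
        std::memset(input->dataAs<uint8_t>(), 0, input->size);
    }
    if (!engine.infer()) {
        return 1;
    }
    for (auto* output : outputs) {
        (void)output;  // results are in output->data; layout is model-specific
    }
    return 0;
}

Note that getRefInputs and getRefOutputs only return buffers they newly allocated, so a second call returns an empty vector; keep the vectors from the first call.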