Skip to content

Instantly share code, notes, and snippets.

@makslevental
Created September 19, 2024 18:46
Show Gist options
  • Save makslevental/382d5b66e1bb4a99eb03b3b043dc2a15 to your computer and use it in GitHub Desktop.
Save makslevental/382d5b66e1bb4a99eb03b3b043dc2a15 to your computer and use it in GitHub Desktop.
//
// Created by mlevental on 9/19/24.
//
#include "amdxdna_accel.h"
#include <cerrno>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
// 64-bit buffer-object flags word, exposed through three overlapping views:
// the whole value (`all`), its two 32-bit halves (`flags` / `extension`), and
// the individual fields packed into those halves.
struct xcl_bo_flags {
union {
uint64_t all; // [63-0]
struct {
uint32_t flags; // [31-0]
uint32_t extension; // [63-32]
};
struct {
uint16_t bank; // [15-0]
uint8_t slot; // [23-16]
uint8_t boflags; // [31-24]
// extension
uint32_t access : 2; // [33-32]
uint32_t dir : 2; // [35-34]
uint32_t use : 1; // [36]
uint32_t unused : 27; // [63-37]
};
};
};
// Values compared against xcl_bo_flags::use. bo::type_to_name() reports a
// AMDXDNA_BO_DEV buffer as a debug BO when use == XRT_BO_USE_DEBUG.
// XRT_BO_USE_NORMAL is presumably the value of the same field for regular
// buffers (it is not referenced in this file).
#define XRT_BO_USE_NORMAL 0
#define XRT_BO_USE_DEBUG 1
// Prints a printf-style debug message to stdout, prefixed with the id of the
// calling process and terminated with a newline.
//   fmt  - printf format string for the message body
//   args - values consumed by the conversions in fmt
template <typename... Args> void shim_debug(const char *fmt, Args &&...args) {
  const std::string line_fmt = "PID(%d): " + std::string(fmt) + "\n";
  printf(line_fmt.c_str(), getpid(), std::forward<Args>(args)...);
}
// Formats a printf-style error message, appends " (err=<err>)", writes the
// result to std::cerr and terminates the process with exit(-1).
//   err  - error code appended to the message
//   fmt  - printf format string for the message body
//   args - values consumed by the conversions in fmt
// Never returns.
template <typename... Args>
[[noreturn]] void shim_err(int err, const char *fmt, Args &&...args) {
  const std::string full_fmt = std::string(fmt) + " (err=%d)";

  // First pass only measures how many characters the message needs.
  const int needed = std::snprintf(nullptr, 0, full_fmt.c_str(), args..., err);
  if (needed < 0) {
    printf("could not format error string");
    exit(-1);
  }

  // Second pass renders into a buffer with room for the terminating NUL.
  std::string text(static_cast<size_t>(needed) + 1, '\0');
  std::snprintf(&text[0], text.size(), full_fmt.c_str(), args..., err);
  std::cerr << text.c_str();
  exit(-1);
}
// Returns the enumerator name for a BO type, or "BO_UNKNOWN" for any value
// that is not one of the four types listed below.
std::string type_to_name(amdxdna_bo_type type) {
  const char *label = "BO_UNKNOWN";
  switch (type) {
  case AMDXDNA_BO_SHMEM:
    label = "AMDXDNA_BO_SHMEM";
    break;
  case AMDXDNA_BO_DEV_HEAP:
    label = "AMDXDNA_BO_DEV_HEAP";
    break;
  case AMDXDNA_BO_DEV:
    label = "AMDXDNA_BO_DEV";
    break;
  case AMDXDNA_BO_CMD:
    label = "AMDXDNA_BO_CMD";
    break;
  default:
    break;
  }
  return std::string(label);
}
uint32_t alloc_drm_bo(int dev, amdxdna_bo_type type, void *buf, size_t size) {
amdxdna_drm_create_bo cbo = {
.type = type,
.vaddr = reinterpret_cast<uintptr_t>(buf),
.size = size,
};
shim_debug("alloc_drm_bo %s %d", type_to_name(type).c_str(), size);
ioctl(dev, DRM_IOCTL_AMDXDNA_CREATE_BO, &cbo);
return cbo.handle;
}
void free_drm_bo(int dev, uint32_t boh) {
drm_gem_close close_bo = {boh, 0};
ioctl(dev, DRM_IOCTL_GEM_CLOSE, &close_bo);
}
void get_drm_bo_info(int dev, uint32_t boh, amdxdna_drm_get_bo_info *bo_info) {
bo_info->handle = boh;
ioctl(dev, DRM_IOCTL_AMDXDNA_GET_BO_INFO, bo_info);
}
// Creates a private anonymous read/write mapping of `size` bytes and returns
// its start address. Terminates the process through shim_err() on failure.
void *map_parent_range(size_t size) {
  void *p = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap signals failure with MAP_FAILED ((void *)-1), never with a null
  // pointer, so a null check would let errors through.
  if (p == MAP_FAILED)
    shim_err(errno, "mmap(len=%zu) failed", size);
  return p;
}
void *map_drm_bo(int dev, size_t size, int prot, uint64_t offset) {
return mmap(0, size, prot, MAP_SHARED | MAP_LOCKED, dev, offset);
}
void *map_drm_bo(int dev, void *addr, size_t size, int prot, int flags,
uint64_t offset) {
return mmap(addr, size, prot, flags, dev, offset);
}
// Removes the mapping of `size` bytes starting at `addr`.
// `dev` is accepted for symmetry with map_drm_bo() and is not used; the
// munmap result is not inspected.
void unmap_drm_bo(int dev, void *addr, size_t size) {
  (void)dev;
  munmap(addr, size);
}
void attach_dbg_drm_bo(int dev, uint32_t boh, uint32_t ctx_id) {
amdxdna_drm_config_hwctx adbo = {
.handle = ctx_id,
.param_type = DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF,
.param_val = boh,
};
ioctl(dev, DRM_IOCTL_AMDXDNA_CONFIG_HWCTX, &adbo);
}
void detach_dbg_drm_bo(int dev, uint32_t boh, uint32_t ctx_id) {
amdxdna_drm_config_hwctx adbo = {
.handle = ctx_id,
.param_type = DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF,
.param_val = boh,
};
ioctl(dev, DRM_IOCTL_AMDXDNA_CONFIG_HWCTX, &adbo);
}
// True when x has exactly one bit set. Zero is not a power of two.
bool is_power_of_two(size_t x) {
  if (x == 0)
    return false;
  // Clearing the lowest set bit leaves nothing only for single-bit values.
  const size_t without_lowest_bit = x & (x - 1);
  return without_lowest_bit == 0;
}
// Rounds p up to the nearest multiple of `align`.
//   p     - address to align
//   align - alignment in bytes; must be a power of two
// Returns p itself when it is already aligned, otherwise the next aligned
// address above it, so the result always lies in [p, p + align - 1].
// Terminates the process through shim_err(EINVAL, ...) if `align` is not a
// power of two.
void *addr_align(void *p, size_t align) {
  if (!is_power_of_two(align))
    shim_err(EINVAL, "Alignment 0x%zx is not power of two", align);
  // Add (align - 1), not align: adding the full alignment would push an
  // already aligned address one whole alignment further.
  const uintptr_t mask = static_cast<uintptr_t>(align) - 1;
  const uintptr_t rounded = (reinterpret_cast<uintptr_t>(p) + mask) & ~mask;
  return reinterpret_cast<void *>(rounded);
}
namespace shim_xdna {
// Transfer direction requested from bo::sync().
enum class direction {
host2device = 0, // bo::sync() issues SYNC_DIRECT_TO_DEVICE
device2host = 1, // bo::sync() issues SYNC_DIRECT_FROM_DEVICE
};
// Buffer object on the accel device: holds the DRM BO handle (through m_bo)
// and the user-space mapping bookkeeping for it.
class bo {
public:
  // Records the parameters only; no driver call is made until alloc_bo().
  //   device - open file descriptor of the accel device
  //   ctx_id - owning hardware context, or AMDXDNA_INVALID_CTX_HANDLE
  //   size   - BO size in bytes
  //   flags  - 64-bit flags word (see xcl_bo_flags)
  //   type   - kind of BO
  bo(int device, uint32_t ctx_id, size_t size, uint64_t flags,
     amdxdna_bo_type type);
  // Virtual because sync() is virtual: a class meant to be overridden must be
  // safe to destroy through a base pointer.
  virtual ~bo();
  enum class map_type { read, write };
  // Returns the address set up by mmap_bo(); only map_type::write is accepted.
  void *map(map_type) const;
  // Currently a no-op.
  void unmap(void *addr);
  // Issues DRM_IOCTL_AMDXDNA_SYNC_BO for `size` bytes at `offset`.
  virtual void sync(direction, size_t size, size_t offset);

public:
  uint32_t get_drm_bo_handle() const;
  // DRM BO managed by driver.
  class drm_bo {
  public:
    bo &m_parent;
    uint32_t m_handle = AMDXDNA_INVALID_BO_HANDLE;
    off_t m_map_offset = AMDXDNA_INVALID_ADDR;
    uint64_t m_xdna_addr = AMDXDNA_INVALID_ADDR;
    uint64_t m_vaddr = AMDXDNA_INVALID_ADDR;
    // Copies handle, map offset and addresses out of bo_info.
    drm_bo(bo &parent, const amdxdna_drm_get_bo_info &bo_info);
    // Closes the handle through free_drm_bo() unless it is invalid.
    ~drm_bo();
  };
  // Creates the driver BO and stores it in m_bo.
  void alloc_bo();
  // Drops m_bo, which closes the driver handle.
  void free_bo();
  // Maps the BO into user space; a non-zero `align` requests an address
  // aligned to that many bytes.
  void mmap_bo(size_t align = 0);
  // Undoes mmap_bo().
  void munmap_bo();
  std::string type_to_name() const;
  const int m_pdev;
  // Start of the range reserved by mmap_bo() for aligned mappings, if any.
  void *m_parent = nullptr;
  // Address of the BO mapping returned by map().
  void *m_aligned = nullptr;
  size_t m_parent_size = 0;
  size_t m_aligned_size = 0;
  uint64_t m_flags = 0;
  amdxdna_bo_type m_type = AMDXDNA_BO_INVALID;
  std::unique_ptr<drm_bo> m_bo;
  // Command ID in the queue after command submission.
  // Only valid for cmd BO.
  uint64_t m_cmd_id = -1;
  // Used when exclusively assigned to a HW context. By default, BO is shared
  // among all HW contexts.
  uint32_t m_owner_ctx_id = AMDXDNA_INVALID_CTX_HANDLE;
};
// Captures the owning bo and the handle, map offset and addresses the driver
// reported in bo_info.
// Initializers are listed in member declaration order (m_xdna_addr before
// m_vaddr); that is the order they run in regardless of how they are written,
// and matching it avoids -Wreorder diagnostics.
bo::drm_bo::drm_bo(bo &parent, const amdxdna_drm_get_bo_info &bo_info)
    : m_parent(parent), m_handle(bo_info.handle),
      m_map_offset(bo_info.map_offset), m_xdna_addr(bo_info.xdna_addr),
      m_vaddr(bo_info.vaddr) {}
// Closes the driver handle on the parent's device, unless no valid handle is
// held.
bo::drm_bo::~drm_bo() {
  const bool owns_handle = (m_handle != AMDXDNA_INVALID_BO_HANDLE);
  if (owns_handle)
    free_drm_bo(m_parent.m_pdev, m_handle);
}
std::string bo::type_to_name() const {
switch (m_type) {
case AMDXDNA_BO_SHMEM:
return std::string("AMDXDNA_BO_SHMEM");
case AMDXDNA_BO_DEV_HEAP:
return std::string("AMDXDNA_BO_DEV_HEAP");
case AMDXDNA_BO_DEV:
if (xcl_bo_flags{m_flags}.use == XRT_BO_USE_DEBUG)
return std::string("AMDXDNA_BO_DEV_DEBUG");
return std::string("AMDXDNA_BO_DEV");
case AMDXDNA_BO_CMD:
return std::string("AMDXDNA_BO_CMD");
}
return std::string("BO_UNKNOWN");
}
// Maps the BO into user space and records the address in m_aligned.
//   align - 0 for no special alignment, otherwise the required alignment in
//           bytes of the mapped address (validated by addr_align()).
// Requires alloc_bo() to have been called so that m_bo is populated.
void bo::mmap_bo(size_t align) {
  // No mmap offset from the driver: use the address it reported instead.
  if (m_bo->m_map_offset == AMDXDNA_INVALID_ADDR) {
    m_aligned = reinterpret_cast<void *>(m_bo->m_vaddr);
    return;
  }
  if (align == 0) {
    m_aligned = map_drm_bo(m_pdev, m_aligned_size, PROT_READ | PROT_WRITE,
                           m_bo->m_map_offset);
    return;
  }
  /*
   * Handle special alignment.
   * The first mmap() only reserves a range in the user virtual address
   * space. The second mmap() passes an aligned address inside that range as
   * the first argument of the mmap syscall, together with MAP_FIXED.
   *
   * The reservation must hold the whole BO plus the alignment slack:
   * MAP_FIXED replaces whatever is mapped at the target, so a reservation
   * sized from `align` alone would let a BO larger than `align` overwrite
   * unrelated mappings past its end.
   */
  m_parent_size = m_aligned_size + align;
  m_parent = map_parent_range(m_parent_size);
  void *aligned_addr = addr_align(m_parent, align);
  m_aligned =
      map_drm_bo(m_pdev, aligned_addr, m_aligned_size, PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_FIXED, m_bo->m_map_offset);
}
// Releases what mmap_bo() mapped: the BO mapping and, when one was reserved,
// the parent range. Does nothing when the BO has no mmap offset, since
// mmap_bo() created no mapping in that case.
void bo::munmap_bo() {
  const bool was_mapped = (m_bo->m_map_offset != AMDXDNA_INVALID_ADDR);
  if (was_mapped) {
    shim_debug("Unmap BO, aligned %p parent %p", m_aligned, m_parent);
    unmap_drm_bo(m_pdev, m_aligned, m_aligned_size);
    if (m_parent != nullptr)
      unmap_drm_bo(m_pdev, m_parent, m_parent_size);
  }
}
void bo::alloc_bo() {
uint32_t boh = alloc_drm_bo(m_pdev, m_type, NULL, m_aligned_size);
amdxdna_drm_get_bo_info bo_info = {};
get_drm_bo_info(m_pdev, boh, &bo_info);
m_bo = std::make_unique<bo::drm_bo>(*this, bo_info);
}
// Destroys the held drm_bo (if any), whose destructor closes the handle.
void bo::free_bo() {
  m_bo = nullptr;
}
bo::bo(int device, uint32_t ctx_id, size_t size, uint64_t flags,
amdxdna_bo_type type)
: m_pdev(device), m_aligned_size(size), m_flags(flags), m_type(type),
m_owner_ctx_id(ctx_id) {}
// Defaulted: destroying m_bo runs drm_bo::~drm_bo(), which closes the driver
// handle when one is held. No munmap is performed here; mappings are released
// only by an explicit munmap_bo() call.
bo::~bo() = default;
// Returns the address established by mmap_bo(). Only map_type::write is
// supported; any other request terminates the process through
// shim_err(EINVAL, ...).
void *bo::map(bo::map_type type) const {
  if (type == bo::map_type::write)
    return m_aligned;
  shim_err(EINVAL,
           "Not support map BO as readonly. Type must be bo::map_type::write");
}
// Intentionally does nothing; the mapping is torn down by munmap_bo().
void bo::unmap(void *addr) {
  (void)addr;
}
uint32_t bo::get_drm_bo_handle() const { return m_bo->m_handle; }
// Issues DRM_IOCTL_AMDXDNA_SYNC_BO for `size` bytes starting at `offset`.
//   dir - host2device selects SYNC_DIRECT_TO_DEVICE; anything else selects
//         SYNC_DIRECT_FROM_DEVICE
// The ioctl result is not inspected.
void bo::sync(direction dir, size_t size, size_t offset) {
  amdxdna_drm_sync_bo request = {};
  request.handle = m_bo->m_handle;
  if (dir == shim_xdna::direction::host2device)
    request.direction = SYNC_DIRECT_TO_DEVICE;
  else
    request.direction = SYNC_DIRECT_FROM_DEVICE;
  request.offset = offset;
  request.size = size;
  ioctl(m_pdev, DRM_IOCTL_AMDXDNA_SYNC_BO, &request);
}
} // namespace shim_xdna
void kmq(int device, uint32_t ctx_id, size_t size, uint64_t flags,
amdxdna_bo_type type) {
auto b = shim_xdna::bo(device, ctx_id, size, flags, type);
size_t align = 0;
if (type == AMDXDNA_BO_DEV_HEAP)
align = 64 * 1024 * 1024; // Device mem heap must align at 64MB boundary.
b.alloc_bo();
b.mmap_bo(align);
// Newly allocated buffer may contain dirty pages. If used as output buffer,
// the data in cacheline will be flushed onto memory and pollute the output
// from device. We perform a cache flush right after the BO is allocated to
// avoid this issue.
if (type == AMDXDNA_BO_SHMEM)
b.sync(shim_xdna::direction::host2device, size, 0);
b.munmap_bo();
b.free_bo();
}
int main(int argc, char **argv) {
int drv_fd;
const char drv_path[] = "/dev/accel/accel0";
drv_fd = open(drv_path, O_RDWR);
kmq(drv_fd, AMDXDNA_INVALID_CTX_HANDLE, 64 * 1024 * 1024, 0x0,
AMDXDNA_BO_DEV_HEAP);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment