Skip to content

Instantly share code, notes, and snippets.

@notogawa
Created February 10, 2025 06:42
Show Gist options
  • Save notogawa/4dcebe6db14f5898dee85babb85f7d37 to your computer and use it in GitHub Desktop.
Save notogawa/4dcebe6db14f5898dee85babb85f7d37 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Argument block passed to IOCTL_V3D_CREATE_BO.
 *
 * NOTE(review): the layout is presumably meant to mirror the kernel's
 * struct drm_v3d_create_bo (v3d_drm.h) -- confirm against the UAPI header
 * before reordering or resizing any field.
 */
typedef struct {
    uint32_t size;   /* filled in by the caller before the ioctl */
    uint32_t flags;  /* filled in by the caller; this program passes 0 */
    uint32_t handle; /* read back after the ioctl and used as the BO handle */
    uint32_t offset; /* read back after the ioctl and used as the BO's base address */
} drm_v3d_create_bo;
/*
 * Argument block passed to IOCTL_V3D_MMAP_BO.
 *
 * NOTE(review): the layout is presumably meant to mirror the kernel's
 * struct drm_v3d_mmap_bo (v3d_drm.h) -- confirm against the UAPI header.
 */
typedef struct {
    uint32_t handle; /* BO handle supplied by the caller */
    uint32_t flags;  /* supplied by the caller; this program passes 0 */
    uint64_t offset; /* read back after the ioctl and handed to mmap() as the file offset */
} drm_v3d_mmap_bo;
/*
 * Argument block passed to IOCTL_GEM_CLOSE to release a buffer handle.
 *
 * NOTE(review): presumably mirrors the kernel's struct drm_gem_close
 * (drm.h) -- confirm against the UAPI header.
 */
typedef struct {
    uint32_t handle; /* handle to release */
    uint32_t pad;    /* padding word */
} gem_close;
/*
 * Argument block passed to IOCTL_V3D_WAIT_BO.
 *
 * NOTE(review): the layout is presumably meant to mirror the kernel's
 * struct drm_v3d_wait_bo (v3d_drm.h) -- confirm against the UAPI header.
 */
typedef struct {
    uint32_t handle;     /* BO handle to wait on */
    uint32_t pad;        /* padding word; wait_bo() sets it to 0 */
    uint64_t timeout_ns; /* wait timeout, in nanoseconds going by the field name */
} drm_v3d_wait_bo;
/*
 * Argument block passed to IOCTL_V3D_SUBMIT_CSD (compute dispatch submit).
 *
 * NOTE(review): the layout is presumably meant to mirror the kernel's
 * struct drm_v3d_submit_csd (v3d_drm.h) -- confirm against the UAPI header
 * before reordering or resizing any field.
 */
typedef struct {
    uint32_t cfg[7];          /* dispatch configuration words; see submit_csd() for how each is built */
    uint32_t coef[4];         /* submit_csd() sets all four to 0 */
    uint64_t bo_handles;      /* user-space address of a uint32_t array of BO handles */
    uint32_t bo_handle_count; /* number of entries in the bo_handles array */
    uint32_t in_sync;         /* submit_csd() sets this to 0 */
    uint32_t out_sync;        /* submit_csd() sets this to 0 */
    uint32_t perfmon_id;      /* left zero by this program */
    uint64_t extensions;      /* left zero by this program */
    uint32_t flags;           /* left zero by this program */
    uint32_t pad;             /* padding word, left zero by this program */
} drm_v3d_submit_csd;
#define DRM_IOCTL_BASE 'd'
#define DRM_COMMAND_BASE 0x40
#define DRM_GEM_CLOSE 0x09
#define DRM_V3D_WAIT_BO (DRM_COMMAND_BASE + 0x01)
#define DRM_V3D_CREATE_BO (DRM_COMMAND_BASE + 0x02)
#define DRM_V3D_MMAP_BO (DRM_COMMAND_BASE + 0x03)
#define DRM_V3D_SUBMIT_CSD (DRM_COMMAND_BASE + 0x07)
#define IOCTL_GEM_CLOSE _IOW(DRM_IOCTL_BASE, DRM_GEM_CLOSE, gem_close)
#define IOCTL_V3D_CREATE_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_CREATE_BO, drm_v3d_create_bo)
#define IOCTL_V3D_MMAP_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_MMAP_BO, drm_v3d_mmap_bo)
#define IOCTL_V3D_WAIT_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_WAIT_BO, drm_v3d_wait_bo)
#define IOCTL_V3D_SUBMIT_CSD _IOW(DRM_IOCTL_BASE, DRM_V3D_SUBMIT_CSD, drm_v3d_submit_csd)
/*
 * Pre-assembled shader program, one 64-bit word per instruction.
 *
 * main() copies this table to the start of the buffer object and
 * submit_csd() is given that address as the code pointer. The table is
 * only ever read, so it is const.
 *
 * The per-word comments are the author's disassembly notes; they are not
 * re-verified here. NOTE(review): going by those notes, the program loads a
 * counter from the uniform stream into rf1 and loops while decrementing it,
 * then ends the thread -- confirm against the V3D QPU ISA before editing.
 */
static const uint64_t nop_loop[] = {
    0x39807186bb03f000, // nop; ldunifrf(rf1)
    0x3800b186f903f043, // mov nop rf1 cond=pushn
    0x02ffffe5ff009000, // branch anyna
    0x39e0a1813c03f041, // delay slot: sub rf1 rf1 1 cond=pushn
    0x38003186bb03f000, // delay slot: nop
    0x38003186bb03f000, // delay slot: nop
    0x38203186bb03f000, // nop; thrsw
    0x38203186bb03f000, // nop; thrsw
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
    0x38203186bb03f000, // nop; thrsw
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
};
static int submit_csd(int fd, uint32_t phyaddr, uint32_t unifaddr, uint32_t handle) {
const uint32_t wg_x = 1;
const uint32_t wg_y = 1;
const uint32_t wg_z = 1;
const uint32_t wg_size = wg_x * wg_y * wg_z;
const uint32_t wgs_per_sg = 1;
const uint32_t bo_handles[] = { handle };
drm_v3d_submit_csd csd = {0};
csd.cfg[0] = wg_x << 16;
csd.cfg[1] = wg_y << 16;
csd.cfg[2] = wg_z << 16;
csd.cfg[3] =
((((wgs_per_sg * wg_size + 16u - 1u) / 16u) - 1u) << 12) |
(wgs_per_sg << 8) |
(wg_size & 0xff);
csd.cfg[4] = 1;
csd.cfg[5] = phyaddr;
csd.cfg[6] = unifaddr;
csd.coef[0] = 0;
csd.coef[1] = 0;
csd.coef[2] = 0;
csd.coef[3] = 0;
csd.bo_handles = (uintptr_t)bo_handles;
csd.bo_handle_count = sizeof(bo_handles)/sizeof(bo_handles[0]);
csd.in_sync = 0;
csd.out_sync = 0;
return ioctl(fd, IOCTL_V3D_SUBMIT_CSD, &csd);
}
static int wait_bo(int fd, uint32_t handle) {
drm_v3d_wait_bo wait;
wait.handle = handle;
wait.pad = 0;
wait.timeout_ns = 10e9;
return ioctl(fd, IOCTL_V3D_WAIT_BO, &wait);
}
/*
 * Return the current gettimeofday() time as seconds, with the microsecond
 * part folded in as a fraction.
 *
 * This is wall-clock time, so differences between two calls can be skewed
 * if the system clock is adjusted in between.
 */
static double get_time(void) {
    /* Zero-initialised so a failed gettimeofday() cannot leave garbage behind. */
    struct timeval now = {0};

    gettimeofday(&now, NULL);
    return (double)now.tv_sec + (double)now.tv_usec * 1e-6;
}
/*
 * Not referenced anywhere in the visible code.
 * NOTE(review): presumably the assumed cycle cost of one instruction, kept
 * for converting the measured time into a clock estimate -- confirm or remove.
 */
#define CYCLES_PER_INST (4)
/*
 * main() requires the requested instruction count to be a multiple of this
 * value and divides by it to get the loop counter handed to the shader.
 * NOTE(review): presumably the number of instructions executed per iteration
 * of the nop_loop program -- verify against the shader.
 */
#define INSTS_PER_LOOP (4)
int main(int argc, char* argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: %s <number>\n", argv[0]);
return 1;
}
uint32_t insts = (uint32_t)strtoul(argv[1], NULL, 10);
assert(insts % INSTS_PER_LOOP == 0);
int fd = open("/dev/dri/card0", O_RDWR);
assert(fd > 0);
drm_v3d_create_bo create_bo;
create_bo.size = 1024 + 1024;
create_bo.flags = 0;
{
int res = ioctl(fd, IOCTL_V3D_CREATE_BO, &create_bo);
assert(res == 0);
}
uint32_t handle = create_bo.handle;
drm_v3d_mmap_bo mmap_bo;
mmap_bo.handle = handle;
mmap_bo.flags = 0;
{
int res = ioctl(fd, IOCTL_V3D_MMAP_BO, &mmap_bo);
assert(res == 0);
}
void* usraddr = mmap(NULL, create_bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_bo.offset);
if (usraddr == MAP_FAILED) {
perror("mmap");
return 1;
}
memcpy(usraddr, nop_loop, sizeof(nop_loop));
{
uint32_t loop = insts / INSTS_PER_LOOP;
memcpy((uint8_t*)usraddr + 1024, &loop, sizeof(insts));
}
uint32_t phyaddr = create_bo.offset;
uint32_t unifaddr = create_bo.offset + 1024;
printf("[loop:%u]\n", insts);
double start = get_time();
if (submit_csd(fd, phyaddr, unifaddr, handle) < 0) {
perror("submit_csd");
return 1;
}
if (wait_bo(fd, handle) < 0) {
perror("wait_bo");
return 1;
}
double end = get_time();
printf("%.6lf sec\n", end - start);
{
int res = munmap(usraddr, sizeof(nop_loop));
assert(res == 0);
}
gem_close cl;
cl.handle = handle;
ioctl(fd, IOCTL_GEM_CLOSE, &cl);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment