Created
February 10, 2025 06:42
-
-
Save notogawa/4dcebe6db14f5898dee85babb85f7d37 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
// Argument block for IOCTL_V3D_CREATE_BO (allocate a GPU buffer object).
// The field order and widths are the ioctl ABI; do not reorder.
typedef struct {
    uint32_t size;    // in: requested buffer size in bytes
    uint32_t flags;   // in: creation flags (main passes 0)
    uint32_t handle;  // out: handle used by the mmap/wait/submit/close calls
    uint32_t offset;  // out: address of the buffer as handed to the shader
                      //      dispatch (main uses it as code/uniform address)
} drm_v3d_create_bo;
// Argument block for IOCTL_V3D_MMAP_BO (obtain an mmap offset for a buffer
// object). The field order and widths are the ioctl ABI; do not reorder.
typedef struct {
    uint32_t handle;  // in: buffer object handle
    uint32_t flags;   // in: flags (main passes 0)
    uint64_t offset;  // out: value main passes as the offset argument of
                      //      mmap() on the DRM file descriptor
} drm_v3d_mmap_bo;
// Argument block for IOCTL_GEM_CLOSE (release a buffer object handle).
// The field order and widths are the ioctl ABI; do not reorder.
typedef struct {
    uint32_t handle;  // in: handle to release
    uint32_t pad;     // padding; callers should zero it
} gem_close;
// Argument block for IOCTL_V3D_WAIT_BO (wait for work on a buffer object).
// The field order and widths are the ioctl ABI; do not reorder.
typedef struct {
    uint32_t handle;      // in: buffer object to wait on
    uint32_t pad;         // padding; wait_bo() sets it to 0
    uint64_t timeout_ns;  // in: maximum wait in nanoseconds. The request is
                          //     declared _IOWR, so the kernel may write this
                          //     struct back.
} drm_v3d_wait_bo;
// Argument block for IOCTL_V3D_SUBMIT_CSD (submit a compute shader dispatch).
// The field order and widths are the ioctl ABI; do not reorder.
//
// cfg[] and coef[] total 44 bytes, so on ABIs where uint64_t is 8-byte
// aligned the compiler inserts 4 bytes of padding before bo_handles.
typedef struct {
    uint32_t cfg[7];           // dispatch configuration words; see submit_csd()
                               // for how each one is filled
    uint32_t coef[4];          // coefficient words (submit_csd() zeroes them)
    uint64_t bo_handles;       // user-space pointer to an array of uint32_t
                               // buffer object handles, stored as an integer
    uint32_t bo_handle_count;  // number of entries in the bo_handles array
    uint32_t in_sync;          // submit_csd() passes 0
    uint32_t out_sync;         // submit_csd() passes 0
    uint32_t perfmon_id;       // left 0 by submit_csd()
    uint64_t extensions;       // left 0 by submit_csd()
    uint32_t flags;            // left 0 by submit_csd()
    uint32_t pad;              // padding; left 0 by submit_csd()
} drm_v3d_submit_csd;
// ioctl "type" byte shared by all DRM requests defined here.
#define DRM_IOCTL_BASE 'd'
// The V3D-specific command numbers below are expressed as offsets from this.
#define DRM_COMMAND_BASE 0x40

// Command numbers.
#define DRM_GEM_CLOSE 0x09
#define DRM_V3D_WAIT_BO (DRM_COMMAND_BASE + 0x01)
#define DRM_V3D_CREATE_BO (DRM_COMMAND_BASE + 0x02)
#define DRM_V3D_MMAP_BO (DRM_COMMAND_BASE + 0x03)
#define DRM_V3D_SUBMIT_CSD (DRM_COMMAND_BASE + 0x07)

// Full request codes. _IOW: user space only writes the argument struct;
// _IOWR: the kernel also writes results back into it.
#define IOCTL_GEM_CLOSE _IOW(DRM_IOCTL_BASE, DRM_GEM_CLOSE, gem_close)
#define IOCTL_V3D_CREATE_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_CREATE_BO, drm_v3d_create_bo)
#define IOCTL_V3D_MMAP_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_MMAP_BO, drm_v3d_mmap_bo)
#define IOCTL_V3D_WAIT_BO _IOWR(DRM_IOCTL_BASE, DRM_V3D_WAIT_BO, drm_v3d_wait_bo)
#define IOCTL_V3D_SUBMIT_CSD _IOW(DRM_IOCTL_BASE, DRM_V3D_SUBMIT_CSD, drm_v3d_submit_csd)
// Pre-assembled shader program, one 64-bit word per instruction. main()
// copies it to the start of the buffer object and writes the loop count as
// the first uniform; per the per-instruction notes below, the program loads
// that uniform into rf1 and counts it down in a branch loop.
// The table is never modified at run time, hence const.
static const uint64_t nop_loop[] = {
    0x39807186bb03f000, // nop; ldunifrf(rf1)
    0x3800b186f903f043, // mov nop rf1 cond=pushn
    0x02ffffe5ff009000, // branch anyna
    0x39e0a1813c03f041, // delay slot: sub rf1 rf1 1 cond=pushn
    0x38003186bb03f000, // delay slot: nop
    0x38003186bb03f000, // delay slot: nop
    0x38203186bb03f000, // nop; thrsw
    0x38203186bb03f000, // nop; thrsw
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
    0x38203186bb03f000, // nop; thrsw
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
    0x38003186bb03f000, // nop
};
static int submit_csd(int fd, uint32_t phyaddr, uint32_t unifaddr, uint32_t handle) { | |
const uint32_t wg_x = 1; | |
const uint32_t wg_y = 1; | |
const uint32_t wg_z = 1; | |
const uint32_t wg_size = wg_x * wg_y * wg_z; | |
const uint32_t wgs_per_sg = 1; | |
const uint32_t bo_handles[] = { handle }; | |
drm_v3d_submit_csd csd = {0}; | |
csd.cfg[0] = wg_x << 16; | |
csd.cfg[1] = wg_y << 16; | |
csd.cfg[2] = wg_z << 16; | |
csd.cfg[3] = | |
((((wgs_per_sg * wg_size + 16u - 1u) / 16u) - 1u) << 12) | | |
(wgs_per_sg << 8) | | |
(wg_size & 0xff); | |
csd.cfg[4] = 1; | |
csd.cfg[5] = phyaddr; | |
csd.cfg[6] = unifaddr; | |
csd.coef[0] = 0; | |
csd.coef[1] = 0; | |
csd.coef[2] = 0; | |
csd.coef[3] = 0; | |
csd.bo_handles = (uintptr_t)bo_handles; | |
csd.bo_handle_count = sizeof(bo_handles)/sizeof(bo_handles[0]); | |
csd.in_sync = 0; | |
csd.out_sync = 0; | |
return ioctl(fd, IOCTL_V3D_SUBMIT_CSD, &csd); | |
} | |
static int wait_bo(int fd, uint32_t handle) { | |
drm_v3d_wait_bo wait; | |
wait.handle = handle; | |
wait.pad = 0; | |
wait.timeout_ns = 10e9; | |
return ioctl(fd, IOCTL_V3D_WAIT_BO, &wait); | |
} | |
/*
 * Return the current time of day in seconds, as reported by gettimeofday(),
 * with the microsecond part folded into the fraction.
 */
static double get_time(void) {
    // Zero-initialised so a failed gettimeofday() yields 0.0, not garbage.
    struct timeval now = {0};
    gettimeofday(&now, NULL);
    return (double)now.tv_sec + now.tv_usec * 1e-6;
}
// Timing model constants for the nop_loop shader.
// CYCLES_PER_INST is not referenced by the code visible in this file.
#define CYCLES_PER_INST (4)
// main() divides the requested instruction count by this to get the loop
// count handed to the shader, and requires the count to be a multiple of it.
#define INSTS_PER_LOOP (4)
int main(int argc, char* argv[]) { | |
if (argc < 2) { | |
fprintf(stderr, "Usage: %s <number>\n", argv[0]); | |
return 1; | |
} | |
uint32_t insts = (uint32_t)strtoul(argv[1], NULL, 10); | |
assert(insts % INSTS_PER_LOOP == 0); | |
int fd = open("/dev/dri/card0", O_RDWR); | |
assert(fd > 0); | |
drm_v3d_create_bo create_bo; | |
create_bo.size = 1024 + 1024; | |
create_bo.flags = 0; | |
{ | |
int res = ioctl(fd, IOCTL_V3D_CREATE_BO, &create_bo); | |
assert(res == 0); | |
} | |
uint32_t handle = create_bo.handle; | |
drm_v3d_mmap_bo mmap_bo; | |
mmap_bo.handle = handle; | |
mmap_bo.flags = 0; | |
{ | |
int res = ioctl(fd, IOCTL_V3D_MMAP_BO, &mmap_bo); | |
assert(res == 0); | |
} | |
void* usraddr = mmap(NULL, create_bo.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mmap_bo.offset); | |
if (usraddr == MAP_FAILED) { | |
perror("mmap"); | |
return 1; | |
} | |
memcpy(usraddr, nop_loop, sizeof(nop_loop)); | |
{ | |
uint32_t loop = insts / INSTS_PER_LOOP; | |
memcpy((uint8_t*)usraddr + 1024, &loop, sizeof(insts)); | |
} | |
uint32_t phyaddr = create_bo.offset; | |
uint32_t unifaddr = create_bo.offset + 1024; | |
printf("[loop:%u]\n", insts); | |
double start = get_time(); | |
if (submit_csd(fd, phyaddr, unifaddr, handle) < 0) { | |
perror("submit_csd"); | |
return 1; | |
} | |
if (wait_bo(fd, handle) < 0) { | |
perror("wait_bo"); | |
return 1; | |
} | |
double end = get_time(); | |
printf("%.6lf sec\n", end - start); | |
{ | |
int res = munmap(usraddr, sizeof(nop_loop)); | |
assert(res == 0); | |
} | |
gem_close cl; | |
cl.handle = handle; | |
ioctl(fd, IOCTL_GEM_CLOSE, &cl); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment