Last active
May 27, 2024 01:48
-
-
Save harrisonturton/abecaf00f3c3b35b7aa3881f6937990b to your computer and use it in GitHub Desktop.
Read a file with io_uring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h> | |
#include <fcntl.h> | |
#include <linux/io_uring.h> | |
#include <signal.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <sys/syscall.h> | |
#include <unistd.h> | |
/* Number of ring entries requested when the uring is created. */
#define RING_ENTRIES 256
/* Size in bytes of the buffer the file is read into (8 KiB). */
#define BUFLEN (8 * 1024)
/**
 * Submission queue ring.
 *
 * Every member is a pointer into the region mmapped from the ring file
 * descriptor; each one is placed at the matching offset the kernel reports
 * in `io_uring_params.sq_off`.
 */
struct sq_ring {
    uint32_t *head;         /* at sq_off.head */
    uint32_t *tail;         /* at sq_off.tail; advanced when an SQE is queued */
    uint32_t *ring_mask;    /* at sq_off.ring_mask; ANDed with tail to get a slot */
    uint32_t *ring_entries; /* at sq_off.ring_entries */
    uint32_t *flags;        /* at sq_off.flags */
    uint32_t *dropped;      /* at sq_off.dropped */
    uint32_t *array;        /* at sq_off.array; holds indices into the SQE buffer */
};
/**
 * Completion queue ring.
 *
 * Every member is a pointer into the region mmapped from the ring file
 * descriptor; each one is placed at the matching offset the kernel reports
 * in `io_uring_params.cq_off`.
 */
struct cq_ring {
    uint32_t *head;            /* at cq_off.head; advanced when a CQE is consumed */
    uint32_t *tail;            /* at cq_off.tail */
    uint32_t *ring_mask;       /* at cq_off.ring_mask; ANDed with head to get a slot */
    uint32_t *ring_entries;    /* at cq_off.ring_entries */
    uint32_t *flags;           /* at cq_off.flags */
    uint32_t *overflow;        /* at cq_off.overflow */
    struct io_uring_cqe *cqes; /* at cq_off.cqes; the completion entries themselves */
};
struct uring { | |
unsigned int fd; | |
struct sq_ring sq_ring; | |
struct cq_ring cq_ring; | |
struct io_uring_sqe *sqes; | |
}; | |
/**
 * Thin wrapper around the raw `io_uring_setup` syscall.
 *
 * Returns the non-negative syscall result on success, or the negated
 * `errno` value when the syscall fails.
 */
static inline int io_uring_setup(unsigned int entries,
                                 struct io_uring_params *params)
{
    const int rc = syscall(__NR_io_uring_setup, entries, params);

    if (rc >= 0) {
        return rc;
    }
    return -errno;
}
/** | |
* Execute the `io_uring_enter` syscall. | |
*/ | |
static inline int io_uring_enter(unsigned int fd, unsigned int to_submit, | |
unsigned int min_complete, unsigned int flags, | |
sigset_t sig) | |
{ | |
int ret = syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags, | |
sig, _NSIG / 8); | |
return ret < 0 ? -errno : ret; | |
} | |
/** | |
* Setup the submission queue ring and mmap it from kernel space. | |
*/ | |
int sq_ring_setup(int ringfd, const struct io_uring_params *params, | |
struct sq_ring *sq_ring) | |
{ | |
size_t len; | |
void *ptr; | |
len = params->sq_off.array + params->sq_entries * sizeof(uint32_t); | |
ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, | |
ringfd, IORING_OFF_SQ_RING); | |
if (ptr == MAP_FAILED) | |
return -1; | |
sq_ring->head = ptr + params->sq_off.head; | |
sq_ring->tail = ptr + params->sq_off.tail; | |
sq_ring->ring_mask = ptr + params->sq_off.ring_mask; | |
sq_ring->ring_entries = ptr + params->sq_off.ring_entries; | |
sq_ring->flags = ptr + params->sq_off.flags; | |
sq_ring->dropped = ptr + params->sq_off.dropped; | |
sq_ring->array = ptr + params->sq_off.array; | |
return 0; | |
} | |
/** | |
* Setup the completion queue ring and mmap it from kernel space. | |
*/ | |
int cq_ring_setup(int ringfd, const struct io_uring_params *params, | |
struct cq_ring *cq_ring) | |
{ | |
size_t len; | |
void *ptr; | |
len = params->cq_off.cqes + params->cq_entries * sizeof(struct io_uring_cqe); | |
ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, | |
ringfd, IORING_OFF_CQ_RING); | |
if (ptr == MAP_FAILED) | |
return -1; | |
cq_ring->head = ptr + params->cq_off.head; | |
cq_ring->tail = ptr + params->cq_off.tail; | |
cq_ring->ring_mask = ptr + params->cq_off.ring_mask; | |
cq_ring->ring_entries = ptr + params->cq_off.ring_entries; | |
cq_ring->overflow = ptr + params->cq_off.overflow; | |
cq_ring->cqes = ptr + params->cq_off.cqes; | |
cq_ring->flags = ptr + params->cq_off.flags; | |
return 0; | |
} | |
/** | |
* Setup the SQE buffer and mmap it from kernel space. | |
*/ | |
int sqe_buffer_setup(int ringfd, const struct io_uring_params *params, | |
struct io_uring_sqe **sqes) | |
{ | |
size_t len; | |
void *ptr; | |
len = params->sq_entries * sizeof(struct io_uring_sqe); | |
ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, | |
ringfd, IORING_OFF_SQES); | |
if (ptr == MAP_FAILED) | |
return -1; | |
*sqes = ptr; | |
return 0; | |
} | |
/**
 * Fill in an SQE that reads up to `buflen` bytes from `filefd` into `buf`.
 *
 * Despite the function's name, the request uses the plain `IORING_OP_READ`
 * opcode (single buffer), not the vectored READV opcode.
 *
 * Only `opcode`, `fd`, `addr` and `len` are written; every other field of
 * `sqe` (including the file offset) keeps whatever value the caller put
 * there, so the caller should pass in a zero-initialised SQE.
 *
 * The SQE length field is 32 bits wide. A `buflen` larger than UINT32_MAX
 * is clamped instead of being truncated modulo 2^32, so an oversized
 * request turns into a shorter read rather than an arbitrary length.
 */
void prep_readv_sqe(struct io_uring_sqe *sqe, unsigned int filefd, char *buf,
                    size_t buflen)
{
    sqe->opcode = IORING_OP_READ;
    sqe->fd = (int)filefd;
    /* Go through uintptr_t: it is the integer type meant to hold a pointer. */
    sqe->addr = (uint64_t)(uintptr_t)buf;
    sqe->len = buflen > UINT32_MAX ? UINT32_MAX : (uint32_t)buflen;
}
/** | |
* Submit an arbitrary SQE to the uring and wait for it to complete. | |
*/ | |
int submit_sqe(struct uring *uring, struct io_uring_sqe *sqe_to_submit) | |
{ | |
uint32_t curr_tail = *uring->sq_ring.tail; | |
uint32_t next_tail = curr_tail + 1; | |
uint32_t index = curr_tail & *uring->sq_ring.ring_mask; | |
// Copy the SQE into the SQE buffer. This could be constructed in place, but | |
// the copy is fine for this example. | |
struct io_uring_sqe *sqe = &uring->sqes[index]; | |
memcpy(sqe, sqe_to_submit, sizeof(struct io_uring_sqe)); | |
uring->sq_ring.array[index] = index; | |
*uring->sq_ring.tail = next_tail; | |
__sync_synchronize(); | |
sigset_t sigset; | |
sigemptyset(&sigset); | |
if (io_uring_enter(uring->fd, 1, 1, IORING_ENTER_GETEVENTS, sigset) < 0) { | |
return -1; | |
} | |
return 0; | |
} | |
/** | |
* Read an arbitrary CQE from the uring. | |
*/ | |
int recv_cqe(struct uring *uring, struct io_uring_cqe *cqe) | |
{ | |
unsigned int head = 0; | |
do { | |
__sync_synchronize(); | |
if (head == *uring->cq_ring.tail) { | |
fprintf(stderr, "Tried to read empty completion ring\n"); | |
break; | |
} | |
cqe = &uring->cq_ring.cqes[head & (*uring->cq_ring.ring_mask)]; | |
head++; | |
} while (1); | |
*uring->cq_ring.head = head; | |
__sync_synchronize(); | |
return 0; | |
} | |
int main(int argc, char *argv[]) | |
{ | |
// Read the file | |
unsigned int fd; | |
char *pathname; | |
if (argc < 2) { | |
fprintf(stderr, "Usage: %s [filename]\n", argv[0]); | |
exit(1); | |
} | |
pathname = argv[1]; | |
fd = open(pathname, O_RDONLY); | |
if (fd < 0) { | |
fprintf(stderr, "Failed to open file\n"); | |
exit(1); | |
} | |
printf("Reading file with fd %d\n", fd); | |
// Setup the uring | |
struct io_uring_params *params; | |
int ringfd; | |
params = calloc(1, sizeof(struct io_uring_params)); | |
if (!params) { | |
fprintf(stderr, "Failed to allocate memory for params: %s\n", | |
strerror(-errno)); | |
exit(1); | |
} | |
ringfd = io_uring_setup(RING_ENTRIES, params); | |
if (ringfd < 0) { | |
fprintf(stderr, "Failed to create uring: %s\n", strerror(-ringfd)); | |
exit(1); | |
} | |
struct uring uring = { | |
.fd = ringfd, | |
}; | |
if (sq_ring_setup(ringfd, params, &uring.sq_ring) < 0) { | |
fprintf(stderr, "Failed to setup sq_ring\n"); | |
exit(1); | |
} | |
if (cq_ring_setup(ringfd, params, &uring.cq_ring) < 0) { | |
fprintf(stderr, "Failed to setup cq_ring\n"); | |
exit(1); | |
} | |
if (sqe_buffer_setup(ringfd, params, &uring.sqes) < 0) { | |
fprintf(stderr, "Failed to setup sqe buffer\n"); | |
exit(1); | |
} | |
// Create and execute the READV operation | |
size_t buflen = BUFLEN; | |
char buf[buflen]; | |
struct io_uring_sqe sqe = {}; | |
struct io_uring_cqe cqe = {}; | |
prep_readv_sqe(&sqe, fd, buf, buflen); | |
if (submit_sqe(&uring, &sqe) < 0) { | |
fprintf(stderr, "Failed to submit read sqe\n"); | |
exit(1); | |
} | |
if (recv_cqe(&uring, &cqe) < 0) { | |
fprintf(stderr, "Failed to receive read cqe\n"); | |
exit(1); | |
} | |
if (cqe.res < 0) { | |
fprintf(stderr, "Received CQE with error: %s\n", strerror(cqe.res)); | |
exit(1); | |
} | |
printf("Received: %s\n", buf); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment