-
-
Save WJWH/f3a196e65fdabd6eace5f89da430600e to your computer and use it in GitHub Desktop.
// Extremely hacky server program that sends a standard response
// to every client that connects, then closes the connection. It
// issues no system calls (as measured by `strace`) after initial setup,
// no matter how many requests are served.
// Yes, this program is sorely lacking in error checking. It's a toy
// and not meant to be taken seriously.
// Compile with: gcc no_syscall_server.c -luring
#include <stdio.h> | |
#include <netinet/in.h> | |
#include <string.h> | |
#include <unistd.h> | |
#include <stdlib.h> | |
#include <signal.h> | |
#include <liburing.h> | |
int ENTRIES = 1024; | |
struct io_uring ring; | |
// Canned HTTP response sent verbatim to every client. The Content-length
// header must match the body: "Have a nice day!\n" is 17 bytes long.
const char *const standard_response =
    "HTTP/1.0 200 OK\r\n"
    "Content-type: text/html\r\n"
    "Content-length: 17\r\n"
    "\r\n"
    "Have a nice day!\n";
// Create a TCP socket that listens on every local IPv4 address at `port`.
//
// port: TCP port number in host byte order (0..65535).
// Returns the listening socket's file descriptor.
//
// Any failure to create, bind or listen is fatal: a diagnostic is printed
// and the process exits, because the rest of the program has nothing useful
// to do without a listening socket. (Previously a failed bind() went
// unnoticed and listen() would then pick an arbitrary port on its own.)
int setup_listening_socket(int port) {
    enum { LISTEN_BACKLOG = 10 };
    const int enable = 1;
    struct sockaddr_in srv_addr;

    if (port < 0 || port > 65535) {
        fprintf(stderr, "setup_listening_socket: invalid port %d\n", port);
        exit(EXIT_FAILURE);
    }

    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    // SO_REUSEADDR only makes quick restarts possible; failing to set it
    // is worth a warning but does not prevent the server from working.
    if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof enable) < 0) {
        perror("setsockopt(SO_REUSEADDR)");
    }

    memset(&srv_addr, 0, sizeof srv_addr);
    srv_addr.sin_family = AF_INET;
    srv_addr.sin_port = htons((in_port_t)port);
    srv_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    if (bind(sock, (const struct sockaddr *)&srv_addr, sizeof srv_addr) < 0) {
        perror("bind");
        close(sock);
        exit(EXIT_FAILURE);
    }

    if (listen(sock, LISTEN_BACKLOG) < 0) {
        perror("listen");
        close(sock);
        exit(EXIT_FAILURE);
    }

    return sock;
}
void add_accept_request(int server_socket, | |
struct sockaddr_in *client_addr, | |
socklen_t *client_addr_len) { | |
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); | |
io_uring_prep_accept(sqe, | |
server_socket, | |
(struct sockaddr *) client_addr, | |
client_addr_len, | |
0); | |
// magic number in the userdata to differentiate between accept CQEs and others | |
io_uring_sqe_set_data(sqe, (void*) 123); | |
io_uring_submit(&ring); | |
} | |
void add_write_and_close_requests(int fd) { | |
struct io_uring_sqe *sqe; | |
sqe = io_uring_get_sqe(&ring); | |
io_uring_prep_write(sqe, fd, standard_response, strlen(standard_response), 0); | |
// make sure the write is complete before doing the close(): | |
sqe->flags |= IOSQE_IO_LINK; | |
sqe = io_uring_get_sqe(&ring); | |
io_uring_prep_close(sqe, fd); | |
io_uring_submit(&ring); | |
} | |
void server_loop(int server_socket) { | |
struct io_uring_cqe *cqe; | |
struct sockaddr_in client_addr; | |
socklen_t client_addr_len = sizeof(client_addr); | |
int peek_result = 0; | |
// initial accept call | |
add_accept_request(server_socket, &client_addr, &client_addr_len); | |
while(1){ | |
peek_result = io_uring_peek_cqe(&ring,&cqe); | |
// peek_result is 0 if a cqe was available and -errno otherwise | |
if(!peek_result){ | |
if (cqe->user_data == 123) { | |
// accept CQE | |
add_write_and_close_requests(cqe->res); | |
add_accept_request(server_socket, &client_addr, &client_addr_len); | |
} | |
else { | |
// no action required | |
} | |
io_uring_cqe_seen(&ring, cqe); | |
} | |
} | |
} | |
void sigint_handler(int signo) { | |
printf("^C pressed. Shutting down.\n"); | |
io_uring_queue_exit(&ring); | |
exit(0); | |
} | |
int main(){ | |
struct io_uring_params params; | |
int server_socket; | |
if (geteuid()) { | |
fprintf(stderr, "You need root privileges to run this program.\n"); | |
return 1; | |
} | |
memset(¶ms, 0, sizeof(params)); | |
params.flags |= IORING_SETUP_SQPOLL; | |
params.sq_thread_idle = 120000; // 2 minutes in ms | |
io_uring_queue_init_params(ENTRIES, &ring, ¶ms); | |
signal(SIGINT, sigint_handler); | |
server_socket = setup_listening_socket(8000); | |
server_loop(server_socket); | |
} |
Your code made me realize that io_uring_wait_cqe() will always result in an io_uring_enter() syscall (I think?). Resolved :) Switching to io_uring_peek_cqe(), like your example, removed the syscalls. I had expected io_uring_enter() to check whether polling mode was enabled, like io_uring_submit() does, and if it is, to do a busy userspace spin instead of waiting for an interrupt. I'll have to read into the liburing source to confirm.
If I remember correctly, io_uring_wait_cqe() will check whether any CQEs are available and return immediately if one is, but will use io_uring_enter() if none are available. As you've already discovered, if you want busy-spinning behavior you have to build it yourself with io_uring_peek_cqe(). I assume you've already found it, but the reasoning behind this snippet is in a blog post at https://wjwh.eu/posts/2021-10-01-no-syscall-server-iouring.html.
Yes, I came from your blog post! After rereading it more carefully, I realized that your post does mention that io_uring_wait_cqe() may result in a syscall if no CQEs are available.
I think a more user-friendly design for liburing, though, might be to let io_uring_wait_cqe() do busy spinning if IORING_SETUP_SQPOLL is set? Or let a flag enable that; this way, there would be no need to completely refactor userspace logic to accommodate polling. io_uring_submit() already has this kind of overloaded behavior. This might be more of a question for the liburing maintainers, though.
some pseudocode:
while (1) {
cqe = io_uring_wait_cqe()
switch (cqe->data) {
// handle cqe
}
io_uring_seen(cqe);
}
So with the pseudocode above, the while loop could be either blocking or SQPOLL-style polling, depending on how io_uring_queue_init() was called.
I see what you mean, but I also think making it dependent on IORING_SETUP_SQPOLL might be confusing. Its name indicates polling of the Submission Queue, not the Completion Queue. Perhaps a new flag, IORING_SETUP_CQPOLL, could be introduced for when you want spin-looping behavior in io_uring_wait_cqe(), but given how easy it is to roll your own with io_uring_peek_cqe(), I don't know if it would add all that much.
In any case I'm not on the liburing maintainers team though, so you should probably ask over there if you want to make this happen. Have fun hacking on io_uring! 🙂
thank you, very cool!
I was struggling with my own example. I enabled IORING_SETUP_SQPOLL in my initialization, and one CPU core did show 100%, so I think the polling was definitely taking place, but strace was still showing an io_uring_enter() syscall with each request on mine. Your example worked for me, no syscalls! I'll try to debug my own version with yours as reference.