Last active
November 13, 2024 15:55
-
-
Save teknoraver/ed341c5506027c7cdda9e759fdd30c21 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*.o | |
sysctl_monitor | |
vmlinux.h | |
*.skel.h |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the sysctl monitor: a BPF object, its libbpf skeleton, and the
# userspace loader that links against libbpf.
LDLIBS += -lbpf
CFLAGS += -O2 -pipe -g -Wall

all:: sysctl_monitor

# Kernel type definitions, generated from the BTF of the running kernel.
vmlinux.h:
	bpftool btf dump file /sys/kernel/btf/vmlinux format c > $@

# Skeleton header embedding the compiled BPF object.
sysctl_monitor_bpf.skel.h: sysctl_monitor.bpf.o
	bpftool gen skeleton $< > $@

# The BPF program includes sysctl-write-event.h, so it has to be rebuilt when
# the event layout changes. Only $< is compiled, extra prerequisites are safe.
sysctl_monitor.bpf.o: sysctl_monitor.bpf.c vmlinux.h sysctl-write-event.h
	clang $(CFLAGS) -target bpf -c $< -o $@

# Built by make's implicit %: %.c rule.
sysctl_monitor: sysctl_monitor.c sysctl_monitor_bpf.skel.h

clean::
	$(RM) *.o sysctl_monitor *.skel.h vmlinux.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#ifndef TASK_COMM_LEN | |
#define TASK_COMM_LEN 16 | |
#endif | |
/* It would be nice to size these members to bigger values, but the stack | |
* in BPF programs is limited to 512 bytes, and allocating bigger structures | |
* leads to this compile time error: | |
* error: Looks like the BPF stack limit is exceeded. | |
* Please move large on stack variables into BPF per-cpu array map. | |
* For non-kernel uses, the stack can be increased using -mllvm -bpf-stack-size. */ | |
struct sysctl_write_event { | |
/* Used to track changes in the struct layout */ | |
int version; | |
/* Error code returned to userspace to handle eventual failures. */ | |
int errorcode; | |
/* The PID of the process which is writing the sysctl. */ | |
pid_t pid; | |
/* The name of the binary. */ | |
char comm[TASK_COMM_LEN]; | |
/* The path of the sysctl, relative to /proc/sys/. | |
* The longest path observed is 64 bytes: | |
* net/ipv4/conf/123456789012345/igmpv3_unsolicited_report_interval */ | |
char path[80]; | |
/* The value of the sysctl just before the write. | |
* The longest value observed is net.core.netdev_rss_key which | |
* contains 155 bytes. */ | |
char current[160]; | |
/* The new value being written into the sysctl. | |
* same sizing as 'current' */ | |
char newvalue[160]; | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "vmlinux.h" | |
#include <bpf/bpf_helpers.h> | |
#include "sysctl-write-event.h" | |
struct { | |
__uint(type, BPF_MAP_TYPE_RINGBUF); | |
__uint(max_entries, 256 * 1024); | |
} written_sysctls SEC(".maps"); | |
/*
 * Bounded string equality.
 *
 * Compares s1 and s2 byte by byte, looking at no more than l bytes.
 * Returns true when both strings terminate at the same position with
 * identical content, or when the first l bytes are identical; returns false
 * at the first mismatch.
 */
static bool my_streq(const char *s1, const char *s2, size_t l)
{
	size_t pos = 0;

	while (pos < l) {
		char c = s1[pos];

		if (c != s2[pos])
			return false;

		/* Both strings ended here: nothing left to compare. */
		if (c == '\0')
			return true;

		pos++;
	}

	return true;
}
/* Buffer descriptor passed as context to the bpf_loop() callback. */
struct str {
	char *s;	/* first byte of the buffer */
	size_t l;	/* size of the buffer, in bytes */
};
/*
 * bpf_loop() callback: iteration i inspects the i-th byte counting from the
 * end of the buffer described by str.
 *
 * NUL bytes are skipped, whitespace (newline, carriage return, space, tab)
 * is overwritten with NUL, and the first other byte ends the loop.
 * Returns 0 to keep iterating and 1 to stop.
 */
static long cut_last(u64 i, struct str *str)
{
	char *pos;
	char c;

	// Bounds check, required to keep the verifier happy
	if (i >= str->l)
		return 1;

	pos = str->s + (str->l - i - 1);
	c = *pos;

	// Still in the unused tail of the buffer
	if (c == 0)
		return 0;

	// First real character reached: the string is trimmed
	if (c != '\n' && c != '\r' && c != ' ' && c != '\t')
		return 1;

	*pos = 0;
	return 0;
}
// Cut off trailing whitespace and newlines | |
static void chop(char *s, size_t l) | |
{ | |
struct str str = { s, l }; | |
bpf_loop(l, cut_last, &str, 0); | |
} | |
/*
 * cgroup/sysctl hook reporting sysctl writes to userspace.
 *
 * For every write it collects the writer's PID and command name, the sysctl
 * path, and the current and new values, then pushes a
 * struct sysctl_write_event into the written_sysctls ring buffer when the
 * new value differs from the current one. If any helper fails, an event
 * carrying the negative error code in 'errorcode' is sent instead.
 *
 * Always returns 1, so this program never rejects the access.
 */
SEC("cgroup/sysctl")
int sysctl_monitor(struct bpf_sysctl *ctx)
{
	int r;

	// Ignore reads
	if (!ctx->write)
		return 1;

	/* The whole record is copied to userspace, including the bytes after
	 * each string terminator and the members an early failure leaves
	 * unpopulated. Zero it up front so that no uninitialized stack content
	 * is ever exposed; 'version' stays 0 and 'errorcode' stays 0 unless a
	 * helper fails. */
	struct sysctl_write_event we = {0};

	/* Set the simple values first */
	we.pid = bpf_get_current_pid_tgid() >> 32;

	/* Full path of the sysctl, relative to /proc/sys/ (flags == 0).
	 * No filtering is done on it: every sysctl is monitored. */
	r = bpf_sysctl_get_name(ctx, we.path, sizeof(we.path), 0);
	if (r < 0)
		goto send_event;

	r = bpf_get_current_comm(we.comm, sizeof(we.comm));
	if (r < 0)
		goto send_event;

	r = bpf_sysctl_get_current_value(ctx, we.current, sizeof(we.current));
	if (r < 0)
		goto send_event;

	r = bpf_sysctl_get_new_value(ctx, we.newvalue, sizeof(we.newvalue));
	if (r < 0)
		goto send_event;

	// Both the kernel and userspace applications add a newline at the end,
	// remove it from both strings
	chop(we.current, sizeof(we.current));
	chop(we.newvalue, sizeof(we.newvalue));

send_event:
	// r still holds the result of the last helper that was called
	if (r < 0)
		we.errorcode = r;

	// Always report failures; skip writes which don't change the value
	if (r < 0 || !my_streq(we.current, we.newvalue, sizeof(we.current)))
		bpf_ringbuf_output(&written_sysctls, &we, sizeof(we), 0);

	return 1;
}
char _license[] SEC("license") = "GPL"; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "sysctl-write-event.h"
#include "sysctl_monitor_bpf.skel.h"
#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" | |
struct ring_buffer *rb; | |
static void int_exit(int sig) | |
{ | |
int cgfd = open(CGROUP_MOUNT_DFLT, O_PATH | O_DIRECTORY | O_CLOEXEC); | |
if (cgfd >= 0) { | |
bpf_prog_detach(cgfd, BPF_CGROUP_SYSCTL); | |
close(cgfd); | |
} | |
} | |
static int log_sysctl_writes(void *ctx, void *data, size_t data_sz) | |
{ | |
struct sysctl_write_event *we = data; | |
if (we->errorcode) | |
printf("Sysctl monitor BPF returned error: %d\n", we->errorcode); | |
else | |
printf("%s[%d] updated '%s' from '%s' to '%s'\n", we->comm, we->pid, we->path, we->current, we->newvalue); | |
return 0; | |
} | |
static int attach_bpf(void) | |
{ | |
struct sysctl_monitor_bpf *skel; | |
int progfd, cgfd; | |
int err; | |
cgfd = open(CGROUP_MOUNT_DFLT, O_PATH | O_DIRECTORY | O_CLOEXEC); | |
if (cgfd < 0) { | |
printf("failed to open cgroup mount point\n"); | |
return 1; | |
} | |
skel = sysctl_monitor_bpf__open_and_load(); | |
if (!skel) { | |
printf("failed to open and load BPF object\n"); | |
return 1; | |
} | |
err = sysctl_monitor_bpf__attach(skel); | |
if (err) { | |
printf("failed to attach BPF program\n"); | |
return 1; | |
} | |
rb = ring_buffer__new(bpf_map__fd(skel->maps.written_sysctls), log_sysctl_writes, NULL, NULL); | |
if (!rb) { | |
printf("failed to create ring buffer\n"); | |
return 1; | |
} | |
progfd = bpf_program__fd(skel->progs.sysctl_monitor); | |
if (bpf_prog_attach(progfd, cgfd, BPF_CGROUP_SYSCTL, BPF_F_ALLOW_OVERRIDE) < 0) { | |
close(progfd); | |
return 1; | |
} | |
close(progfd); | |
return 0; | |
} | |
int main(int argc, char **argv) | |
{ | |
int ret, cgfd; | |
signal(SIGINT, int_exit); | |
signal(SIGTERM, int_exit); | |
signal(SIGQUIT, int_exit); | |
if (attach_bpf()) | |
return 1; | |
// In business | |
while (1) { | |
ret = ring_buffer__poll(rb, 1000); | |
if (ret < 0) { | |
if (errno == EINTR) | |
break; | |
printf("Error polling ring buffer\n"); | |
break; | |
} | |
} | |
cgfd = open(CGROUP_MOUNT_DFLT, O_PATH | O_DIRECTORY | O_CLOEXEC); | |
if (cgfd >= 0) { | |
bpf_prog_detach(cgfd, BPF_CGROUP_SYSCTL); | |
close(cgfd); | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Test load for the monitor: once per second, write a random value between
# 100000 and 100099 into fs.mount-max.
while sleep 1; do
	value=$((100000 + RANDOM % 100))
	sysctl -q fs.mount-max="$value"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment