Skip to content

Instantly share code, notes, and snippets.

@EBADBEEF
Created October 3, 2024 05:07
Show Gist options
  • Save EBADBEEF/f168458028f684a91148f4d3e791ba84 to your computer and use it in GitHub Desktop.
Save EBADBEEF/f168458028f684a91148f4d3e791ba84 to your computer and use it in GitHub Desktop.
A Linux userspace memory reclaimer
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
/* Byte-size multipliers (powers of 1024). */
#define KB (1024)
#define MB (1024*1024)
#define GB (1024*1024*1024)
/* Nanoseconds per second; a long constant, like timespec.tv_nsec. */
#define NSEC_PER_SEC (1000000000L)
/* Number of elements in a real array (not a pointer). The argument is
 * parenthesized before indexing so an expression argument is indexed as a
 * whole rather than having [0] bind to only its last operand. */
#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
/* Smaller of a and b; the selected argument is evaluated twice. */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define BUF_LEN (4096) /* fit the whole /proc/meminfo */
/* Fallback for the helper from bsd/sys/time.h: compare two struct timespec
 * pointers using the relational operator `cmp`. The seconds fields decide
 * the result unless they are equal, in which case the nanoseconds fields do.
 * Both pointer arguments are expanded more than once. */
#ifndef timespeccmp
#define timespeccmp(tsp, usp, cmp) \
    (((tsp)->tv_sec != (usp)->tv_sec) \
        ? ((tsp)->tv_sec cmp (usp)->tv_sec) \
        : ((tsp)->tv_nsec cmp (usp)->tv_nsec))
#endif
/* Fallback definition, used only when the platform headers do not already
 * provide timespecsub: stores (*tsp - *usp) into *vsp. When the nanosecond
 * difference comes out negative, one second is borrowed so that tv_nsec ends
 * up non-negative. Every argument is expanded more than once, so pass
 * side-effect-free pointer expressions. */
#ifndef timespecsub
#define timespecsub(tsp, usp, vsp) \
do { \
(vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \
(vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \
if ((vsp)->tv_nsec < 0) { \
(vsp)->tv_sec--; \
(vsp)->tv_nsec += NSEC_PER_SEC; \
} \
} while (0)
#endif
/* Indexes into matchers[]. */
enum matcher_type {
    MEM_FREE = 0,
    MEM_AVAILABLE,
};

/* One /proc/meminfo key to look for, plus the value parsed for it. */
struct matcher {
    const char * const s;    /* key text, including the trailing ':' */
    const size_t len;        /* length of s, excluding the terminator */
    unsigned long long ull;  /* number that followed the key */
};

/* Table of keys, indexed by enum matcher_type. */
struct matcher matchers[] = {
#define MATCHER_ENTRY(idx, key) [idx] = { .s = key, .len = sizeof(key) - 1 }
    /* order matters */
    MATCHER_ENTRY(MEM_FREE, "MemFree:"),
    MATCHER_ENTRY(MEM_AVAILABLE, "MemAvailable:"),
#undef MATCHER_ENTRY
};
/* Built-in defaults; the three thresholds are unsigned long long counts of KiB. */
#define WATERMARK_LOW_DEFAULT_KB (512ULL * (MB / KB))
#define RECLAIM_DEFAULT_KB       (1ULL * (GB / KB))
#define OOM_LOW_DEFAULT_KB       (350ULL * (MB / KB))
/* Window, in seconds, used to decide between another reclaim and drop_caches. */
#define RECLAIM_DROP_CACHES_SEC (5)
/* echo 3 > /proc/sys/vm/drop_caches */
#define DROP_CACHES_STR ("3")
/*
 * Print the command-line synopsis, what each positional argument controls,
 * and the built-in default values to stdout, then flush stdout.
 */
void usage(void) {
    printf("Usage: reclaimer [watermark_low] [reclaim] [oom_low]\n");
    printf("Description:\n");
    printf(" When MemFree goes below [watermark_low] KB, trigger a memory reclaim by writing [reclaim] to /sys/fs/cgroup/memory.reclaim\n");
    printf(" If memory.reclaim happens twice in a period of %d seconds, write \"%s\" to /proc/sys/vm/drop_caches\n", RECLAIM_DROP_CACHES_SEC, DROP_CACHES_STR);
    /* This line previously lacked its newline, gluing "Defaults:" onto it. */
    printf(" When MemAvailable goes below [oom_low] KB, trigger a kernel OOM by writing 'f' to /proc/sysrq-trigger\n");
    printf("Defaults:\n");
    printf(" watermark_low (in KB) = %llu\n", WATERMARK_LOW_DEFAULT_KB);
    printf(" reclaim (in KB) = %llu\n", RECLAIM_DEFAULT_KB);
    printf(" oom_low (in KB) = %llu\n", OOM_LOW_DEFAULT_KB);
    fflush(stdout);
}
/* Timestamps around the reclaim / drop_caches steps, plus a slot that
 * receives the difference between two timestamps. */
struct timing {
    struct timespec reclaim_start, reclaim_end;
    struct timespec dropcaches_start, dropcaches_end;
    struct timespec diff;
};
/* Report a failed call via perror() and terminate the process. */
static void die(const char *what)
{
    perror(what);
    exit(EXIT_FAILURE);
}

/*
 * Scan a NUL-terminated snapshot of /proc/meminfo and store, for each entry
 * of matchers[], the number that follows its key into that entry's .ull.
 * Keys are only recognised at the start of a line and are searched for in
 * array order.
 *
 * Returns 0 when every matcher was found, -1 otherwise.
 */
static int parse_meminfo(const char *text)
{
    const char *p = text;
    size_t mi = 0; /* "match index" */
    int at_line_start = 1;

    while (*p != '\0' && mi < ARRAY_SIZE(matchers)) {
        /* strncmp stops at the terminating NUL, so a key that begins close
         * to the end of the buffer cannot cause a read past it. */
        if (at_line_start &&
            strncmp(p, matchers[mi].s, matchers[mi].len) == 0) {
            char *end;

            matchers[mi].ull = strtoull(p + matchers[mi].len, &end, 10);
            /* Resume at the first unparsed character without skipping it,
             * so neither a '\n' nor the terminating NUL can be stepped over. */
            p = end;
            at_line_start = 0;
            ++mi;
            continue;
        }
        at_line_start = (*p == '\n');
        ++p;
    }
    return mi == ARRAY_SIZE(matchers) ? 0 : -1;
}

/*
 * Poll /proc/meminfo once per second and react to low memory:
 *   - MemAvailable < oom_low:   write 'f' to /proc/sysrq-trigger.
 *   - MemFree < watermark_low:  write a reclaim request to
 *     /sys/fs/cgroup/memory.reclaim, or, if the previous reclaim ended less
 *     than RECLAIM_DROP_CACHES_SEC seconds ago, write DROP_CACHES_STR to
 *     /proc/sys/vm/drop_caches instead.
 *
 * argv[1..3] optionally give watermark_low, reclaim and oom_low in KiB; a
 * missing, zero or unparsable value selects the built-in default. Any
 * argument starting with '-' or '/' prints the usage text and returns 0.
 * Otherwise the function loops forever and only leaves via exit() on error.
 */
int main(int argc, char **argv)
{
    char buf[BUF_LEN];
    unsigned long long watermark_low_kb = 0;
    unsigned long long reclaim_kb = 0;
    unsigned long long oom_low_kb = 0;
    int has_reclaimed = 0;
    int first_pass = 1;
    ssize_t nr;
    int len;
    int fd;
    int fd_reclaim;
    int fd_sysrq;
    int fd_drop_caches;
    struct timing t;

    /* reclaim_end is subtracted from before it is first assigned; start
     * from a defined value instead of indeterminate stack contents. */
    memset(&t, 0, sizeof(t));

    for (int i = 1; i < argc; ++i) {
        if (argv[i][0] == '-' || argv[i][0] == '/') {
            usage();
            return 0;
        }
    }

    if (argc >= 2)
        watermark_low_kb = strtoull(argv[1], NULL, 10);
    if (argc >= 3)
        reclaim_kb = strtoull(argv[2], NULL, 10);
    if (argc >= 4)
        oom_low_kb = strtoull(argv[3], NULL, 10);

    if (watermark_low_kb == 0)
        watermark_low_kb = WATERMARK_LOW_DEFAULT_KB;
    if (reclaim_kb == 0)
        reclaim_kb = RECLAIM_DEFAULT_KB;
    if (oom_low_kb == 0)
        oom_low_kb = OOM_LOW_DEFAULT_KB;

    printf("watermark_low = %llu KiB\n", watermark_low_kb);
    printf("reclaim = %llu KiB\n", reclaim_kb);
    printf("oom_low = %llu KiB\n", oom_low_kb);
    printf("drop_caches_str = %s\n", DROP_CACHES_STR);
    fflush(stdout);

    /* Open everything up front so no open() is needed once memory is tight. */
    fd = open("/proc/meminfo", O_RDONLY);
    if (fd < 0)
        die("failed to open meminfo");
    fd_reclaim = open("/sys/fs/cgroup/memory.reclaim", O_WRONLY);
    if (fd_reclaim < 0)
        die("failed to open memory.reclaim");
    fd_sysrq = open("/proc/sysrq-trigger", O_WRONLY);
    if (fd_sysrq < 0)
        die("failed to open sysrq-trigger");
    fd_drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY);
    if (fd_drop_caches < 0)
        die("failed to open drop_caches");

    /* Best effort: keep running even if the pages cannot be locked. */
    if (mlockall(MCL_CURRENT) < 0)
        perror("mlockall failed, continuing without locked memory");

    for (;;) {
        int reclaim_short = 0;

        /* One-second poll interval, skipped before the very first sample. */
        if (!first_pass)
            sleep(1);
        first_pass = 0;

        /* Zero-fill and read at most BUF_LEN-1 bytes so buf stays a string. */
        memset(buf, 0, BUF_LEN);
        if (lseek(fd, 0, SEEK_SET) < 0)
            die("failed to seek meminfo");
        nr = read(fd, buf, BUF_LEN-1);
        if (nr < 0)
            die("failed to read");

        if (parse_meminfo(buf) < 0) {
            fprintf(stderr, "failed to find all matches\n");
            exit(EXIT_FAILURE);
        }

        if (matchers[MEM_AVAILABLE].ull < oom_low_kb) {
            /* Send just the command character, not its NUL terminator. */
            nr = write(fd_sysrq, "f", 1);
            if (nr < 0)
                die("failed to write sysrq-trigger");
            printf("Triggered OOM, MemFree was %lluK, MemAvailable was %llu\n",
                   matchers[MEM_FREE].ull, matchers[MEM_AVAILABLE].ull);
            fflush(stdout);
        }

        if (matchers[MEM_FREE].ull >= watermark_low_kb)
            continue;

        clock_gettime(CLOCK_MONOTONIC, &t.reclaim_start);
        timespecsub(&t.reclaim_start, &t.reclaim_end, &t.diff);

        /* Sometimes we get stuck in a reclaim loop, detect that and break out by
         * writing to drop_caches. If the start of the next reclaim is less than
         * RECLAIM_DROP_CACHES_SEC seconds from the end of the previous reclaim,
         * break out. */
        if (has_reclaimed && (t.diff.tv_sec < RECLAIM_DROP_CACHES_SEC)) {
            /* NOTE(review): SEEK_END is kept from the original; it is assumed
             * to resolve to offset 0 on this pseudo-file - confirm. */
            lseek(fd_drop_caches, 0, SEEK_END);
            len = snprintf(buf, sizeof(buf), "%s\n", DROP_CACHES_STR);
            if (len < 0)
                die("failed to write string");
            /* Write the value and its newline only; the terminating NUL is
             * not part of the request. */
            nr = write(fd_drop_caches, buf, (size_t)len);
            if (nr < 0)
                die("failed to write drop_caches");
            printf("Completed drop_caches (%s)\n", DROP_CACHES_STR);
            fflush(stdout);
            continue;
        }

        /* free up to reclaim_kb but not more than MemAvailable */
        len = snprintf(buf, sizeof(buf), "%lluK",
                       MIN(reclaim_kb, matchers[MEM_AVAILABLE].ull));
        if (len < 0)
            die("failed to write string");
        lseek(fd_reclaim, 0, SEEK_END);
        nr = write(fd_reclaim, buf, (size_t)len);
        if (nr < 0) {
            /* memory.reclaim reports EAGAIN when fewer bytes than requested
             * were reclaimed. That is a partial result, not a reason to stop
             * monitoring; the drop_caches path above handles the follow-up. */
            if (errno != EAGAIN)
                die("failed to write reclaim");
            reclaim_short = 1;
        }
        has_reclaimed = 1;

        clock_gettime(CLOCK_MONOTONIC, &t.reclaim_end);
        timespecsub(&t.reclaim_end, &t.reclaim_start, &t.diff);
        printf("%s reclaim of %s in %ld.%02lds, MemFree was %lluK, MemAvailable was %llu\n"
               ,reclaim_short ? "Partial" : "Completed"
               ,buf
               ,(long)t.diff.tv_sec, (long)(t.diff.tv_nsec/(NSEC_PER_SEC/100))
               ,matchers[MEM_FREE].ull, matchers[MEM_AVAILABLE].ull
               );
        fflush(stdout);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment