Created
October 3, 2024 05:07
-
-
Save EBADBEEF/f168458028f684a91148f4d3e791ba84 to your computer and use it in GitHub Desktop.
A Linux userspace memory reclaimer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fcntl.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <time.h> | |
#include <unistd.h> | |
/* Size multipliers; all are plain int constants. */
#define KB (1024)
#define MB (1024*1024)
#define GB (1024*1024*1024)
/* Nanoseconds per second, typed long. */
#define NSEC_PER_SEC (1000000000L)
/*
 * Number of elements in a real array (NOT a pointer). The argument is fully
 * parenthesized so any array-valued expression can be passed safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Smaller of a and b. Each argument may be evaluated twice: no side effects. */
#define MIN(a,b) ((a)<(b)?(a):(b))
/* Size of the read buffer; intended to fit the whole /proc/meminfo. */
#define BUF_LEN (4096)
/*
 * timespec helpers borrowed from bsd/sys/time.h. Each one is only defined
 * when the platform headers have not already provided it.
 */
#ifndef timespeccmp
/*
 * Compare two timespecs with the operator `cmp` (<, >, ==, ...): seconds
 * decide unless they are equal, in which case nanoseconds decide.
 */
#define timespeccmp(tsp, usp, cmp)                      \
	(((tsp)->tv_sec != (usp)->tv_sec)               \
	     ? ((tsp)->tv_sec cmp (usp)->tv_sec)        \
	     : ((tsp)->tv_nsec cmp (usp)->tv_nsec))
#endif
#ifndef timespecsub
/*
 * *vsp = *tsp - *usp, borrowing one second when the nanosecond
 * difference comes out negative.
 */
#define timespecsub(tsp, usp, vsp)                                  \
	do {                                                        \
		(vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec;      \
		(vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec;   \
		if ((vsp)->tv_nsec < 0) {                           \
			(vsp)->tv_sec -= 1;                         \
			(vsp)->tv_nsec += NSEC_PER_SEC;             \
		}                                                   \
	} while (0)
#endif
/* Indices into matchers[]; one entry per /proc/meminfo field we parse. */
enum matcher_type {
	MEM_FREE,      /* the "MemFree:" line */
	MEM_AVAILABLE, /* the "MemAvailable:" line */
};
/* One /proc/meminfo field to look for, plus the last value parsed for it. */
struct matcher {
	const char *const s;    /* line prefix to match, e.g. "MemFree:" */
	const size_t len;       /* length of s, excluding the terminating NUL */
	unsigned long long ull; /* most recently parsed numeric value */
};
struct matcher matchers[] = { | |
#define MAKE_MATCHER(a, x) [a] = { x, sizeof(x)-1 } | |
/* order matters */ | |
MAKE_MATCHER(MEM_FREE, "MemFree:"), | |
MAKE_MATCHER(MEM_AVAILABLE, "MemAvailable:"), | |
#undef MAKE_MATCHER | |
}; | |
/* Built-in defaults, all expressed in KiB as unsigned long long. */
#define WATERMARK_LOW_DEFAULT_KB (512ULL * (MB / KB)) /* 512 MiB */
#define RECLAIM_DEFAULT_KB       (1ULL * (GB / KB))   /* 1 GiB */
#define OOM_LOW_DEFAULT_KB       (350ULL * (MB / KB)) /* 350 MiB */
/* Two reclaims closer together than this many seconds trigger drop_caches. */
#define RECLAIM_DROP_CACHES_SEC  (5)
/* Value written to drop_caches, i.e. echo 3 > /proc/sys/vm/drop_caches */
#define DROP_CACHES_STR          ("3")
void usage() { | |
printf("Usage: reclaimer [watermark_low] [reclaim] [oom_low]\n"); | |
printf("Description:\n"); | |
printf(" When MemoryFree goes below [watermark_low] KB, trigger a memory reclaim by writing [reclaim] to /sys/fs/cgroup/memory.reclaim\n"); | |
printf(" If memory.reclaim happens twice in a period of %d seconds, write \"%s\" to /proc/sys/vm/drop_caches\n", RECLAIM_DROP_CACHES_SEC, DROP_CACHES_STR); | |
printf(" When MemoryAvailable goes below [oom_low] KB, trigger a kernel OOM by writing 'f' to /proc/sysrq-trigger"); | |
printf("Defaults:\n"); | |
printf(" watermark_low (in KB) = %llu\n", WATERMARK_LOW_DEFAULT_KB); | |
printf(" reclaim (in KB) = %llu\n", RECLAIM_DEFAULT_KB); | |
printf(" oom_low (in KB) = %llu\n", OOM_LOW_DEFAULT_KB); | |
fflush(stdout); | |
} | |
/* Monotonic-clock timestamps bracketing each action, plus a scratch delta. */
struct timing {
	struct timespec reclaim_start;    /* taken just before a reclaim attempt */
	struct timespec reclaim_end;      /* taken right after a reclaim write */
	struct timespec dropcaches_start;
	struct timespec dropcaches_end;
	struct timespec diff;             /* scratch result of timespecsub() */
};
int main(int argc, char **argv) | |
{ | |
char buf[BUF_LEN]; | |
char *c; | |
unsigned char ch, ch_prev; | |
unsigned long long watermark_low_kb = 0; | |
unsigned long long reclaim_kb = 0; | |
unsigned long long oom_low_kb = 0; | |
int has_reclaimed = 0; | |
ssize_t nr; | |
int mi; /* "match index" */ | |
int len; | |
int fd; | |
int fd_reclaim; | |
int fd_sysrq; | |
int fd_drop_caches; | |
struct timing t; | |
for(int i=1; i<argc; ++i) { | |
if (argv[i][0] == '-' || argv[i][0] == '/') { | |
usage(); | |
return 0; | |
} | |
} | |
if (argc >= 2) | |
watermark_low_kb = strtoull(argv[1], NULL, 10); | |
if (argc >= 3) | |
reclaim_kb = strtoull(argv[2], NULL, 10); | |
if (argc >= 4) | |
oom_low_kb = strtoull(argv[3], NULL, 10); | |
if (watermark_low_kb == 0) | |
watermark_low_kb = WATERMARK_LOW_DEFAULT_KB; | |
if (reclaim_kb == 0) | |
reclaim_kb = RECLAIM_DEFAULT_KB; | |
if (oom_low_kb == 0) | |
oom_low_kb = OOM_LOW_DEFAULT_KB; | |
printf("watermark_low = %llu KiB\n", watermark_low_kb); | |
printf("reclaim = %llu KiB\n", reclaim_kb); | |
printf("oom_low = %llu KiB\n", oom_low_kb); | |
printf("drop_caches_str = %s\n", DROP_CACHES_STR); | |
fflush(stdout); | |
fd = open("/proc/meminfo", O_RDONLY); | |
if (fd < 0) { | |
perror("failed to open meminfo"); | |
exit(EXIT_FAILURE); | |
} | |
fd_reclaim = open("/sys/fs/cgroup/memory.reclaim", O_WRONLY); | |
if (fd_reclaim < 0) { | |
perror("failed to open memory.reclaim"); | |
exit(EXIT_FAILURE); | |
} | |
fd_sysrq = open("/proc/sysrq-trigger", O_WRONLY); | |
if (fd_sysrq < 0) { | |
perror("failed to open sysrq-trigger"); | |
exit(EXIT_FAILURE); | |
} | |
fd_drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY); | |
if (fd_drop_caches < 0) { | |
perror("failed to open drop_caches"); | |
exit(EXIT_FAILURE); | |
} | |
mlockall(MCL_CURRENT); | |
goto start; | |
again: | |
sleep(1); | |
start: | |
memset(buf, 0, BUF_LEN); | |
lseek(fd, 0, SEEK_SET); | |
nr = read(fd, buf, BUF_LEN-1); | |
if (nr < 0) { | |
perror("failed to read"); | |
exit(EXIT_FAILURE); | |
} | |
for(c=buf, ch_prev='\n', mi=0; (ch = *c) && (mi < ARRAY_SIZE(matchers)); ++c) { | |
if (ch_prev == '\n') { | |
if (!memcmp(c, matchers[mi].s, matchers[mi].len)) { | |
c += matchers[mi].len; | |
matchers[mi].ull = strtoull(c, &c, 10); | |
mi += 1; | |
} | |
} | |
ch_prev = ch; | |
} | |
if (mi < ARRAY_SIZE(matchers)) { | |
fprintf(stderr, "failed to find all matches\n"); | |
exit(EXIT_FAILURE); | |
} | |
if ((matchers[MEM_AVAILABLE].ull < oom_low_kb)) { | |
nr = write(fd_sysrq, "f", 2); | |
if (nr < 0) { | |
perror("failed to write sysrq-trigger"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Triggered OOM, MemFree was %lluK, MemAvailable was %llu\n", | |
matchers[MEM_FREE].ull, matchers[MEM_AVAILABLE].ull); | |
fflush(stdout); | |
} | |
if ((matchers[MEM_FREE].ull < watermark_low_kb)) { | |
clock_gettime(CLOCK_MONOTONIC, &t.reclaim_start); | |
timespecsub(&t.reclaim_start, &t.reclaim_end, &t.diff); | |
/* Sometimes we get stuck in a reclaim loop, detect that and break out by | |
* writing to drop_caches. If the start of the next reclaim is less than | |
* RECLAIM_DROP_CACHES_SEC seconds from the end of the previous reclaim, | |
* break out. */ | |
if (has_reclaimed && (t.diff.tv_sec < RECLAIM_DROP_CACHES_SEC)) { | |
lseek(fd_drop_caches, 0, SEEK_END); | |
len = sprintf(buf, "%s\n", DROP_CACHES_STR); /* NB: no newline = EINVAL */ | |
nr = write(fd_drop_caches, buf, len+1); | |
if (nr < 0) { | |
perror("failed to write drop_caches"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Completed drop_caches (%s)\n", DROP_CACHES_STR); | |
fflush(stdout); | |
goto again; | |
} | |
/* free up to reclaim_kb but not more than MemAvailable */ | |
len = sprintf(buf, "%lluK", MIN(reclaim_kb, matchers[MEM_AVAILABLE].ull)); | |
if (len < 0) { | |
perror("failed to write string"); | |
exit(EXIT_FAILURE); | |
} | |
lseek(fd_reclaim, 0, SEEK_END); | |
nr = write(fd_reclaim, buf, len+1); | |
if (nr < 0) { | |
perror("failed to write reclaim"); | |
exit(EXIT_FAILURE); | |
} | |
has_reclaimed = 1; | |
clock_gettime(CLOCK_MONOTONIC, &t.reclaim_end); | |
timespecsub(&t.reclaim_end, &t.reclaim_start, &t.diff); | |
printf("Completed reclaim of %s in %ld.%02lds, MemFree was %lluK, MemAvailable was %llu\n" | |
,buf | |
,t.diff.tv_sec, t.diff.tv_nsec/(NSEC_PER_SEC/100) | |
,matchers[MEM_FREE].ull, matchers[MEM_AVAILABLE].ull | |
); | |
fflush(stdout); | |
} | |
goto again; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment