Skip to content

Instantly share code, notes, and snippets.

@kulp
Last active December 21, 2016 02:05
Show Gist options
  • Save kulp/3992122 to your computer and use it in GitHub Desktop.
Save kulp/3992122 to your computer and use it in GitHub Desktop.
A little buddy allocator
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#define RANK_0_LOG 5
#define RANK_0_SIZE (1ul << RANK_0_LOG)
#define RANKS 20
// NPER is an architecture-dependent number.
// NPER is nodes per element of `nodes`
#define NPERBITS 4
#define NPER (1ul << NPERBITS)
// BPER is bits per element of `nodes`
#define BPERBITS 1
#define BPER (1ul << BPERBITS)
#define SI static inline
// TODO need to guarantee alignment of the pool
static char pool[RANK_0_SIZE << (RANKS - 1)];
static const size_t pool_size = sizeof pool;
static unsigned counts[RANKS] = { [RANKS - 1] = 1 };
// implicit binary tree of nodes
// node at n has children at (2n + 1) and (2n + 2)
// bit0 : 0=leaf, 1=internal
// bit1 : full
static uint32_t nodes[(1ul << RANKS) / NPER - (1 / NPER)];
typedef uint32_t np;
typedef size_t sz;
#define TN 0 /* top node */
// -----------------------------------------------------------------------------
static inline unsigned ilog2(unsigned long x)
{
unsigned result = 0;
while (x >>= 1)
result++;
return result;
}
SI sz IRANK (sz r) { return RANKS - 1 - r; }
SI np LLINK (np n) { return (n << 1) + 1; }
SI np RLINK (np n) { return (n << 1) + 2; }
SI sz ISLEFT (np n) { return n & 1; }
SI sz ISRIGHT (np n) { return !ISLEFT(n); }
SI np PARENT (np n) { return (n - 1) >> 1; }
SI np SIBLING (np n) { return n + ISLEFT(n) - ISRIGHT(n); }
SI sz NODE2RANK (np n) { return (n ? NODE2RANK(PARENT(n)) : RANKS) - 1; }
SI sz SIZE2BLKS (sz x) { return (x + RANK_0_SIZE - 1) >> RANK_0_LOG; }
SI sz SIZE2RANK (sz s) { return s ? ilog2(SIZE2BLKS(s) - 1) + 1 : 0; }
SI sz RANK2BYTES(sz r) { return 1ul << (r + RANK_0_LOG); }
SI sz NODE2BASE (np n) { return n >> NPERBITS; }
SI sz NODE2SH (np n) { return n & (NPER - 1); }
SI sz GET_COUNT(sz r) { return counts[r]; }
SI void SET_COUNT(sz r, sz v) { counts[r] = v; }
SI sz SHAMT(np n, sz b) { return NODE2SH(n) * BPER + b; }
SI uint32_t* NODE2PTR(np n) { return &nodes[NODE2BASE(n)]; }
SI sz GET_BIT (np n, sz b) { return (*NODE2PTR(n) >> SHAMT(n, b)) & 1; }
SI void CLR_BIT (np n, sz b) { *NODE2PTR(n) &= ~(1ul << SHAMT(n,b)); }
SI void SET_BIT (np n, sz b) { *NODE2PTR(n) |= (1ul << SHAMT(n,b)); }
SI sz GET_LEAF (np n ) { return !GET_BIT(n, 0); }
SI sz GET_FULL (np n ) { return GET_BIT(n, 1); }
SI sz GET_VALID(np n ) { return n == TN || !GET_LEAF(PARENT(n)); }
SI void SET_LEAF (np n, sz v) { !v ? SET_BIT(n, 0) : CLR_BIT(n, 0); }
SI void SET_FULL (np n, sz v) { v ? SET_BIT(n, 1) : CLR_BIT(n, 1); }
static char *NODE2ADDR(np n)
{
size_t base = 0;
int i = NODE2RANK(n) + RANK_0_LOG;
while (n != TN) {
base |= ISRIGHT(n) << i;
n = PARENT(n);
i++;
}
return pool + base;
}
static np ADDR2NODE(char *key)
{
// We start at the second-highest rank because we are always checking the
// key against the midpoint of the current-rank node, which midpoint is
// calculated using the size of the next-smallest-rank node.
size_t rank = RANKS - 2;
np n = TN;
char *base = pool;
while (!GET_LEAF(n)) {
if (key - base >= (signed)RANK2BYTES(rank)) {
n = RLINK(n);
base += RANK2BYTES(rank);
} else {
n = LLINK(n);
}
rank--;
}
// Now we are at a leaf. Either it matches our key, or we bail.
if (key != base)
abort();
return n;
}
// -----------------------------------------------------------------------------
static void buddy_splitnode(np n, sz rank)
{
assert(("node is legally splittable", GET_LEAF(n)));
// remove one node of our rank and create two of a smaller rank
if (!GET_FULL(n))
SET_COUNT(rank, GET_COUNT(rank) - 1);
SET_COUNT(rank - 1, GET_COUNT(rank - 1) + 2);
SET_LEAF(n,0);
// Both children are already empty leaf nodes, either because they have
// never been written, or because buddy_{free,realloc} left them that way.
}
static void *buddy_alloc(size_t rank)
{
for (size_t r = rank; r < RANKS; r++) {
if (!GET_COUNT(r))
continue;
size_t rp = IRANK(r);
size_t first = (1ul << rp) - 1;
for (np n = first; n < first + (1ul << rp); n++) {
if (!GET_VALID(n) || !GET_LEAF(n) || GET_FULL(n))
continue;
while (r > rank) {
buddy_splitnode(n, r--);
n = LLINK(n);
}
SET_FULL(n,1);
SET_COUNT(r, GET_COUNT(r) - 1);
return NODE2ADDR(n);
}
}
// if we make it here, there were no nodes of appropriate size or larger
errno = ENOMEM;
return NULL;
}
// -----------------------------------------------------------------------------
// External API
void *buddy_malloc(size_t size)
{
return buddy_alloc(SIZE2RANK(size));
}
void *buddy_calloc(size_t nelem, size_t width)
{
return memset(buddy_malloc(nelem * width), 0, nelem * width);
}
void buddy_free(void *orig)
{
if (!orig)
return;
np n = ADDR2NODE(orig);
assert(("freeing node is a leaf", GET_LEAF(n) == 1));
SET_FULL(n,0);
size_t rank = NODE2RANK(n);
assert(("rank is valid", rank < RANKS && rank >= 0));
SET_COUNT(rank, GET_COUNT(rank) + 1); // mark self free
assert(("count is legal", GET_COUNT(rank) < (1ul << (IRANK(rank) + 1))));
if (n == TN)
return;
np p = PARENT(n);
np b = SIBLING(n);
while (GET_LEAF(b) && !GET_FULL(b)) {
// TODO write this recursively too ?
assert(("count will not underflow", GET_COUNT(rank) >= 2));
SET_COUNT(rank, GET_COUNT(rank) - 2); // b and n are disappearing
assert(("count legal", GET_COUNT(rank + 1) < (2u << IRANK(rank + 1))));
SET_COUNT(rank + 1, GET_COUNT(rank + 1) + 1); // parent appears as leaf
rank++;
SET_LEAF(p,1);
SET_FULL(p,0);
n = p;
if (n == TN)
break;
p = PARENT(n);
assert(("parent is valid", GET_VALID(p)));
b = SIBLING(n);
assert(("sibling is valid", GET_VALID(b)));
}
}
void *buddy_realloc(void *orig, size_t size)
{
if (!size) {
buddy_free(orig);
return NULL;
}
if (!orig)
return buddy_malloc(size);
void *what = orig;
np n = ADDR2NODE(what);
size_t rank = NODE2RANK(n);
size_t max = RANK2BYTES(rank);
if (size > max) {
if (n == TN || size >= pool_size) {
errno = ENOMEM;
return NULL;
}
#if !NO_PROMOTE
// Without the ability to promote and demote existing allocation sizes,
// we drop from 70% utilisation to 50%. Demotion seems more important
// than promotion, but that may be an effect of the allocation pattern.
size_t get = max;
np p = PARENT(n), b = SIBLING(n);
while (GET_LEAF(b) && !GET_FULL(b) && (get <<= 1) < size && p != TN) {
b = SIBLING(p);
p = PARENT(p);
}
if (get >= pool_size) {
errno = ENOMEM;
return NULL;
}
if (get >= size) {
// can promote existing allocation
do {
np q = PARENT(n);
SET_LEAF(q,1);
SET_FULL(q,1);
// Set node empty even though invalid, so future allocations
// can depend on it without setting it explicitly.
SET_FULL(n,0);
n = q;
// coalescing a full and an empty
SET_COUNT(rank, GET_COUNT(rank) - 1);
rank++;
} while (n != p);
what = NODE2ADDR(n);
memmove(what, orig, max);
} else
#endif
{
what = buddy_alloc(SIZE2RANK(size));
if (!what)
return NULL; // ENOMEM is already set
memcpy(what, orig, max);
buddy_free(orig);
}
}
#if !NO_DEMOTE
else while (rank > 0 && size <= (max >>= 1)) {
// demote node
buddy_splitnode(n, rank--);
// claim the left child of the newly split node
n = LLINK(n);
SET_FULL(n,1);
SET_COUNT(rank, GET_COUNT(rank) - 1);
}
#endif
return what;
}
PEDANTIC = -pedantic-errors -Werror
ifneq ($(DEBUG),)
CFLAGS += -g -O0
LDFLAGS += -g
CPPFLAGS += -DDEBUG
else
ARCH = native
CFLAGS += -Os -fomit-frame-pointer -march=$(ARCH)
CPPFLAGS += -DNDEBUG
endif
#CPPFLAGS += -DNO_REALLOC
#CPPFLAGS += -DNO_PROMOTE
#CPPFLAGS += -DNO_DEMOTE
all: test_buddy
buddy.o: CFLAGS += -Wall -Wextra $(PEDANTIC) -Wno-unused-value
buddy.o: CPPFLAGS += -std=c99
test_buddy: CPPFLAGS += -std=gnu99
test_buddy: test_buddy.o buddy.c
$(LINK.c) -o $@ $< $(LDLIBS)
clean:
$(RM) test_buddy *.o
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <assert.h>
#include "buddy.c"
const size_t buddy_ranks = RANKS;
const unsigned *buddy_counts = counts;
float buddy_overhead = ((float)(sizeof nodes) + sizeof counts) / sizeof pool;
const size_t buddy_pool_size = sizeof pool;
void randomise(char *ptr, long size)
{
for (long i = 0; i < size; i++)
ptr[i] = (random() >> 8) & 0xff;
}
#define COUNT 500
static struct rec {
void *addr;
void *check;
size_t size;
} records[COUNT];
unsigned check_range(const char *bad, const char *good, size_t len, size_t bounds[2])
{
if (len < 2) {
if (len)
return *(const char*)bad != *(const char*)good;
return 0;
} else if (memcmp(bad, good, len)) {
size_t subbounds[2][2] = { { 0 } };
int leftbad = check_range(&bad[0 ], &good[0 ], len / 2, subbounds[0]);
int rightbad = check_range(&bad[len / 2], &good[len / 2], len - len / 2, subbounds[1]);
// return leftmost and rightmost bad bounds
bounds[0] = subbounds[ leftbad ? 0 : 1][0] + ( leftbad ? 0 : len - len / 2);
bounds[1] = subbounds[rightbad ? 1 : 0][1] + (rightbad ? len - len / 2 : 0);
return leftbad + rightbad;
} else {
return 0;
}
}
void check(int i)
{
size_t bounds[2];
unsigned count;
if ((count = check_range(records[i].addr, records[i].check, records[i].size, bounds))) {
printf("index %i had %u bad bytes between %zd and %zd\n", i, count, bounds[0], bounds[1]);
abort();
}
}
void checkall(void)
{
for (int i = 0; i < COUNT; i++)
check(i);
}
int main(int argc, char *argv[])
{
struct timeval tv;
gettimeofday(&tv, NULL);
long seed = argc > 1 ? strtol(argv[1], NULL, 0) : tv.tv_usec;
printf("seed = %ld\n", seed);
extern float buddy_overhead;
printf("overhead = %5.2f%%\n", buddy_overhead * 100);
srandom(seed);
buddy_free(buddy_calloc(0,0));
for (int i = 0; i < COUNT; i++) {
size_t piece = 1 << (random() % 20);
size_t size = piece + (random() % piece) + 5;
printf("Trying to allocate %zd bytes ...\n", size);
void *ptr = buddy_malloc(size);
if (!ptr) {
puts("allocation failed, skipping");
continue;
}
void *c = malloc(size);
randomise(ptr, size);
memcpy(c, ptr, size);
records[i] = (struct rec){
.addr = ptr,
.check = c,
.size = size
};
checkall();
#if !NO_REALLOC
size_t nsize = size * (1.2 + (random() % 4) - 1);
printf("Trying to reallocate size %zd bytes ...\n", nsize);
ptr = buddy_realloc(ptr, nsize);
if (!ptr) {
puts("reallocation failed, skipping");
(void)random(); // eat the random number for consistency across runs
continue;
}
c = realloc(c, nsize);
if (nsize > size) {
randomise(&ptr[size], nsize - size);
memcpy(&c[size], &ptr[size], nsize - size);
}
records[i] = (struct rec){
.addr = ptr,
.check = c,
.size = nsize
};
checkall();
size_t qsize = size * (0.2 + random() % 4);
printf("Trying to reallocate size %zd bytes ...\n", qsize);
ptr = buddy_realloc(ptr, qsize);
if (!ptr) {
puts("reallocation failed, skipping");
continue;
}
c = realloc(c, qsize);
if (qsize > nsize) {
randomise(&ptr[nsize], qsize - nsize);
memcpy(&c[nsize], &ptr[nsize], qsize - nsize);
}
records[i] = (struct rec){
.addr = ptr,
.check = c,
.size = qsize
};
checkall();
#endif
}
checkall();
unsigned long sum = 0;
for (int i = 0; i < COUNT; i++) {
sum += records[i].size;
check(i);
buddy_free(records[i].addr);
free(records[i].check);
}
extern const size_t buddy_pool_size;
extern const size_t buddy_ranks;
extern const unsigned *buddy_counts;
printf( "at peak, allocated %lu of %lu possible bytes (%5.2f%%)\n"
"effective overhead : %5.2f%%\n",
sum, buddy_pool_size, sum * 100. / buddy_pool_size,
buddy_overhead / ((double)sum / buddy_pool_size) * 100.
);
assert(buddy_counts[buddy_ranks - 1] == 1);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment