Last active
December 21, 2016 02:05
-
-
Save kulp/3992122 to your computer and use it in GitHub Desktop.
A little buddy allocator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <errno.h> | |
#include <stddef.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <limits.h> | |
#include <stdint.h> | |
#define RANK_0_LOG 5 | |
#define RANK_0_SIZE (1ul << RANK_0_LOG) | |
#define RANKS 20 | |
// NPER is an architecture-dependent number. | |
// NPER is nodes per element of `nodes` | |
#define NPERBITS 4 | |
#define NPER (1ul << NPERBITS) | |
// BPER is bits per element of `nodes` | |
#define BPERBITS 1 | |
#define BPER (1ul << BPERBITS) | |
#define SI static inline | |
// TODO need to guarantee alignment of the pool | |
static char pool[RANK_0_SIZE << (RANKS - 1)]; | |
static const size_t pool_size = sizeof pool; | |
static unsigned counts[RANKS] = { [RANKS - 1] = 1 }; | |
// implicit binary tree of nodes | |
// node at n has children at (2n + 1) and (2n + 2) | |
// bit0 : 0=leaf, 1=internal | |
// bit1 : full | |
static uint32_t nodes[(1ul << RANKS) / NPER - (1 / NPER)]; | |
typedef uint32_t np; | |
typedef size_t sz; | |
#define TN 0 /* top node */ | |
// ----------------------------------------------------------------------------- | |
static inline unsigned ilog2(unsigned long x) | |
{ | |
unsigned result = 0; | |
while (x >>= 1) | |
result++; | |
return result; | |
} | |
SI sz IRANK (sz r) { return RANKS - 1 - r; } | |
SI np LLINK (np n) { return (n << 1) + 1; } | |
SI np RLINK (np n) { return (n << 1) + 2; } | |
SI sz ISLEFT (np n) { return n & 1; } | |
SI sz ISRIGHT (np n) { return !ISLEFT(n); } | |
SI np PARENT (np n) { return (n - 1) >> 1; } | |
SI np SIBLING (np n) { return n + ISLEFT(n) - ISRIGHT(n); } | |
SI sz NODE2RANK (np n) { return (n ? NODE2RANK(PARENT(n)) : RANKS) - 1; } | |
SI sz SIZE2BLKS (sz x) { return (x + RANK_0_SIZE - 1) >> RANK_0_LOG; } | |
SI sz SIZE2RANK (sz s) { return s ? ilog2(SIZE2BLKS(s) - 1) + 1 : 0; } | |
SI sz RANK2BYTES(sz r) { return 1ul << (r + RANK_0_LOG); } | |
SI sz NODE2BASE (np n) { return n >> NPERBITS; } | |
SI sz NODE2SH (np n) { return n & (NPER - 1); } | |
SI sz GET_COUNT(sz r) { return counts[r]; } | |
SI void SET_COUNT(sz r, sz v) { counts[r] = v; } | |
SI sz SHAMT(np n, sz b) { return NODE2SH(n) * BPER + b; } | |
SI uint32_t* NODE2PTR(np n) { return &nodes[NODE2BASE(n)]; } | |
SI sz GET_BIT (np n, sz b) { return (*NODE2PTR(n) >> SHAMT(n, b)) & 1; } | |
SI void CLR_BIT (np n, sz b) { *NODE2PTR(n) &= ~(1ul << SHAMT(n,b)); } | |
SI void SET_BIT (np n, sz b) { *NODE2PTR(n) |= (1ul << SHAMT(n,b)); } | |
SI sz GET_LEAF (np n ) { return !GET_BIT(n, 0); } | |
SI sz GET_FULL (np n ) { return GET_BIT(n, 1); } | |
SI sz GET_VALID(np n ) { return n == TN || !GET_LEAF(PARENT(n)); } | |
SI void SET_LEAF (np n, sz v) { !v ? SET_BIT(n, 0) : CLR_BIT(n, 0); } | |
SI void SET_FULL (np n, sz v) { v ? SET_BIT(n, 1) : CLR_BIT(n, 1); } | |
static char *NODE2ADDR(np n) | |
{ | |
size_t base = 0; | |
int i = NODE2RANK(n) + RANK_0_LOG; | |
while (n != TN) { | |
base |= ISRIGHT(n) << i; | |
n = PARENT(n); | |
i++; | |
} | |
return pool + base; | |
} | |
static np ADDR2NODE(char *key) | |
{ | |
// We start at the second-highest rank because we are always checking the | |
// key against the midpoint of the current-rank node, which midpoint is | |
// calculated using the size of the next-smallest-rank node. | |
size_t rank = RANKS - 2; | |
np n = TN; | |
char *base = pool; | |
while (!GET_LEAF(n)) { | |
if (key - base >= (signed)RANK2BYTES(rank)) { | |
n = RLINK(n); | |
base += RANK2BYTES(rank); | |
} else { | |
n = LLINK(n); | |
} | |
rank--; | |
} | |
// Now we are at a leaf. Either it matches our key, or we bail. | |
if (key != base) | |
abort(); | |
return n; | |
} | |
// ----------------------------------------------------------------------------- | |
static void buddy_splitnode(np n, sz rank) | |
{ | |
assert(("node is legally splittable", GET_LEAF(n))); | |
// remove one node of our rank and create two of a smaller rank | |
if (!GET_FULL(n)) | |
SET_COUNT(rank, GET_COUNT(rank) - 1); | |
SET_COUNT(rank - 1, GET_COUNT(rank - 1) + 2); | |
SET_LEAF(n,0); | |
// Both children are already empty leaf nodes, either because they have | |
// never been written, or because buddy_{free,realloc} left them that way. | |
} | |
static void *buddy_alloc(size_t rank) | |
{ | |
for (size_t r = rank; r < RANKS; r++) { | |
if (!GET_COUNT(r)) | |
continue; | |
size_t rp = IRANK(r); | |
size_t first = (1ul << rp) - 1; | |
for (np n = first; n < first + (1ul << rp); n++) { | |
if (!GET_VALID(n) || !GET_LEAF(n) || GET_FULL(n)) | |
continue; | |
while (r > rank) { | |
buddy_splitnode(n, r--); | |
n = LLINK(n); | |
} | |
SET_FULL(n,1); | |
SET_COUNT(r, GET_COUNT(r) - 1); | |
return NODE2ADDR(n); | |
} | |
} | |
// if we make it here, there were no nodes of appropriate size or larger | |
errno = ENOMEM; | |
return NULL; | |
} | |
// ----------------------------------------------------------------------------- | |
// External API | |
void *buddy_malloc(size_t size) | |
{ | |
return buddy_alloc(SIZE2RANK(size)); | |
} | |
void *buddy_calloc(size_t nelem, size_t width) | |
{ | |
return memset(buddy_malloc(nelem * width), 0, nelem * width); | |
} | |
void buddy_free(void *orig) | |
{ | |
if (!orig) | |
return; | |
np n = ADDR2NODE(orig); | |
assert(("freeing node is a leaf", GET_LEAF(n) == 1)); | |
SET_FULL(n,0); | |
size_t rank = NODE2RANK(n); | |
assert(("rank is valid", rank < RANKS && rank >= 0)); | |
SET_COUNT(rank, GET_COUNT(rank) + 1); // mark self free | |
assert(("count is legal", GET_COUNT(rank) < (1ul << (IRANK(rank) + 1)))); | |
if (n == TN) | |
return; | |
np p = PARENT(n); | |
np b = SIBLING(n); | |
while (GET_LEAF(b) && !GET_FULL(b)) { | |
// TODO write this recursively too ? | |
assert(("count will not underflow", GET_COUNT(rank) >= 2)); | |
SET_COUNT(rank, GET_COUNT(rank) - 2); // b and n are disappearing | |
assert(("count legal", GET_COUNT(rank + 1) < (2u << IRANK(rank + 1)))); | |
SET_COUNT(rank + 1, GET_COUNT(rank + 1) + 1); // parent appears as leaf | |
rank++; | |
SET_LEAF(p,1); | |
SET_FULL(p,0); | |
n = p; | |
if (n == TN) | |
break; | |
p = PARENT(n); | |
assert(("parent is valid", GET_VALID(p))); | |
b = SIBLING(n); | |
assert(("sibling is valid", GET_VALID(b))); | |
} | |
} | |
void *buddy_realloc(void *orig, size_t size) | |
{ | |
if (!size) { | |
buddy_free(orig); | |
return NULL; | |
} | |
if (!orig) | |
return buddy_malloc(size); | |
void *what = orig; | |
np n = ADDR2NODE(what); | |
size_t rank = NODE2RANK(n); | |
size_t max = RANK2BYTES(rank); | |
if (size > max) { | |
if (n == TN || size >= pool_size) { | |
errno = ENOMEM; | |
return NULL; | |
} | |
#if !NO_PROMOTE | |
// Without the ability to promote and demote existing allocation sizes, | |
// we drop from 70% utilisation to 50%. Demotion seems more important | |
// than promotion, but that may be an effect of the allocation pattern. | |
size_t get = max; | |
np p = PARENT(n), b = SIBLING(n); | |
while (GET_LEAF(b) && !GET_FULL(b) && (get <<= 1) < size && p != TN) { | |
b = SIBLING(p); | |
p = PARENT(p); | |
} | |
if (get >= pool_size) { | |
errno = ENOMEM; | |
return NULL; | |
} | |
if (get >= size) { | |
// can promote existing allocation | |
do { | |
np q = PARENT(n); | |
SET_LEAF(q,1); | |
SET_FULL(q,1); | |
// Set node empty even though invalid, so future allocations | |
// can depend on it without setting it explicitly. | |
SET_FULL(n,0); | |
n = q; | |
// coalescing a full and an empty | |
SET_COUNT(rank, GET_COUNT(rank) - 1); | |
rank++; | |
} while (n != p); | |
what = NODE2ADDR(n); | |
memmove(what, orig, max); | |
} else | |
#endif | |
{ | |
what = buddy_alloc(SIZE2RANK(size)); | |
if (!what) | |
return NULL; // ENOMEM is already set | |
memcpy(what, orig, max); | |
buddy_free(orig); | |
} | |
} | |
#if !NO_DEMOTE | |
else while (rank > 0 && size <= (max >>= 1)) { | |
// demote node | |
buddy_splitnode(n, rank--); | |
// claim the left child of the newly split node | |
n = LLINK(n); | |
SET_FULL(n,1); | |
SET_COUNT(rank, GET_COUNT(rank) - 1); | |
} | |
#endif | |
return what; | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PEDANTIC = -pedantic-errors -Werror | |
ifneq ($(DEBUG),) | |
CFLAGS += -g -O0 | |
LDFLAGS += -g | |
CPPFLAGS += -DDEBUG | |
else | |
ARCH = native | |
CFLAGS += -Os -fomit-frame-pointer -march=$(ARCH) | |
CPPFLAGS += -DNDEBUG | |
endif | |
#CPPFLAGS += -DNO_REALLOC | |
#CPPFLAGS += -DNO_PROMOTE | |
#CPPFLAGS += -DNO_DEMOTE | |
all: test_buddy | |
buddy.o: CFLAGS += -Wall -Wextra $(PEDANTIC) -Wno-unused-value | |
buddy.o: CPPFLAGS += -std=c99 | |
test_buddy: CPPFLAGS += -std=gnu99 | |
test_buddy: test_buddy.o buddy.c | |
$(LINK.c) -o $@ $< $(LDLIBS) | |
clean: | |
$(RM) test_buddy *.o | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/time.h> | |
#include <assert.h> | |
#include "buddy.c" | |
const size_t buddy_ranks = RANKS; | |
const unsigned *buddy_counts = counts; | |
float buddy_overhead = ((float)(sizeof nodes) + sizeof counts) / sizeof pool; | |
const size_t buddy_pool_size = sizeof pool; | |
void randomise(char *ptr, long size) | |
{ | |
for (long i = 0; i < size; i++) | |
ptr[i] = (random() >> 8) & 0xff; | |
} | |
#define COUNT 500 | |
static struct rec { | |
void *addr; | |
void *check; | |
size_t size; | |
} records[COUNT]; | |
unsigned check_range(const char *bad, const char *good, size_t len, size_t bounds[2]) | |
{ | |
if (len < 2) { | |
if (len) | |
return *(const char*)bad != *(const char*)good; | |
return 0; | |
} else if (memcmp(bad, good, len)) { | |
size_t subbounds[2][2] = { { 0 } }; | |
int leftbad = check_range(&bad[0 ], &good[0 ], len / 2, subbounds[0]); | |
int rightbad = check_range(&bad[len / 2], &good[len / 2], len - len / 2, subbounds[1]); | |
// return leftmost and rightmost bad bounds | |
bounds[0] = subbounds[ leftbad ? 0 : 1][0] + ( leftbad ? 0 : len - len / 2); | |
bounds[1] = subbounds[rightbad ? 1 : 0][1] + (rightbad ? len - len / 2 : 0); | |
return leftbad + rightbad; | |
} else { | |
return 0; | |
} | |
} | |
void check(int i) | |
{ | |
size_t bounds[2]; | |
unsigned count; | |
if ((count = check_range(records[i].addr, records[i].check, records[i].size, bounds))) { | |
printf("index %i had %u bad bytes between %zd and %zd\n", i, count, bounds[0], bounds[1]); | |
abort(); | |
} | |
} | |
void checkall(void) | |
{ | |
for (int i = 0; i < COUNT; i++) | |
check(i); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
long seed = argc > 1 ? strtol(argv[1], NULL, 0) : tv.tv_usec; | |
printf("seed = %ld\n", seed); | |
extern float buddy_overhead; | |
printf("overhead = %5.2f%%\n", buddy_overhead * 100); | |
srandom(seed); | |
buddy_free(buddy_calloc(0,0)); | |
for (int i = 0; i < COUNT; i++) { | |
size_t piece = 1 << (random() % 20); | |
size_t size = piece + (random() % piece) + 5; | |
printf("Trying to allocate %zd bytes ...\n", size); | |
void *ptr = buddy_malloc(size); | |
if (!ptr) { | |
puts("allocation failed, skipping"); | |
continue; | |
} | |
void *c = malloc(size); | |
randomise(ptr, size); | |
memcpy(c, ptr, size); | |
records[i] = (struct rec){ | |
.addr = ptr, | |
.check = c, | |
.size = size | |
}; | |
checkall(); | |
#if !NO_REALLOC | |
size_t nsize = size * (1.2 + (random() % 4) - 1); | |
printf("Trying to reallocate size %zd bytes ...\n", nsize); | |
ptr = buddy_realloc(ptr, nsize); | |
if (!ptr) { | |
puts("reallocation failed, skipping"); | |
(void)random(); // eat the random number for consistency across runs | |
continue; | |
} | |
c = realloc(c, nsize); | |
if (nsize > size) { | |
randomise(&ptr[size], nsize - size); | |
memcpy(&c[size], &ptr[size], nsize - size); | |
} | |
records[i] = (struct rec){ | |
.addr = ptr, | |
.check = c, | |
.size = nsize | |
}; | |
checkall(); | |
size_t qsize = size * (0.2 + random() % 4); | |
printf("Trying to reallocate size %zd bytes ...\n", qsize); | |
ptr = buddy_realloc(ptr, qsize); | |
if (!ptr) { | |
puts("reallocation failed, skipping"); | |
continue; | |
} | |
c = realloc(c, qsize); | |
if (qsize > nsize) { | |
randomise(&ptr[nsize], qsize - nsize); | |
memcpy(&c[nsize], &ptr[nsize], qsize - nsize); | |
} | |
records[i] = (struct rec){ | |
.addr = ptr, | |
.check = c, | |
.size = qsize | |
}; | |
checkall(); | |
#endif | |
} | |
checkall(); | |
unsigned long sum = 0; | |
for (int i = 0; i < COUNT; i++) { | |
sum += records[i].size; | |
check(i); | |
buddy_free(records[i].addr); | |
free(records[i].check); | |
} | |
extern const size_t buddy_pool_size; | |
extern const size_t buddy_ranks; | |
extern const unsigned *buddy_counts; | |
printf( "at peak, allocated %lu of %lu possible bytes (%5.2f%%)\n" | |
"effective overhead : %5.2f%%\n", | |
sum, buddy_pool_size, sum * 100. / buddy_pool_size, | |
buddy_overhead / ((double)sum / buddy_pool_size) * 100. | |
); | |
assert(buddy_counts[buddy_ranks - 1] == 1); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment