kulp · December 21, 2016 02:05
diff --git a/buddy.c b/buddy.c
 #include <assert.h>
 #include <errno.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #include <stdint.h>

 #define RANK_0_LOG  5
 #define RANK_0_SIZE (1ul << RANK_0_LOG)
 #define RANKS       20

 // NPER is an architecture-dependent number.
 // NPER is nodes per element of `nodes`
 #define NPERBITS    4
 #define NPER        (1ul << NPERBITS)
 // BPER is bits per element of `nodes`
 #define BPERBITS    1
 #define BPER        (1ul << BPERBITS)

 #define SI static inline

 // TODO need to guarantee alignment of the pool
 static char pool[RANK_0_SIZE << (RANKS - 1)];
 static const size_t pool_size = sizeof pool;
 static unsigned counts[RANKS] = { [RANKS - 1] = 1 };
 // implicit binary tree of nodes
 // node at n has children at (2n + 1) and (2n + 2)
 // bit0 : 0=leaf, 1=internal
 // bit1 : full
 static uint32_t nodes[(1ul << RANKS) / NPER - (1 / NPER)];

 typedef uint32_t np;
 typedef size_t sz;
 #define TN 0 /* top node */

 // -----------------------------------------------------------------------------
 static inline unsigned ilog2(unsigned long x)
 {
    unsigned result = 0;
    while (x >>= 1)
        result++;
    return result;
 }

 SI sz IRANK     (sz r) { return RANKS - 1 - r; }
 SI np LLINK     (np n) { return (n << 1) + 1; }
 SI np RLINK     (np n) { return (n << 1) + 2; }
 SI sz ISLEFT    (np n) { return n & 1; }
 SI sz ISRIGHT   (np n) { return !ISLEFT(n); }
 SI np PARENT    (np n) { return (n - 1) >> 1; }
 SI np SIBLING   (np n) { return n + ISLEFT(n) - ISRIGHT(n); }
 SI sz NODE2RANK (np n) { return (n ? NODE2RANK(PARENT(n)) : RANKS) - 1; }
 SI sz SIZE2BLKS (sz x) { return (x + RANK_0_SIZE - 1) >> RANK_0_LOG; }
 SI sz SIZE2RANK (sz s) { return s ? ilog2(SIZE2BLKS(s) - 1) + 1 : 0; }
 SI sz RANK2BYTES(sz r) { return 1ul << (r + RANK_0_LOG); }
 SI sz NODE2BASE (np n) { return n >> NPERBITS; }
 SI sz NODE2SH   (np n) { return n & (NPER - 1); }

 SI sz   GET_COUNT(sz r)       { return counts[r]; }
 SI void SET_COUNT(sz r, sz v) { counts[r] = v; }

 SI sz SHAMT(np n, sz b) { return NODE2SH(n) * BPER + b; }
 SI uint32_t* NODE2PTR(np n) { return &nodes[NODE2BASE(n)]; }

 SI sz   GET_BIT  (np n, sz b) { return (*NODE2PTR(n) >> SHAMT(n, b)) & 1; }
 SI void CLR_BIT  (np n, sz b) { *NODE2PTR(n) &= ~(1ul << SHAMT(n,b)); }
 SI void SET_BIT  (np n, sz b) { *NODE2PTR(n) |=  (1ul << SHAMT(n,b)); }

 SI sz   GET_LEAF (np n      ) { return !GET_BIT(n, 0); }
 SI sz   GET_FULL (np n      ) { return  GET_BIT(n, 1); }
 SI sz   GET_VALID(np n      ) { return n == TN || !GET_LEAF(PARENT(n)); }
 SI void SET_LEAF (np n, sz v) { !v ? SET_BIT(n, 0) : CLR_BIT(n, 0); }
 SI void SET_FULL (np n, sz v) {  v ? SET_BIT(n, 1) : CLR_BIT(n, 1); }

 static char *NODE2ADDR(np n)
 {
    size_t base = 0;
    int i = NODE2RANK(n) + RANK_0_LOG;
    while (n != TN) {
        base |= ISRIGHT(n) << i;
        n = PARENT(n);
        i++;
    }

    return pool + base;
 }

 static np ADDR2NODE(char *key)
 {
    // We start at the second-highest rank because we are always checking the
    // key against the midpoint of the current-rank node, which midpoint is
    // calculated using the size of the next-smallest-rank node.
    size_t rank = RANKS - 2;
    np n = TN;
    char *base = pool;

    while (!GET_LEAF(n)) {
        if (key - base >= (signed)RANK2BYTES(rank)) {
            n = RLINK(n);
            base += RANK2BYTES(rank);
        } else {
            n = LLINK(n);
        }

        rank--;
    }

    // Now we are at a leaf. Either it matches our key, or we bail.
    if (key != base)
        abort();

    return n;
 }

 // -----------------------------------------------------------------------------
 static void buddy_splitnode(np n, sz rank)
 {
    assert(("node is legally splittable", GET_LEAF(n)));

    // remove one node of our rank and create two of a smaller rank
    if (!GET_FULL(n))
        SET_COUNT(rank, GET_COUNT(rank) - 1);
    SET_COUNT(rank - 1, GET_COUNT(rank - 1) + 2);

    SET_LEAF(n,0);

    // Both children are already empty leaf nodes, either because they have
    // never been written, or because buddy_{free,realloc} left them that way.
 }

 static void *buddy_alloc(size_t rank)
 {
    for (size_t r = rank; r < RANKS; r++) {
        if (!GET_COUNT(r))
            continue;

        size_t rp = IRANK(r);
        size_t first = (1ul << rp) - 1;
        for (np n = first; n < first + (1ul << rp); n++) {
            if (!GET_VALID(n) || !GET_LEAF(n) || GET_FULL(n))
                continue;

            while (r > rank) {
                buddy_splitnode(n, r--);
                n = LLINK(n);
            }

            SET_FULL(n,1);
            SET_COUNT(r, GET_COUNT(r) - 1);
            return NODE2ADDR(n);
        }
    }

    // if we make it here, there were no nodes of appropriate size or larger
    errno = ENOMEM;
    return NULL;
 }

 // -----------------------------------------------------------------------------
 // External API
 void *buddy_malloc(size_t size)
 {
    return buddy_alloc(SIZE2RANK(size));
 }

 void *buddy_calloc(size_t nelem, size_t width)
 {
    return memset(buddy_malloc(nelem * width), 0, nelem * width);
 }

 void buddy_free(void *orig)
 {
    if (!orig)
        return;

    np n = ADDR2NODE(orig);
    assert(("freeing node is a leaf", GET_LEAF(n) == 1));
    SET_FULL(n,0);
    size_t rank = NODE2RANK(n);
    assert(("rank is valid", rank < RANKS && rank >= 0));
    SET_COUNT(rank, GET_COUNT(rank) + 1); // mark self free
    assert(("count is legal", GET_COUNT(rank) < (1ul << (IRANK(rank) + 1))));
    if (n == TN)
        return;

    np p = PARENT(n);
    np b = SIBLING(n);

    while (GET_LEAF(b) && !GET_FULL(b)) {
        // TODO write this recursively too ?
        assert(("count will not underflow", GET_COUNT(rank) >= 2));
        SET_COUNT(rank, GET_COUNT(rank) - 2); // b and n are disappearing
        assert(("count legal", GET_COUNT(rank + 1) < (2u << IRANK(rank + 1))));
        SET_COUNT(rank + 1, GET_COUNT(rank + 1) + 1); // parent appears as leaf
        rank++;

        SET_LEAF(p,1);
        SET_FULL(p,0);

        n = p;

        if (n == TN)
            break;

        p = PARENT(n);
        assert(("parent is valid", GET_VALID(p)));
        b = SIBLING(n);
        assert(("sibling is valid", GET_VALID(b)));
    }
 }

 void *buddy_realloc(void *orig, size_t size)
 {
    if (!size) {
        buddy_free(orig);
        return NULL;
    }
    if (!orig)
        return buddy_malloc(size);

    void *what = orig;
    np n = ADDR2NODE(what);
    size_t rank = NODE2RANK(n);
    size_t max = RANK2BYTES(rank);
    if (size > max) {
        if (n == TN || size >= pool_size) {
            errno = ENOMEM;
            return NULL;
        }

        #if !NO_PROMOTE
        // Without the ability to promote and demote existing allocation sizes,
        // we drop from 70% utilisation to 50%. Demotion seems more important
        // than promotion, but that may be an effect of the allocation pattern.
        size_t get = max;
        np p = PARENT(n), b = SIBLING(n);
        while (GET_LEAF(b) && !GET_FULL(b) && (get <<= 1) < size && p != TN) {
            b = SIBLING(p);
            p = PARENT(p);
        }

        if (get >= pool_size) {
            errno = ENOMEM;
            return NULL;
        }

        if (get >= size) {
            // can promote existing allocation
            do {
                np q = PARENT(n);
                SET_LEAF(q,1);
                SET_FULL(q,1);
                // Set node empty even though invalid, so future allocations
                // can depend on it without setting it explicitly.
                SET_FULL(n,0);
                n = q;
                // coalescing a full and an empty
                SET_COUNT(rank, GET_COUNT(rank) - 1);
                rank++;
            } while (n != p);
            what = NODE2ADDR(n);
            memmove(what, orig, max);
        } else
        #endif
        {
            what = buddy_alloc(SIZE2RANK(size));
            if (!what)
                return NULL; // ENOMEM is already set

            memcpy(what, orig, max);
            buddy_free(orig);
        }
    }
    #if !NO_DEMOTE
    else while (rank > 0 && size <= (max >>= 1)) {
        // demote node
        buddy_splitnode(n, rank--);
        // claim the left child of the newly split node
        n = LLINK(n);
        SET_FULL(n,1);

        SET_COUNT(rank, GET_COUNT(rank) - 1);
    }
    #endif

    return what;
 }

diff --git a/makefile b/makefile
 PEDANTIC = -pedantic-errors -Werror

 ifneq ($(DEBUG),)
 CFLAGS   += -g -O0
 LDFLAGS  += -g
 CPPFLAGS += -DDEBUG
 else
 ARCH      = native
 CFLAGS   += -Os -fomit-frame-pointer -march=$(ARCH)
 CPPFLAGS += -DNDEBUG
 endif

 #CPPFLAGS += -DNO_REALLOC
 #CPPFLAGS += -DNO_PROMOTE
 #CPPFLAGS += -DNO_DEMOTE

 all: test_buddy

 buddy.o: CFLAGS += -Wall -Wextra $(PEDANTIC) -Wno-unused-value
 buddy.o: CPPFLAGS += -std=c99

 test_buddy: CPPFLAGS += -std=gnu99
 test_buddy: test_buddy.o buddy.c
 	$(LINK.c) -o $@ $< $(LDLIBS)

 clean:
 	$(RM) test_buddy *.o

diff --git a/test_buddy.c b/test_buddy.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/time.h>
 #include <assert.h>

 #include "buddy.c"
 const size_t buddy_ranks = RANKS;
 const unsigned *buddy_counts = counts;
 float buddy_overhead = ((float)(sizeof nodes) + sizeof counts) / sizeof pool;
 const size_t buddy_pool_size = sizeof pool;

 void randomise(char *ptr, long size)
 {
    for (long i = 0; i < size; i++)
        ptr[i] = (random() >> 8) & 0xff;
 }

 #define COUNT 500
 static struct rec {
    void *addr;
    void *check;
    size_t size;
 } records[COUNT];

 unsigned check_range(const char *bad, const char *good, size_t len, size_t bounds[2])
 {
    if (len < 2) {
        if (len)
            return *(const char*)bad != *(const char*)good;
        return 0;
    } else if (memcmp(bad, good, len)) {
        size_t subbounds[2][2] = { { 0 } };
        int  leftbad = check_range(&bad[0      ], &good[0      ],       len / 2, subbounds[0]);
        int rightbad = check_range(&bad[len / 2], &good[len / 2], len - len / 2, subbounds[1]);

        // return leftmost and rightmost bad bounds
        bounds[0] = subbounds[ leftbad ? 0 : 1][0] + ( leftbad ?             0 : len - len / 2);
        bounds[1] = subbounds[rightbad ? 1 : 0][1] + (rightbad ? len - len / 2 :             0);

        return leftbad + rightbad;
    } else {
        return 0;
    }
 }

 void check(int i)
 {
    size_t bounds[2];
    unsigned count;
    if ((count = check_range(records[i].addr, records[i].check, records[i].size, bounds))) {
        printf("index %i had %u bad bytes between %zd and %zd\n", i, count, bounds[0], bounds[1]);
        abort();
    }
 }

 void checkall(void)
 {
    for (int i = 0; i < COUNT; i++)
        check(i);
 }

 int main(int argc, char *argv[])
 {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    long seed = argc > 1 ? strtol(argv[1], NULL, 0) : tv.tv_usec;
    printf("seed = %ld\n", seed);
    extern float buddy_overhead;
    printf("overhead = %5.2f%%\n", buddy_overhead * 100);
    srandom(seed);

    buddy_free(buddy_calloc(0,0));

    for (int i = 0; i < COUNT; i++) {
        size_t piece = 1 << (random() % 20);
        size_t size = piece + (random() % piece) + 5;
        printf("Trying to allocate %zd bytes ...\n", size);
        void *ptr = buddy_malloc(size);
        if (!ptr) {
            puts("allocation failed, skipping");
            continue;
        }
        void *c = malloc(size);
        randomise(ptr, size);
        memcpy(c, ptr, size);
        records[i] = (struct rec){
            .addr = ptr,
            .check = c,
            .size = size
        };
        checkall();

        #if !NO_REALLOC
        size_t nsize = size * (1.2 + (random() % 4) - 1);
        printf("Trying to reallocate size %zd bytes ...\n", nsize);
        ptr = buddy_realloc(ptr, nsize);
        if (!ptr) {
            puts("reallocation failed, skipping");
            (void)random(); // eat the random number for consistency across runs
            continue;
        }
        c = realloc(c, nsize);
        if (nsize > size) {
            randomise(&ptr[size], nsize - size);
            memcpy(&c[size], &ptr[size], nsize - size);
        }

        records[i] = (struct rec){
            .addr = ptr,
            .check = c,
            .size = nsize
        };
        checkall();

        size_t qsize = size * (0.2 + random() % 4);
        printf("Trying to reallocate size %zd bytes ...\n", qsize);
        ptr = buddy_realloc(ptr, qsize);
        if (!ptr) {
            puts("reallocation failed, skipping");
            continue;
        }
        c = realloc(c, qsize);
        if (qsize > nsize) {
            randomise(&ptr[nsize], qsize - nsize);
            memcpy(&c[nsize], &ptr[nsize], qsize - nsize);
        }

        records[i] = (struct rec){
            .addr = ptr,
            .check = c,
            .size = qsize
        };
        checkall();
        #endif
    }

    checkall();

    unsigned long sum = 0;
    for (int i = 0; i < COUNT; i++) {
        sum += records[i].size;
        check(i);
        buddy_free(records[i].addr);
        free(records[i].check);
    }

    extern const size_t buddy_pool_size;
    extern const size_t buddy_ranks;
    extern const unsigned *buddy_counts;

    printf( "at peak, allocated %lu of %lu possible bytes (%5.2f%%)\n"
            "effective overhead : %5.2f%%\n",
            sum, buddy_pool_size, sum * 100. / buddy_pool_size,
            buddy_overhead / ((double)sum / buddy_pool_size) * 100.
        );

    assert(buddy_counts[buddy_ranks - 1] == 1);

    return 0;
 }
	#include <assert.h>
	#include <errno.h>
	#include <stddef.h>
	#include <stdlib.h>
	#include <string.h>
	#include <limits.h>
	#include <stdint.h>

	#define RANK_0_LOG 5
	#define RANK_0_SIZE (1ul << RANK_0_LOG)
	#define RANKS 20

	// NPER is an architecture-dependent number.
	// NPER is nodes per element of `nodes`
	#define NPERBITS 4
	#define NPER (1ul << NPERBITS)
	// BPER is bits per element of `nodes`
	#define BPERBITS 1
	#define BPER (1ul << BPERBITS)

	#define SI static inline

	// TODO need to guarantee alignment of the pool
	static char pool[RANK_0_SIZE << (RANKS - 1)];
	static const size_t pool_size = sizeof pool;
	static unsigned counts[RANKS] = { [RANKS - 1] = 1 };
	// implicit binary tree of nodes
	// node at n has children at (2n + 1) and (2n + 2)
	// bit0 : 0=leaf, 1=internal
	// bit1 : full
	static uint32_t nodes[(1ul << RANKS) / NPER - (1 / NPER)];

	typedef uint32_t np;
	typedef size_t sz;
	#define TN 0 /* top node */

	// -----------------------------------------------------------------------------
	static inline unsigned ilog2(unsigned long x)
	{
	unsigned result = 0;
	while (x >>= 1)
	result++;
	return result;
	}

	SI sz IRANK (sz r) { return RANKS - 1 - r; }
	SI np LLINK (np n) { return (n << 1) + 1; }
	SI np RLINK (np n) { return (n << 1) + 2; }
	SI sz ISLEFT (np n) { return n & 1; }
	SI sz ISRIGHT (np n) { return !ISLEFT(n); }
	SI np PARENT (np n) { return (n - 1) >> 1; }
	SI np SIBLING (np n) { return n + ISLEFT(n) - ISRIGHT(n); }
	SI sz NODE2RANK (np n) { return (n ? NODE2RANK(PARENT(n)) : RANKS) - 1; }
	SI sz SIZE2BLKS (sz x) { return (x + RANK_0_SIZE - 1) >> RANK_0_LOG; }
	SI sz SIZE2RANK (sz s) { return s ? ilog2(SIZE2BLKS(s) - 1) + 1 : 0; }
	SI sz RANK2BYTES(sz r) { return 1ul << (r + RANK_0_LOG); }
	SI sz NODE2BASE (np n) { return n >> NPERBITS; }
	SI sz NODE2SH (np n) { return n & (NPER - 1); }

	SI sz GET_COUNT(sz r) { return counts[r]; }
	SI void SET_COUNT(sz r, sz v) { counts[r] = v; }

	SI sz SHAMT(np n, sz b) { return NODE2SH(n) * BPER + b; }
	SI uint32_t* NODE2PTR(np n) { return &nodes[NODE2BASE(n)]; }

	SI sz GET_BIT (np n, sz b) { return (*NODE2PTR(n) >> SHAMT(n, b)) & 1; }
	SI void CLR_BIT (np n, sz b) { *NODE2PTR(n) &= ~(1ul << SHAMT(n,b)); }
	SI void SET_BIT (np n, sz b) { *NODE2PTR(n) \|= (1ul << SHAMT(n,b)); }

	SI sz GET_LEAF (np n ) { return !GET_BIT(n, 0); }
	SI sz GET_FULL (np n ) { return GET_BIT(n, 1); }
	SI sz GET_VALID(np n ) { return n == TN \|\| !GET_LEAF(PARENT(n)); }
	SI void SET_LEAF (np n, sz v) { !v ? SET_BIT(n, 0) : CLR_BIT(n, 0); }
	SI void SET_FULL (np n, sz v) { v ? SET_BIT(n, 1) : CLR_BIT(n, 1); }

	static char *NODE2ADDR(np n)
	{
	size_t base = 0;
	int i = NODE2RANK(n) + RANK_0_LOG;
	while (n != TN) {
	base \|= ISRIGHT(n) << i;
	n = PARENT(n);
	i++;
	}

	return pool + base;
	}

	static np ADDR2NODE(char *key)
	{
	// We start at the second-highest rank because we are always checking the
	// key against the midpoint of the current-rank node, which midpoint is
	// calculated using the size of the next-smallest-rank node.
	size_t rank = RANKS - 2;
	np n = TN;
	char *base = pool;

	while (!GET_LEAF(n)) {
	if (key - base >= (signed)RANK2BYTES(rank)) {
	n = RLINK(n);
	base += RANK2BYTES(rank);
	} else {
	n = LLINK(n);
	}

	rank--;
	}

	// Now we are at a leaf. Either it matches our key, or we bail.
	if (key != base)
	abort();

	return n;
	}

	// -----------------------------------------------------------------------------
	static void buddy_splitnode(np n, sz rank)
	{
	assert(("node is legally splittable", GET_LEAF(n)));

	// remove one node of our rank and create two of a smaller rank
	if (!GET_FULL(n))
	SET_COUNT(rank, GET_COUNT(rank) - 1);
	SET_COUNT(rank - 1, GET_COUNT(rank - 1) + 2);

	SET_LEAF(n,0);

	// Both children are already empty leaf nodes, either because they have
	// never been written, or because buddy_{free,realloc} left them that way.
	}

	static void *buddy_alloc(size_t rank)
	{
	for (size_t r = rank; r < RANKS; r++) {
	if (!GET_COUNT(r))
	continue;

	size_t rp = IRANK(r);
	size_t first = (1ul << rp) - 1;
	for (np n = first; n < first + (1ul << rp); n++) {
	if (!GET_VALID(n) \|\| !GET_LEAF(n) \|\| GET_FULL(n))
	continue;

	while (r > rank) {
	buddy_splitnode(n, r--);
	n = LLINK(n);
	}

	SET_FULL(n,1);
	SET_COUNT(r, GET_COUNT(r) - 1);
	return NODE2ADDR(n);
	}
	}

	// if we make it here, there were no nodes of appropriate size or larger
	errno = ENOMEM;
	return NULL;
	}

	// -----------------------------------------------------------------------------
	// External API
	void *buddy_malloc(size_t size)
	{
	return buddy_alloc(SIZE2RANK(size));
	}

	void *buddy_calloc(size_t nelem, size_t width)
	{
	return memset(buddy_malloc(nelem * width), 0, nelem * width);
	}

	void buddy_free(void *orig)
	{
	if (!orig)
	return;

	np n = ADDR2NODE(orig);
	assert(("freeing node is a leaf", GET_LEAF(n) == 1));
	SET_FULL(n,0);
	size_t rank = NODE2RANK(n);
	assert(("rank is valid", rank < RANKS && rank >= 0));
	SET_COUNT(rank, GET_COUNT(rank) + 1); // mark self free
	assert(("count is legal", GET_COUNT(rank) < (1ul << (IRANK(rank) + 1))));
	if (n == TN)
	return;

	np p = PARENT(n);
	np b = SIBLING(n);

	while (GET_LEAF(b) && !GET_FULL(b)) {
	// TODO write this recursively too ?
	assert(("count will not underflow", GET_COUNT(rank) >= 2));
	SET_COUNT(rank, GET_COUNT(rank) - 2); // b and n are disappearing
	assert(("count legal", GET_COUNT(rank + 1) < (2u << IRANK(rank + 1))));
	SET_COUNT(rank + 1, GET_COUNT(rank + 1) + 1); // parent appears as leaf
	rank++;

	SET_LEAF(p,1);
	SET_FULL(p,0);

	n = p;

	if (n == TN)
	break;

	p = PARENT(n);
	assert(("parent is valid", GET_VALID(p)));
	b = SIBLING(n);
	assert(("sibling is valid", GET_VALID(b)));
	}
	}

	void buddy_realloc(void orig, size_t size)
	{
	if (!size) {
	buddy_free(orig);
	return NULL;
	}
	if (!orig)
	return buddy_malloc(size);

	void *what = orig;
	np n = ADDR2NODE(what);
	size_t rank = NODE2RANK(n);
	size_t max = RANK2BYTES(rank);
	if (size > max) {
	if (n == TN \|\| size >= pool_size) {
	errno = ENOMEM;
	return NULL;
	}

	#if !NO_PROMOTE
	// Without the ability to promote and demote existing allocation sizes,
	// we drop from 70% utilisation to 50%. Demotion seems more important
	// than promotion, but that may be an effect of the allocation pattern.
	size_t get = max;
	np p = PARENT(n), b = SIBLING(n);
	while (GET_LEAF(b) && !GET_FULL(b) && (get <<= 1) < size && p != TN) {
	b = SIBLING(p);
	p = PARENT(p);
	}

	if (get >= pool_size) {
	errno = ENOMEM;
	return NULL;
	}

	if (get >= size) {
	// can promote existing allocation
	do {
	np q = PARENT(n);
	SET_LEAF(q,1);
	SET_FULL(q,1);
	// Set node empty even though invalid, so future allocations
	// can depend on it without setting it explicitly.
	SET_FULL(n,0);
	n = q;
	// coalescing a full and an empty
	SET_COUNT(rank, GET_COUNT(rank) - 1);
	rank++;
	} while (n != p);
	what = NODE2ADDR(n);
	memmove(what, orig, max);
	} else
	#endif
	{
	what = buddy_alloc(SIZE2RANK(size));
	if (!what)
	return NULL; // ENOMEM is already set

	memcpy(what, orig, max);
	buddy_free(orig);
	}
	}
	#if !NO_DEMOTE
	else while (rank > 0 && size <= (max >>= 1)) {
	// demote node
	buddy_splitnode(n, rank--);
	// claim the left child of the newly split node
	n = LLINK(n);
	SET_FULL(n,1);

	SET_COUNT(rank, GET_COUNT(rank) - 1);
	}
	#endif

	return what;
	}
	PEDANTIC = -pedantic-errors -Werror

	ifneq ($(DEBUG),)
	CFLAGS += -g -O0
	LDFLAGS += -g
	CPPFLAGS += -DDEBUG
	else
	ARCH = native
	CFLAGS += -Os -fomit-frame-pointer -march=$(ARCH)
	CPPFLAGS += -DNDEBUG
	endif

	#CPPFLAGS += -DNO_REALLOC
	#CPPFLAGS += -DNO_PROMOTE
	#CPPFLAGS += -DNO_DEMOTE

	all: test_buddy

	buddy.o: CFLAGS += -Wall -Wextra $(PEDANTIC) -Wno-unused-value
	buddy.o: CPPFLAGS += -std=c99

	test_buddy: CPPFLAGS += -std=gnu99
	test_buddy: test_buddy.o buddy.c
	$(LINK.c) -o $@ $< $(LDLIBS)

	clean:
	$(RM) test_buddy *.o
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/time.h>
	#include <assert.h>

	#include "buddy.c"
	const size_t buddy_ranks = RANKS;
	const unsigned *buddy_counts = counts;
	float buddy_overhead = ((float)(sizeof nodes) + sizeof counts) / sizeof pool;
	const size_t buddy_pool_size = sizeof pool;

	void randomise(char *ptr, long size)
	{
	for (long i = 0; i < size; i++)
	ptr[i] = (random() >> 8) & 0xff;
	}

	#define COUNT 500
	static struct rec {
	void *addr;
	void *check;
	size_t size;
	} records[COUNT];

	unsigned check_range(const char bad, const char good, size_t len, size_t bounds[2])
	{
	if (len < 2) {
	if (len)
	return (const char)bad != (const char)good;
	return 0;
	} else if (memcmp(bad, good, len)) {
	size_t subbounds[2][2] = { { 0 } };
	int leftbad = check_range(&bad[0 ], &good[0 ], len / 2, subbounds[0]);
	int rightbad = check_range(&bad[len / 2], &good[len / 2], len - len / 2, subbounds[1]);

	// return leftmost and rightmost bad bounds
	bounds[0] = subbounds[ leftbad ? 0 : 1][0] + ( leftbad ? 0 : len - len / 2);
	bounds[1] = subbounds[rightbad ? 1 : 0][1] + (rightbad ? len - len / 2 : 0);

	return leftbad + rightbad;
	} else {
	return 0;
	}
	}

	void check(int i)
	{
	size_t bounds[2];
	unsigned count;
	if ((count = check_range(records[i].addr, records[i].check, records[i].size, bounds))) {
	printf("index %i had %u bad bytes between %zd and %zd\n", i, count, bounds[0], bounds[1]);
	abort();
	}
	}

	void checkall(void)
	{
	for (int i = 0; i < COUNT; i++)
	check(i);
	}

	int main(int argc, char *argv[])
	{
	struct timeval tv;
	gettimeofday(&tv, NULL);
	long seed = argc > 1 ? strtol(argv[1], NULL, 0) : tv.tv_usec;
	printf("seed = %ld\n", seed);
	extern float buddy_overhead;
	printf("overhead = %5.2f%%\n", buddy_overhead * 100);
	srandom(seed);

	buddy_free(buddy_calloc(0,0));

	for (int i = 0; i < COUNT; i++) {
	size_t piece = 1 << (random() % 20);
	size_t size = piece + (random() % piece) + 5;
	printf("Trying to allocate %zd bytes ...\n", size);
	void *ptr = buddy_malloc(size);
	if (!ptr) {
	puts("allocation failed, skipping");
	continue;
	}
	void *c = malloc(size);
	randomise(ptr, size);
	memcpy(c, ptr, size);
	records[i] = (struct rec){
	.addr = ptr,
	.check = c,
	.size = size
	};
	checkall();

	#if !NO_REALLOC
	size_t nsize = size * (1.2 + (random() % 4) - 1);
	printf("Trying to reallocate size %zd bytes ...\n", nsize);
	ptr = buddy_realloc(ptr, nsize);
	if (!ptr) {
	puts("reallocation failed, skipping");
	(void)random(); // eat the random number for consistency across runs
	continue;
	}
	c = realloc(c, nsize);
	if (nsize > size) {
	randomise(&ptr[size], nsize - size);
	memcpy(&c[size], &ptr[size], nsize - size);
	}

	records[i] = (struct rec){
	.addr = ptr,
	.check = c,
	.size = nsize
	};
	checkall();

	size_t qsize = size * (0.2 + random() % 4);
	printf("Trying to reallocate size %zd bytes ...\n", qsize);
	ptr = buddy_realloc(ptr, qsize);
	if (!ptr) {
	puts("reallocation failed, skipping");
	continue;
	}
	c = realloc(c, qsize);
	if (qsize > nsize) {
	randomise(&ptr[nsize], qsize - nsize);
	memcpy(&c[nsize], &ptr[nsize], qsize - nsize);
	}

	records[i] = (struct rec){
	.addr = ptr,
	.check = c,
	.size = qsize
	};
	checkall();
	#endif
	}

	checkall();

	unsigned long sum = 0;
	for (int i = 0; i < COUNT; i++) {
	sum += records[i].size;
	check(i);
	buddy_free(records[i].addr);
	free(records[i].check);
	}

	extern const size_t buddy_pool_size;
	extern const size_t buddy_ranks;
	extern const unsigned *buddy_counts;

	printf( "at peak, allocated %lu of %lu possible bytes (%5.2f%%)\n"
	"effective overhead : %5.2f%%\n",
	sum, buddy_pool_size, sum * 100. / buddy_pool_size,
	buddy_overhead / ((double)sum / buddy_pool_size) * 100.
	);

	assert(buddy_counts[buddy_ranks - 1] == 1);

	return 0;
	}