paniq · August 28, 2019 22:17
diff --git a/twistpool.c b/twistpool.c
 /*

 Twisting Pool Allocator
 =======================
 written by Leonard Ritter ([email protected])

 This file is in the public domain

 I don't know if I was the first one to stumble upon this technique, so
 I can't guarantee there's no patent on it, but let's hope there's not, 
 then this counts as prior art.

 --------------------------------------------------------------------------------

 This is a proof of concept implementation for a pool allocator that guarantees
 compactness (unsorted gapless iteration without indirections) while preserving 
 element ids (using one order-optimized indirection), with insertion, deletion 
 and lookup in O(1) time.

 Because all id <-> index assignments are symmetric swaps (either 
 identity-mapped or twisted), only a single table is required to resolve 
 index from id and id from index.

 Insertion and deletion is a self-sorting process. Both operations attempt
 to untwist identifiers that share the same status (obtained/released) and
 so, regardless of deallocation order, only few entries are accessed and
 fragmentation is generally low; in my random allocation test, the average number
 of fragmented entries appeared to be ~log2(capacity)); It is possible to produce
 a worst case by obtaining all id's, then releasing every other id. The upper
 bound on fragmented entries appears to be capacity/3.

 The data being managed must be allocated separately, and the user must
 copy at least none, at most one element after a call to obtain/release.

 With this implementation, the memory requirement is 
 sizeof(unsigned int) * capacity, typically 4 bytes per entry.

 It might however possible to implement the map with a compact sorted array or
 hashtable, storing only twisted entries, which should tremendously reduce memory
 usage and therefore notably improve cache efficiency.

 --------------------------------------------------------------------------------

 */

 #include <stdbool.h>
 #include <assert.h>

 // use whatever size you prefer; this implementation operates directly on 
 // global memory; a productive implementation would probably use the heap.
 #define CAPACITY 10000
 #define N (CAPACITY+1)

 /*  index: offset of element in data array; elements are moved
    to keep the array compact and contiguous

    id: numerical name assigned to a data element; never changes.
    therefore, elements should typically be referenced by their id,
    not their index.

    id 0 and index 0 are synonymous with no element
    */

 /*  symmetrical map
    maps index -> id; compact, gapless
    last index == count
    happens to also map id -> index; has gaps
    */
 unsigned int map[N];

 /*  all ids are mapped, even the unused ones;
    the ideal mapping is id == index
    the first free index is count+1
    if count == capacity, the pool is full.
    */
 unsigned int count;

 // pool constructor
 void construct () {
    unsigned int i;
    // initially no elements
    count = 0;
    for (i = 0; i < N; ++i) {
        // start out with identity mapping;
        // all unused ids are mapped
        map[i] = i;
    }
 }

 // lookup index from id or id from index
 unsigned int resolve(unsigned int id_or_index) {
    return map[id_or_index];
 }

 bool is_index_valid (unsigned int index) {
    return (index > 0) && (index <= count);
 }

 bool is_id_valid (unsigned int id) {
    return (id > 0) && (id <= CAPACITY) && is_index_valid(resolve(id));
 }

 bool is_identity (unsigned int id_or_index) {
    return (map[id_or_index] == id_or_index);
 }

 // swap the ids of two indices (or the indices of two ids)
 void swap (unsigned int a, unsigned int b) {
    // ids can only be twisted or untwisted, map must not be shuffled any further
    assert((a == b)
            || ((map[a] == a) && (map[b] == b))
            || ((map[a] == b) && (map[b] == a)));
    unsigned int t = map[a];
    map[a] = map[b];
    map[b] = t;
 }

 typedef struct _pair {
    unsigned int _0;
    unsigned int _1;
 } pair;

 /*  allocate a new id from the pool
    user must copy slot[index(id)] to slot[count] after
    allocation. obtain() returns a { src, dst } tuple indicating
    how data must be moved; if (dst == src), then no move is necessary.
    src is also identical to the newly obtained id
    if the pool is full, obtain returns { 0, 0 }
    */
 pair obtain () {
    if (count < CAPACITY) {
        unsigned int index;
        unsigned int id;
        
        // increase number of elements
        ++count;
        // index of new last element
        index = count;
        // id of new last element
        id = map[index];
        // if id not identical to index (is twisted)
        if (id != index) {
            // swap with index that matches this id,
            // so that index(id) == id
            swap(index, id);
        }
        // return new id/index and index of moved item
        pair result = { id, index };
        return result;
    } else {
        // out of space
        pair result = { 0, 0 };
        return result;
    }
 }

 /*  release an obtained id to the pool
    user must copy slot[count] to slot[index(id)] after
    deletion. (release id) returns a (src dst) tuple indicating
    how data must be moved; if (src == dst), then no move is necessary.
    */
 pair release (unsigned int id) {
    unsigned int index;
    unsigned int last_index;
    unsigned int last_id;
    
    assert(is_id_valid(id));
    
    // index of element to be deleted
    index = map[id];
    
    // if element is twisted, then untwist
    if (id > count)
        swap(index, id);
    // index and id of element to take its place
    last_index = count;
    last_id = map[last_index];
    
    // swap indices so that tailing element fills the gap (twist)
    // if last element is twisted, then untwist first
    if (last_id > count) {
        swap(last_index, last_id);
        swap(index, last_id);
    } else {
        swap(index, last_index);
    }
        
    // decrease number of elements
    --count;
    // return index of element to be moved and index of gap
    pair result = { last_index, index };
    return result;
 }

 // ----------------------------------------------------------------------------
 // test

 #include <stdlib.h>
 #include <stdio.h>

 // our "data array", which just stores the id again, so we can easily verify
 // if an id still matches its assigned content
 unsigned int data[N];

 void dump() {
    unsigned int i; 
    
    printf("index -> ID:\n");
    for (i = 1; i <= CAPACITY; ++i) {
        if (i == (count + 1))
            printf("-------\n");
        unsigned int id = resolve(i);
        unsigned int ri = resolve(id);
        printf("%u\t%u\t%s\n", i, id, ((i != ri)?"!":""));
    }
    printf("%i used elements\n\n", count);
 }
 
 void move(pair k) {
    if (k._0 != k._1) {
        data[k._1] = data[k._0];
    }
 }

 unsigned int verify_data () {
    unsigned int i;
    unsigned int twisted = 0;
    for (i = 1; i <= count; ++i) {
        unsigned int id = resolve(i);
        assert(data[i] == id);
        assert(resolve(id) == i);
        if (!is_identity(i))
            ++twisted;
    }
    return twisted;
 }
    
 unsigned int test_obtain () {
    pair k = obtain();
    move(k);
    data[k._0] = k._0;
    return k._0;
 }

 unsigned int test_release (unsigned int id) {
    assert(id != 0);
    pair k = release(id);
    move(k);
 }

 #include <memory.h>
 #include <stdio.h>

 // array of ids in use
 unsigned int used[CAPACITY];

 int main (int argc, void** argv) {
    int i;
    unsigned int mintwisted = N;
    unsigned int maxtwisted = 0;
    unsigned int total = 0;
    unsigned int steps = 0;
    unsigned int used_count = 0;
    
    memset(used, 0, sizeof(used));
    
    construct();

    srand(time());
 #if 1
    // do random obtains/releases, see if something breaks
    for (i = 0; i < 100000; ++i) {
        if (((rand() % 100) < 50) && (used_count > 0)) {
            unsigned int used_index = rand() % used_count;
            unsigned int id = used[used_index];
            // remove from used array and fill
            used_count--;
            used[used_index] = used[used_count];
            used[used_count] = 0;
            test_release(id);            
            unsigned int t = verify_data();
            mintwisted = (mintwisted < t)?mintwisted:t;
            maxtwisted = (maxtwisted > t)?maxtwisted:t;
            total += t;
            ++steps;
        } else {
            unsigned int k = test_obtain();
            if (k != 0) {
                assert(used_count < CAPACITY);
                used[used_count] = k;
                ++used_count;
            }
            verify_data();
        }
    }
 #else
    // attempt to fabricate a worst case
    for (i = 0; i < CAPACITY; ++i) {
        test_obtain();
    }    
    for (i = 1; i <= CAPACITY; i += 2) {
        test_release(i);
        unsigned int t = verify_data();
        mintwisted = (mintwisted < t)?mintwisted:t;
        maxtwisted = (maxtwisted > t)?maxtwisted:t;
        total += t;
        ++steps;
    }
 #endif
    
    dump();
    printf("releases: %u\nmin twisted: %u\nmax twisted: %u\ntotal twisted: %u\naverage: %f\n",
        steps, mintwisted, maxtwisted, total, (double)total / (double)steps);
    printf("OK.\n");
    
    return 0;
 }
	/*

	Twisting Pool Allocator
	=======================
	written by Leonard Ritter ([email protected])

	This file is in the public domain

	I don't know if I was the first one to stumble upon this technique, so
	I can't guarantee there's no patent on it, but let's hope there's not,
	then this counts as prior art.

	--------------------------------------------------------------------------------

	This is a proof of concept implementation for a pool allocator that guarantees
	compactness (unsorted gapless iteration without indirections) while preserving
	element ids (using one order-optimized indirection), with insertion, deletion
	and lookup in O(1) time.

	Because all id <-> index assignments are symmetric swaps (either
	identity-mapped or twisted), only a single table is required to resolve
	index from id and id from index.

	Insertion and deletion is a self-sorting process. Both operations attempt
	to untwist identifiers that share the same status (obtained/released) and
	so, regardless of deallocation order, only few entries are accessed and
	fragmentation is generally low; in my random allocation test, the average number
	of fragmented entries appeared to be ~log2(capacity)); It is possible to produce
	a worst case by obtaining all id's, then releasing every other id. The upper
	bound on fragmented entries appears to be capacity/3.

	The data being managed must be allocated separately, and the user must
	copy at least none, at most one element after a call to obtain/release.

	With this implementation, the memory requirement is
	sizeof(unsigned int) * capacity, typically 4 bytes per entry.

	It might however possible to implement the map with a compact sorted array or
	hashtable, storing only twisted entries, which should tremendously reduce memory
	usage and therefore notably improve cache efficiency.

	--------------------------------------------------------------------------------

	*/

	#include <stdbool.h>
	#include <assert.h>

	// use whatever size you prefer; this implementation operates directly on
	// global memory; a productive implementation would probably use the heap.
	#define CAPACITY 10000
	#define N (CAPACITY+1)

	/* index: offset of element in data array; elements are moved
	to keep the array compact and contiguous

	id: numerical name assigned to a data element; never changes.
	therefore, elements should typically be referenced by their id,
	not their index.

	id 0 and index 0 are synonymous with no element
	*/

	/* symmetrical map
	maps index -> id; compact, gapless
	last index == count
	happens to also map id -> index; has gaps
	*/
	unsigned int map[N];

	/* all ids are mapped, even the unused ones;
	the ideal mapping is id == index
	the first free index is count+1
	if count == capacity, the pool is full.
	*/
	unsigned int count;

	// pool constructor
	void construct () {
	unsigned int i;
	// initially no elements
	count = 0;
	for (i = 0; i < N; ++i) {
	// start out with identity mapping;
	// all unused ids are mapped
	map[i] = i;
	}
	}

	// lookup index from id or id from index
	unsigned int resolve(unsigned int id_or_index) {
	return map[id_or_index];
	}

	bool is_index_valid (unsigned int index) {
	return (index > 0) && (index <= count);
	}

	bool is_id_valid (unsigned int id) {
	return (id > 0) && (id <= CAPACITY) && is_index_valid(resolve(id));
	}

	bool is_identity (unsigned int id_or_index) {
	return (map[id_or_index] == id_or_index);
	}

	// swap the ids of two indices (or the indices of two ids)
	void swap (unsigned int a, unsigned int b) {
	// ids can only be twisted or untwisted, map must not be shuffled any further
	assert((a == b)
	\|\| ((map[a] == a) && (map[b] == b))
	\|\| ((map[a] == b) && (map[b] == a)));
	unsigned int t = map[a];
	map[a] = map[b];
	map[b] = t;
	}

	typedef struct _pair {
	unsigned int _0;
	unsigned int _1;
	} pair;

	/* allocate a new id from the pool
	user must copy slot[index(id)] to slot[count] after
	allocation. obtain() returns a { src, dst } tuple indicating
	how data must be moved; if (dst == src), then no move is necessary.
	src is also identical to the newly obtained id
	if the pool is full, obtain returns { 0, 0 }
	*/
	pair obtain () {
	if (count < CAPACITY) {
	unsigned int index;
	unsigned int id;

	// increase number of elements
	++count;
	// index of new last element
	index = count;
	// id of new last element
	id = map[index];
	// if id not identical to index (is twisted)
	if (id != index) {
	// swap with index that matches this id,
	// so that index(id) == id
	swap(index, id);
	}
	// return new id/index and index of moved item
	pair result = { id, index };
	return result;
	} else {
	// out of space
	pair result = { 0, 0 };
	return result;
	}
	}

	/* release an obtained id to the pool
	user must copy slot[count] to slot[index(id)] after
	deletion. (release id) returns a (src dst) tuple indicating
	how data must be moved; if (src == dst), then no move is necessary.
	*/
	pair release (unsigned int id) {
	unsigned int index;
	unsigned int last_index;
	unsigned int last_id;

	assert(is_id_valid(id));

	// index of element to be deleted
	index = map[id];

	// if element is twisted, then untwist
	if (id > count)
	swap(index, id);
	// index and id of element to take its place
	last_index = count;
	last_id = map[last_index];

	// swap indices so that tailing element fills the gap (twist)
	// if last element is twisted, then untwist first
	if (last_id > count) {
	swap(last_index, last_id);
	swap(index, last_id);
	} else {
	swap(index, last_index);
	}

	// decrease number of elements
	--count;
	// return index of element to be moved and index of gap
	pair result = { last_index, index };
	return result;
	}

	// ----------------------------------------------------------------------------
	// test

	#include <stdlib.h>
	#include <stdio.h>

	// our "data array", which just stores the id again, so we can easily verify
	// if an id still matches its assigned content
	unsigned int data[N];

	void dump() {
	unsigned int i;

	printf("index -> ID:\n");
	for (i = 1; i <= CAPACITY; ++i) {
	if (i == (count + 1))
	printf("-------\n");
	unsigned int id = resolve(i);
	unsigned int ri = resolve(id);
	printf("%u\t%u\t%s\n", i, id, ((i != ri)?"!":""));
	}
	printf("%i used elements\n\n", count);
	}

	void move(pair k) {
	if (k._0 != k._1) {
	data[k._1] = data[k._0];
	}
	}

	unsigned int verify_data () {
	unsigned int i;
	unsigned int twisted = 0;
	for (i = 1; i <= count; ++i) {
	unsigned int id = resolve(i);
	assert(data[i] == id);
	assert(resolve(id) == i);
	if (!is_identity(i))
	++twisted;
	}
	return twisted;
	}

	unsigned int test_obtain () {
	pair k = obtain();
	move(k);
	data[k._0] = k._0;
	return k._0;
	}

	unsigned int test_release (unsigned int id) {
	assert(id != 0);
	pair k = release(id);
	move(k);
	}

	#include <memory.h>
	#include <stdio.h>

	// array of ids in use
	unsigned int used[CAPACITY];

	int main (int argc, void** argv) {
	int i;
	unsigned int mintwisted = N;
	unsigned int maxtwisted = 0;
	unsigned int total = 0;
	unsigned int steps = 0;
	unsigned int used_count = 0;

	memset(used, 0, sizeof(used));

	construct();

	srand(time());
	#if 1
	// do random obtains/releases, see if something breaks
	for (i = 0; i < 100000; ++i) {
	if (((rand() % 100) < 50) && (used_count > 0)) {
	unsigned int used_index = rand() % used_count;
	unsigned int id = used[used_index];
	// remove from used array and fill
	used_count--;
	used[used_index] = used[used_count];
	used[used_count] = 0;
	test_release(id);
	unsigned int t = verify_data();
	mintwisted = (mintwisted < t)?mintwisted:t;
	maxtwisted = (maxtwisted > t)?maxtwisted:t;
	total += t;
	++steps;
	} else {
	unsigned int k = test_obtain();
	if (k != 0) {
	assert(used_count < CAPACITY);
	used[used_count] = k;
	++used_count;
	}
	verify_data();
	}
	}
	#else
	// attempt to fabricate a worst case
	for (i = 0; i < CAPACITY; ++i) {
	test_obtain();
	}
	for (i = 1; i <= CAPACITY; i += 2) {
	test_release(i);
	unsigned int t = verify_data();
	mintwisted = (mintwisted < t)?mintwisted:t;
	maxtwisted = (maxtwisted > t)?maxtwisted:t;
	total += t;
	++steps;
	}
	#endif

	dump();
	printf("releases: %u\nmin twisted: %u\nmax twisted: %u\ntotal twisted: %u\naverage: %f\n",
	steps, mintwisted, maxtwisted, total, (double)total / (double)steps);
	printf("OK.\n");

	return 0;
	}
No results found