Created November 11, 2018 07:35
A user-space batching malloc wrapper.
Sample output:
All tests passed.
benchmarking...
naive malloc: elapsed: 59.271ms, rate: 16,871,657m/s, n: 1,000,000 (1)
pp_malloc: elapsed: 28.880ms, rate: 34,626,039m/s, n: 1,000,000
pp_malloc.h
/*
pp_malloc.h: a batching malloc wrapper.
See https://github.com/pepaslabs/pp_c
Copyright (c) 2018 Jason Pepas
Released under the terms of the MIT License.
See https://opensource.org/licenses/MIT
This is a single-file C library.
See https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
How to use this file:
- From only one file in your codebase, include this as an implementation:
    #define PP_MALLOC_IMPLEMENTATION
    #include "pp_malloc.h"
- From as many files as needed, include this as a header:
    #include "pp_malloc.h"
History:
- v0.1 (2018/11/10): initial release
*/
// MARK: - Header
#ifndef PP_MALLOC_HEADER
#define PP_MALLOC_HEADER
#include <stdlib.h> // for size_t, NULL, etc.
typedef struct _pp_malloc_t pp_malloc_t;
// Create / destroy a pp_malloc_t.
pp_malloc_t* pp_malloc_create(size_t object_size);
void pp_malloc_free(pp_malloc_t** ptr);
// The primary interface to the batched malloc / free implementation.
void* pp_malloc(pp_malloc_t* self);
void pp_free(pp_malloc_t* self, void** ptr);
#endif // PP_MALLOC_HEADER
// MARK: - Implementation
#ifdef PP_MALLOC_IMPLEMENTATION
#include <assert.h>
#include <stdbool.h>
#include <strings.h> // for bzero
#include <stdint.h>
#include <limits.h>
// Similar to CHAR_BIT, LONG_BIT, etc., we define BITMASK_BIT.
#define BITMASK_BIT 64
typedef uint64_t bitmask_t;
#define BITMASK_MAX UINT64_MAX
// #define BITMASK_BIT 32
// typedef uint32_t bitmask_t;
// #define BITMASK_MAX UINT32_MAX
// #define BITMASK_BIT 16
// typedef uint16_t bitmask_t;
// #define BITMASK_MAX UINT16_MAX
// #define BITMASK_BIT 8
// typedef uint8_t bitmask_t;
// #define BITMASK_MAX UINT8_MAX
// MARK: - bitmask functions
// Check if a bitmask is all binary 1's.
static bool _bitmask_is_full(bitmask_t bitmask) {
    return bitmask == (bitmask_t)BITMASK_MAX;
}
// Check if an individual bit is set in a bitmask.
static bool _bit_is_set(bitmask_t bitmask, uint8_t bit_index) {
    // Note: shift a bitmask_t, not 1UL (unsigned long is only 32 bits on ILP32).
    return bitmask & ((bitmask_t)1 << bit_index);
}
// Return the index of the first unset bit in a bitmask, starting the search
// at 'start'.  Returns UINT8_MAX if no unset bit is found.
static uint8_t _bitmask_index_of_first_unset_bit(bitmask_t bitmask, uint8_t start) {
    for (uint8_t i = start; i < BITMASK_BIT; i++) {
        if (!_bit_is_set(bitmask, i)) {
            return i;
        }
    }
    return UINT8_MAX;
}
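// A possible O(1) alternative (a sketch, assuming GCC or Clang and the 64-bit
// bitmask_t): invert the mask, clear the bits below 'start', and use the
// __builtin_ctzll intrinsic to find the lowest remaining set bit, rather than
// scanning bit-by-bit:
//
//     static uint8_t _bitmask_index_of_first_unset_bit_fast(bitmask_t bitmask, uint8_t start) {
//         bitmask_t unset = ~bitmask & ~(((bitmask_t)1 << start) - 1);
//         if (unset == 0) {
//             return UINT8_MAX; // no unset bit at or above 'start'.
//         }
//         return (uint8_t)__builtin_ctzll(unset); // index of the lowest set bit.
//     }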
// Set the bit at the given index to 1.
static void _set_bit(bitmask_t* bitmask, uint8_t bit_index) {
    (*bitmask) |= ((bitmask_t)1 << bit_index);
}
// Clear the bit at the given index to 0.
static void _clear_bit(bitmask_t* bitmask, uint8_t bit_index) {
    (*bitmask) &= ~((bitmask_t)1 << bit_index);
}
// MARK: - slab_t implementation
typedef unsigned char byte_t;
// pp_slab_t: A slab of memory, sliced up into "slots".
// A slab has BITMASK_BIT slots (64, by default).
// 'bitmask' indicates the used / free status of each slot.
// A slab_t is "over-allocated": slot0 is merely a placeholder
// address which marks the start of the slots.
// The size of a slab is calculated as:
//   sizeof(slab_t) - 1 + (BITMASK_BIT * slot_size)
struct _pp_slab_t {
    bitmask_t bitmask;
    uint8_t first_free_index;
    size_t slot_size;
    byte_t slot0[1];
};
typedef struct _pp_slab_t pp_slab_t;
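// A worked example of the size arithmetic (assuming a typical LP64 ABI, where
// alignment padding makes sizeof(pp_slab_t) come out to 32 bytes): a slab with
// slot_size == 16 is malloc'd as 32 - 1 + (64 * 16) = 1,055 bytes, and its
// 64 slots begin at the address of slot0.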
// Allocate, initialize and return a slab.
pp_slab_t* pp_slab_create(size_t slot_size) {
    pp_slab_t* slab = (pp_slab_t*)malloc(sizeof(pp_slab_t) - 1 + BITMASK_BIT * slot_size);
    assert(slab != NULL);
    slab->bitmask = 0;
    slab->first_free_index = 0;
    slab->slot_size = slot_size;
    return slab;
}
// Return a slab's memory to the operating system.
void pp_slab_free(pp_slab_t** slab) {
    assert(slab != NULL && *slab != NULL);
    free(*slab);
    *slab = NULL;
}
// Check whether a slab is full.
bool pp_slab_is_full(pp_slab_t* slab) {
    assert(slab != NULL);
    return _bitmask_is_full(slab->bitmask);
}
// Return the index of the first free slot in a slab.
// Note: It is an error to call this function when the slab is full.
static uint8_t _pp_slab_next_free_index(pp_slab_t* slab) {
    assert(slab != NULL);
    return slab->first_free_index;
}
// Return a pointer to the slot at the given index in a slab.
static void* _pp_slab_slot(pp_slab_t* slab, uint8_t slot_index) {
    assert(slab != NULL);
    byte_t* addr = &(slab->slot0[slot_index * slab->slot_size]);
    return (void*)addr;
}
// Mark the slot at the given index in a slab as used.
static void _pp_slab_mark_used(pp_slab_t* slab, uint8_t slot_index) {
    assert(slab != NULL);
    _set_bit(&(slab->bitmask), slot_index);
    slab->first_free_index = _bitmask_index_of_first_unset_bit(slab->bitmask, slot_index);
}
// Mark the slot at the given index in a slab as available.
static void _pp_slab_mark_free(pp_slab_t* slab, uint8_t slot_index) {
    assert(slab != NULL);
    _clear_bit(&(slab->bitmask), slot_index);
    if (slot_index < slab->first_free_index) {
        slab->first_free_index = slot_index;
    }
}
// Find the next available slot in a slab, mark it as used, and return its address.
// Note: It is an error to call this function on a full slab.
void* pp_slab_claim_next(pp_slab_t* slab) {
    assert(slab != NULL);
    assert(pp_slab_is_full(slab) == false);
    uint8_t i = _pp_slab_next_free_index(slab);
    _pp_slab_mark_used(slab, i);
    return _pp_slab_slot(slab, i);
}
// Return the slot index of the given pointer (assuming it lies within this slab).
static uint8_t _pp_slab_slot_index_of_ptr(pp_slab_t* slab, void* ptr) {
    assert(slab != NULL);
    assert(ptr >= (void*)(slab->slot0));
    assert(ptr < (void*)(slab->slot0 + (slab->slot_size * BITMASK_BIT)));
    // Note: ptr is at or above slot0, so subtract in that order.
    size_t byte_offset = (size_t)((byte_t*)ptr - slab->slot0);
    assert(byte_offset % slab->slot_size == 0);
    return (uint8_t)(byte_offset / slab->slot_size);
}
// If the given pointer lies within this slab, mark its slot free and return true.
bool pp_slab_release(pp_slab_t* slab, void** ptr) {
    assert(slab != NULL);
    void* first = (void*)(slab->slot0);
    void* last = (void*)(slab->slot0 + (slab->slot_size * BITMASK_BIT));
    if (*ptr < first || *ptr >= last) {
        return false;
    }
    uint8_t i = _pp_slab_slot_index_of_ptr(slab, *ptr);
    _pp_slab_mark_free(slab, i);
    *ptr = NULL;
    return true;
}
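// A minimal usage sketch of the slab layer on its own (the slab functions are
// internal plumbing; normal callers go through pp_malloc / pp_free below):
//
//     pp_slab_t* slab = pp_slab_create(sizeof(double));
//     void* p = pp_slab_claim_next(slab);  // claims slot 0.
//     bool ok = pp_slab_release(slab, &p); // returns true; also sets p to NULL.
//     pp_slab_free(&slab);                 // returns the slab's memory.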
// MARK: - pp_slabtree_t implementation
// A union to access the children of a slabtree as either slabtrees or slabs.
// "leaf" nodes (height == 1) will use 'as_slabs', while "internal" nodes
// (height > 1) will use 'as_slabtrees'.
union _pp_slabtree_children_t {
    struct _pp_slabtree_t* as_slabtrees[BITMASK_BIT];
    struct _pp_slab_t* as_slabs[BITMASK_BIT];
};
typedef union _pp_slabtree_children_t pp_slabtree_children_t;
// pp_slabtree_t: a tree to organize slabs.
struct _pp_slabtree_t {
    bitmask_t bitmask;
    // The index of the first slot which isn't full.
    // (This is a cache to avoid scanning the bitmask).
    uint8_t first_free_index;
    // The address range of all slabs within this (sub)tree.
    // Note: this is going to cause a performance problem because
    // slab ranges will overlap as a tree grows, eventually forcing
    // a search of (most of) the entire tree to find a pointer's slab.
    void* addr_range_first;
    void* addr_range_last;
    // The height of this tree node.
    // slabtrees which have slabs as children have a height of 1.
    // "internal" slabtrees have a height > 1.
    uint16_t height;
    union _pp_slabtree_children_t children;
};
typedef struct _pp_slabtree_t pp_slabtree_t;
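// Capacity arithmetic: a height-1 node addresses up to 64 slabs of 64 slots
// each (4,096 objects), and each grow adds a level, multiplying capacity by
// 64 (height 2: 262,144 objects; height 3: 16,777,216), so the tree stays
// shallow even for large object counts.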
// Allocate, initialize and return a slabtree.
pp_slabtree_t* pp_slabtree_create(void) {
    pp_slabtree_t* tree = (pp_slabtree_t*)malloc(sizeof(pp_slabtree_t));
    assert(tree != NULL);
    bzero(tree, sizeof(pp_slabtree_t));
    tree->height = 1;
    return tree;
}
// Return a slabtree's memory to the operating system.
// Recursively frees its entire subtree, including slabs.
void pp_slabtree_free(pp_slabtree_t** ptr) {
    assert(ptr != NULL && *ptr != NULL);
    pp_slabtree_t* t = *ptr;
    if (t->height > 1) {
        for (uint8_t i = 0; i < BITMASK_BIT; i++) {
            if (t->children.as_slabtrees[i] != NULL) {
                pp_slabtree_free(&(t->children.as_slabtrees[i]));
            }
        }
    } else {
        for (uint8_t i = 0; i < BITMASK_BIT; i++) {
            if (t->children.as_slabs[i] != NULL) {
                pp_slab_free(&(t->children.as_slabs[i]));
            }
        }
    }
    // Free this node itself, not just its children.
    free(t);
    *ptr = NULL;
}
// Check if all of the slabs within this (sub)tree are full.
bool pp_slabtree_is_full(pp_slabtree_t* tree) {
    assert(tree != NULL);
    return _bitmask_is_full(tree->bitmask);
}
// Grow a slabtree up and to the right: the old root becomes child 0 of a new,
// taller root.
static void pp_slabtree_grow(pp_slabtree_t** tree) {
    assert(tree != NULL && *tree != NULL);
    pp_slabtree_t* t = *tree;
    pp_slabtree_t* parent = pp_slabtree_create();
    parent->height = t->height + 1;
    parent->children.as_slabtrees[0] = t;
    parent->bitmask = 1;
    parent->first_free_index = 1;
    // Inherit the old root's address range.
    parent->addr_range_first = t->addr_range_first;
    parent->addr_range_last = t->addr_range_last;
    *tree = parent;
}
// Find the next available slot, mark it as used and return its pointer.
// Note: It is an error to call this function on a full tree.
void* pp_slabtree_claim_next(pp_slabtree_t* tree, size_t object_size) {
    assert(tree != NULL);
    uint8_t i = tree->first_free_index;
    void* ret;
    // This is an internal node.
    if (tree->height > 1) {
        // Grab the first non-full subtree.
        pp_slabtree_t* child = tree->children.as_slabtrees[i];
        // ...which might not have been allocated yet.
        if (child == NULL) {
            // ...so create one and stitch it into the tree.
            child = pp_slabtree_create();
            child->height = tree->height - 1;
            tree->children.as_slabtrees[i] = child;
        }
        // Recurse down into the subtree until we hit a leaf node.
        ret = pp_slabtree_claim_next(child, object_size);
        // If the subtree has become full, update our bitmask and first_free_index.
        if (_bitmask_is_full(child->bitmask)) {
            _set_bit(&(tree->bitmask), i);
            tree->first_free_index = _bitmask_index_of_first_unset_bit(tree->bitmask, i);
        }
        // Update this node's address range book-keeping.
        if (ret < tree->addr_range_first || ret >= tree->addr_range_last) {
            // Scan our subtrees, adjusting our total address range as needed.
            // FIXME: actually, this is broken. this implements a one-way
            // ratcheting mechanism which never recedes. Fix this by setting
            // first to MAX and last to 0 before starting the scan.
            for (uint8_t j = 0; j < BITMASK_BIT; j++) {
                pp_slabtree_t* c = tree->children.as_slabtrees[j];
                if (c == NULL) {
                    continue;
                }
                if (c->addr_range_first < tree->addr_range_first) {
                    tree->addr_range_first = c->addr_range_first;
                } else if (c->addr_range_last > tree->addr_range_last) {
                    tree->addr_range_last = c->addr_range_last;
                }
            }
        }
    }
    // This is a leaf node.
    else {
        // Grab the first non-full slab.
        pp_slab_t* slab = tree->children.as_slabs[i];
        // ...which might not have been allocated yet.
        if (slab == NULL) {
            // ...so create one and stitch it into the tree.
            slab = pp_slab_create(object_size);
            tree->children.as_slabs[i] = slab;
            // Update this node's address range book-keeping.
            if ((void*)slab < tree->addr_range_first) {
                tree->addr_range_first = slab;
            } else if ((void*)slab >= tree->addr_range_last) {
                void* last = (void*)(slab->slot0 + (slab->slot_size * BITMASK_BIT));
                tree->addr_range_last = last;
            }
        }
        // Claim the next available slot in the slab.
        ret = pp_slab_claim_next(slab);
        // If the slab has become full, update our bitmask and first_free_index.
        if (_bitmask_is_full(slab->bitmask)) {
            _set_bit(&(tree->bitmask), i);
            tree->first_free_index = _bitmask_index_of_first_unset_bit(tree->bitmask, i);
        }
    }
    // Bubble the recursive result back up.
    return ret;
}
// Find the slot of the given pointer and mark it as available.
bool pp_slabtree_release(pp_slabtree_t* tree, void** ptr) {
    assert(tree != NULL);
    // If ptr is outside of this subtree's address range, return false.
    if (*ptr < tree->addr_range_first || *ptr >= tree->addr_range_last) {
        return false;
    }
    // If this is an internal tree node...
    if (tree->height > 1) {
        // Scan through all (64) subtrees, looking for ptr's subtree.
        // The fact that slabs are not guaranteed to be in increasing
        // order of address space is what necessitates this complete
        // scan of all 64 subtrees, because the slab ranges of the
        // subtrees can overlap. This is problematic for performance.
        // To address this issue, I will likely need to move to a
        // tree which maintains slab sort order (which isn't so bad
        // if we pre-allocate all 64 slabs of a leaf node, because then
        // we just have to shuffle the tree nodes, rather than shuffling
        // individual slabs).
        for (uint8_t i = 0; i < BITMASK_BIT; i++) {
            pp_slabtree_t* child = tree->children.as_slabtrees[i];
            if (child == NULL) {
                continue;
            }
            // If ptr was in this subtree, end the scan.
            bool result = pp_slabtree_release(child, ptr);
            if (result) {
                // The subtree now has a free slot, so it is no longer full.
                _clear_bit(&(tree->bitmask), i);
                if (i < tree->first_free_index) {
                    tree->first_free_index = i;
                }
                return result;
            }
        }
        // The scan completed and we didn't find ptr within our subtree.
        return false;
    }
    // Otherwise, this is a leaf node.
    else {
        // Scan the slabs, looking for ptr.
        for (uint8_t i = 0; i < BITMASK_BIT; i++) {
            pp_slab_t* slab = tree->children.as_slabs[i];
            if (slab == NULL) {
                continue;
            }
            // If ptr was in this slab, end the scan.
            bool result = pp_slab_release(slab, ptr);
            if (result) {
                // The slab now has a free slot, so it is no longer full.
                _clear_bit(&(tree->bitmask), i);
                if (i < tree->first_free_index) {
                    tree->first_free_index = i;
                }
                return result;
            }
        }
        // The scan completed and we didn't find ptr in any of our slabs.
        return false;
    }
}
// MARK: - pp_malloc_t implementation
// pp_malloc_t: a user-space managed memory pool.
struct _pp_malloc_t {
    pp_slabtree_t* tree;
    size_t object_size;
};
// Allocate, initialize and return a user-space malloc pool.
pp_malloc_t* pp_malloc_create(size_t object_size) {
    pp_malloc_t* self = (pp_malloc_t*)malloc(sizeof(pp_malloc_t));
    assert(self != NULL);
    self->object_size = object_size;
    self->tree = pp_slabtree_create();
    pp_slab_t* slab = pp_slab_create(object_size);
    self->tree->children.as_slabs[0] = slab;
    self->tree->addr_range_first = slab;
    void* last = (void*)(slab->slot0 + (slab->slot_size * BITMASK_BIT));
    self->tree->addr_range_last = last;
    return self;
}
// Return the memory of a user-space malloc pool to the operating system.
void pp_malloc_free(pp_malloc_t** ptr) {
    assert(ptr != NULL && *ptr != NULL);
    pp_slabtree_free(&((*ptr)->tree));
    free(*ptr);
    *ptr = NULL;
}
// A user-space malloc wrapper, designed to decrease malloc overhead
// by batching (64) calls to malloc.
// Initial (trivial) testing shows this to be 1.5x to 2x as fast
// as unwrapped malloc calls. However, I have not yet tested a workload
// involving heavy use of 'pp_free', so performance might not stay at 2x
// for long-lived processes.
void* pp_malloc(pp_malloc_t* self) {
    assert(self != NULL);
    if (pp_slabtree_is_full(self->tree)) {
        pp_slabtree_grow(&(self->tree));
    }
    return pp_slabtree_claim_next(self->tree, self->object_size);
}
// This is the corresponding free wrapper.
void pp_free(pp_malloc_t* self, void** ptr) {
    assert(self != NULL);
    assert(ptr != NULL && *ptr != NULL);
    // Note: keep the release call outside of assert(), so that it still runs
    // when NDEBUG is defined.
    bool released = pp_slabtree_release(self->tree, ptr);
    assert(released);
    (void)released;
}
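/*
A minimal end-to-end usage sketch (the 'point_t' struct is hypothetical;
a pool serves objects of a single, fixed size):

    typedef struct { double x; double y; } point_t;

    pp_malloc_t* pool = pp_malloc_create(sizeof(point_t));
    point_t* p = (point_t*)pp_malloc(pool);
    p->x = 1.0;
    p->y = 2.0;
    void* vp = p;
    pp_free(pool, &vp);     // marks the slot free and sets vp to NULL.
    pp_malloc_free(&pool);  // frees all slabs and the tree.
*/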
#endif // PP_MALLOC_IMPLEMENTATION
// MARK: - Tests
#ifdef PP_MALLOC_TESTS
static void test_bitmask_is_full() {
    bitmask_t bitmask = 0;
    assert(_bitmask_is_full(bitmask) == false);
    bitmask--;
    assert(_bitmask_is_full(bitmask) == true);
    bitmask = 12345;
    assert(_bitmask_is_full(bitmask) == false);
}
static void test_bit_is_set() {
    bitmask_t bitmask = 0;
    assert(_bit_is_set(bitmask, 0) == false);
    assert(_bit_is_set(bitmask, 1) == false);
    assert(_bit_is_set(bitmask, 2) == false);
    bitmask = 1;
    assert(_bit_is_set(bitmask, 0) == true);
    assert(_bit_is_set(bitmask, 1) == false);
    assert(_bit_is_set(bitmask, 2) == false);
    bitmask = 2;
    assert(_bit_is_set(bitmask, 0) == false);
    assert(_bit_is_set(bitmask, 1) == true);
    assert(_bit_is_set(bitmask, 2) == false);
    bitmask = 3;
    assert(_bit_is_set(bitmask, 0) == true);
    assert(_bit_is_set(bitmask, 1) == true);
    assert(_bit_is_set(bitmask, 2) == false);
    bitmask = 4;
    assert(_bit_is_set(bitmask, 0) == false);
    assert(_bit_is_set(bitmask, 1) == false);
    assert(_bit_is_set(bitmask, 2) == true);
}
static void test_bitmask_index_of_first_unset_bit() {
    bitmask_t bitmask = 0;
    assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 0);
    bitmask = 1;
    assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 1);
    bitmask = 3;
    assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 2);
    bitmask = 7;
    assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 3);
    bitmask = -1;
    assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == UINT8_MAX);
}
static void test_set_bit() {
    bitmask_t bitmask = 0;
    _set_bit(&bitmask, 0);
    assert(bitmask == 1);
    _set_bit(&bitmask, 1);
    assert(bitmask == 3);
    _set_bit(&bitmask, 2);
    assert(bitmask == 7);
    bitmask = 0;
    _set_bit(&bitmask, 31);
    assert(bitmask == 2147483648UL);
    bitmask = 0;
    _set_bit(&bitmask, 63);
    // Note: 2^63 is awkward to write as a decimal literal.  An earlier
    // attempt (18446744073709551616UL, which is actually 2^64) produced
    // "error: integer literal is too large to be represented in any integer
    // type"; 2^63 is 9223372036854775808, which as a decimal literal needs a
    // ULL suffix, so hex is used here instead.
    // See https://stackoverflow.com/questions/35136202/how-do-i-mute-this-error-integer-literal-is-too-large-to-be-represented-in-a#comment57995212_35136303
    assert(bitmask == 0x8000000000000000);
}
static void test_clear_bit() {
    bitmask_t bitmask = 7;
    _clear_bit(&bitmask, 0);
    assert(bitmask == 6);
    _clear_bit(&bitmask, 1);
    assert(bitmask == 4);
    _clear_bit(&bitmask, 2);
    assert(bitmask == 0);
}
static void test_pp_slab_claim_next() {
    size_t slot_size = 1;
    pp_slab_t* slab = pp_slab_create(slot_size);
    assert(slab->bitmask == 0);
    void* ret = pp_slab_claim_next(slab);
    assert(ret != NULL);
    assert(slab->bitmask == 1);
    for (int i = 1; i < BITMASK_BIT; i++) {
        pp_slab_claim_next(slab);
    }
    assert(pp_slab_is_full(slab));
    pp_slab_free(&slab);
}
static void test_pp_slab_is_full() {
    size_t slot_size = 1;
    pp_slab_t* slab = pp_slab_create(slot_size);
    for (int i = 0; i < BITMASK_BIT; i++) {
        pp_slab_claim_next(slab);
    }
    assert(pp_slab_is_full(slab));
    pp_slab_free(&slab);
}
static void test_pp_slabtree_is_full() {
    pp_malloc_t* md = pp_malloc_create(1);
    size_t n = BITMASK_BIT - 1;
    for (size_t i = 0; i < n; i++) {
        pp_malloc(md);
    }
    assert(!pp_slab_is_full(md->tree->children.as_slabs[0]));
    assert(md->tree->bitmask == 0);
    pp_malloc(md);
    assert(pp_slab_is_full(md->tree->children.as_slabs[0]));
    assert(md->tree->bitmask == 1);
    pp_malloc_free(&md);
}
static void test_slab_create_and_free() {
    pp_slab_t* s = pp_slab_create(1);
    pp_slab_free(&s);
    assert(s == NULL);
}
static void test_slabtree_create_and_free() {
    pp_slabtree_t* s = pp_slabtree_create();
    pp_slabtree_free(&s);
    assert(s == NULL);
}
static void test_malloc_create_and_free() {
    pp_malloc_t* m = pp_malloc_create(1);
    pp_malloc_free(&m);
    assert(m == NULL);
    m = pp_malloc_create(1);
    // fill a single slab (a tree of height 1):
    size_t n = BITMASK_BIT;
    for (size_t i = 0; i < n; i++) {
        pp_malloc(m);
    }
    pp_malloc_free(&m);
    assert(m == NULL);
    m = pp_malloc_create(1);
    // fill a height-1 tree (64 slabs):
    n = BITMASK_BIT * BITMASK_BIT;
    for (size_t i = 0; i < n; i++) {
        pp_malloc(m);
    }
    pp_malloc_free(&m);
    assert(m == NULL);
    m = pp_malloc_create(1);
    // grow to and fill a height-2 tree:
    n = BITMASK_BIT * BITMASK_BIT * BITMASK_BIT;
    for (size_t i = 0; i < n; i++) {
        pp_malloc(m);
    }
    pp_malloc_free(&m);
    assert(m == NULL);
}
static void test_malloc_and_free() {
    pp_malloc_t* m = pp_malloc_create(1);
    void* ptr = pp_malloc(m);
    pp_free(m, &ptr);
    assert(ptr == NULL);
    pp_malloc_free(&m);
    assert(m == NULL);
}
/*
To TDD this file, implement a main.c:
    #define PP_MALLOC_IMPLEMENTATION
    #define PP_MALLOC_TESTS
    #include "pp_malloc.h"
    int main() { test(); return 0; }
Then run this in a shell:
    watch -d -n1 'gcc -std=c99 -Wall main.c && ./a.out'
Or, if you don't have 'watch' installed:
    while true ; do sleep 1 ; gcc -std=c99 -Wall main.c && ./a.out ; done
Each time you edit and save, look at the terminal to see if the tests are
still passing.
*/
#include <stdio.h>
void test() {
    test_bitmask_is_full();
    test_bit_is_set();
    test_bitmask_index_of_first_unset_bit();
    test_set_bit();
    test_clear_bit();
    test_pp_slab_claim_next();
    test_pp_slab_is_full();
    test_pp_slabtree_is_full();
    test_slab_create_and_free();
    test_slabtree_create_and_free();
    test_malloc_create_and_free();
    test_malloc_and_free();
    printf("All tests passed.\n");
}
#endif // PP_MALLOC_TESTS
// MARK: - Performance testing
#ifdef PP_MALLOC_BENCHMARK
#include <stdio.h>
#include <sys/time.h> // for gettimeofday()
#include <locale.h>
static unsigned long benchmark_naive_malloc(size_t object_size, size_t n) {
    unsigned long sum = 0;
    for (size_t i = 0; i < n; i++) {
        sum = sum + (unsigned long)malloc(object_size);
    }
    return sum;
}
static void benchmark_batched_malloc(size_t object_size, size_t n) {
    pp_malloc_t* md = pp_malloc_create(object_size);
    for (size_t i = 0; i < n; i++) {
        pp_malloc(md);
    }
}
void benchmark() {
    setlocale(LC_NUMERIC, "");
    printf("benchmarking...\n");
    struct timeval then;
    struct timeval now;
    double elapsed1;
    double elapsed2;
    double rate1;
    double rate2;
    size_t object_size = 1;
    size_t n = 1000000;
    // this is just to avoid having the optimizer turn the naive test into a no-op.
    unsigned long sum;
    gettimeofday(&then, NULL);
    sum = benchmark_naive_malloc(object_size, n);
    gettimeofday(&now, NULL);
    // thanks to https://stackoverflow.com/a/2150334/7543271
    elapsed1 = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) / 1000000.0;
    rate1 = n / elapsed1;
    printf("naive malloc: elapsed: %'0.3fms, rate: %'0.0fm/s, n: %'lu (%i)\n", elapsed1 * 1000, rate1, n, (bool)sum);
    gettimeofday(&then, NULL);
    benchmark_batched_malloc(object_size, n);
    gettimeofday(&now, NULL);
    elapsed2 = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) / 1000000.0;
    rate2 = n / elapsed2;
    printf("pp_malloc: elapsed: %'0.3fms, rate: %'0.0fm/s, n: %'lu\n", elapsed2 * 1000, rate2, n);
    // factor = 100.0 / ((double)elapsed2 / (double)elapsed1);
    // printf("speedup: %0.1f%% faster\n", factor);
}
#endif // PP_MALLOC_BENCHMARK
pp_malloc_benchmark.c
#define PP_MALLOC_IMPLEMENTATION
#define PP_MALLOC_BENCHMARK
#include "pp_malloc.h"
int main() { benchmark(); return 0; }
pp_malloc_test.c
#define PP_MALLOC_IMPLEMENTATION
#define PP_MALLOC_TESTS
#include "pp_malloc.h"
int main() { test(); return 0; }
#!/bin/bash
set -e
gcc -O3 -std=c99 -Wall pp_malloc_test.c && ./a.out
gcc -O3 -std=c99 -Wall pp_malloc_benchmark.c && ./a.out