@cellularmitosis
Created November 11, 2018 07:35
A user-space batching malloc wrapper.
All tests passed.
benchmarking...
naive malloc: elapsed: 59.271ms, rate: 16,871,657m/s, n: 1,000,000 (1)
pp_malloc: elapsed: 28.880ms, rate: 34,626,039m/s, n: 1,000,000
pp_malloc.h:
/*
pp_malloc.h: a batching malloc wrapper.
See https://github.com/pepaslabs/pp_c
Copyright (c) 2018 Jason Pepas
Released under the terms of the MIT License.
See https://opensource.org/licenses/MIT
This is a single-file C library.
See https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
How to use this file:
- From only one file in your codebase, include this as an implementation:
#define PP_MALLOC_IMPLEMENTATION
#include "pp_malloc.h"
- From as many files as needed, include this as a header:
#include "pp_malloc.h"
History:
- v0.1 (2018/11/10): initial release
*/
// MARK: - Header
#ifndef PP_MALLOC_HEADER
#define PP_MALLOC_HEADER
#include <stdlib.h> // for size_t, NULL, etc.
typedef struct _pp_malloc_t pp_malloc_t;
// Create / destroy a pp_malloc_t.
pp_malloc_t* pp_malloc_create(size_t object_size);
void pp_malloc_free(pp_malloc_t** ptr);
// The primary interface to the batched malloc / free implementation.
void* pp_malloc(pp_malloc_t* self);
void pp_free(pp_malloc_t* self, void** ptr);
#endif // PP_MALLOC_HEADER
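// Example usage (an illustrative sketch, not part of the library; the object size
// of 64 bytes is arbitrary, any fixed size works):
//
//     pp_malloc_t* pool = pp_malloc_create(64); // every allocation will be 64 bytes
//     void* obj = pp_malloc(pool);              // claim one 64-byte slot
//     pp_free(pool, &obj);                      // release the slot; obj is set to NULL
//     pp_malloc_free(&pool);                    // release the pool; pool is set to NULL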
// MARK: - Implementation
#ifdef PP_MALLOC_IMPLEMENTATION
#include <assert.h>
#include <stdbool.h>
#include <strings.h> // for bzero
#include <stdint.h>
#include <limits.h>
// Similar to CHAR_BIT, LONG_BIT, etc., we define BITMASK_BIT.
#define BITMASK_BIT 64
typedef uint64_t bitmask_t;
#define BITMASK_MAX UINT64_MAX
// #define BITMASK_BIT 32
// typedef uint32_t bitmask_t;
// #define BITMASK_MAX UINT32_MAX
// #define BITMASK_BIT 16
// typedef uint16_t bitmask_t;
// #define BITMASK_MAX UINT16_MAX
// #define BITMASK_BIT 8
// typedef uint8_t bitmask_t;
// #define BITMASK_MAX UINT8_MAX
// MARK: - bitmask functions
// Check if a bitmask is all binary 1's.
static bool _bitmask_is_full(bitmask_t bitmask) {
return bitmask == (bitmask_t)BITMASK_MAX;
}
// Check if an individual bit is set in a bitmask.
static bool _bit_is_set(bitmask_t bitmask, uint8_t bit_index) {
// Shift a bitmask_t-width 1 so the shift is well-defined even where 'unsigned long' is 32 bits.
return bitmask & ((bitmask_t)1 << bit_index);
}
// Return the index of the first unset bit in a bitmask.
uint8_t _bitmask_index_of_first_unset_bit(bitmask_t bitmask, uint8_t start) {
for (uint8_t i = start; i < BITMASK_BIT; i++) {
if (!_bit_is_set(bitmask, i)) {
return i;
}
}
return UINT8_MAX;
}
// Set the bit at the given index to 1.
void _set_bit(bitmask_t* bitmask, uint8_t bit_index) {
(*bitmask) |= ((bitmask_t)1 << bit_index);
}
// Clear the bit at the given index to 0.
void _clear_bit(bitmask_t* bitmask, uint8_t bit_index) {
(*bitmask) &= ~((bitmask_t)1 << bit_index);
}
// MARK: - slab_t implementation
typedef unsigned char byte_t;
// pp_slab_t: A slab of memory, sliced up into "slots".
// A slab has BITMASK_BIT slots: 64 by default (switch the typedefs above for 32, 16 or 8).
// 'bitmask' indicates the used / free status of each slot.
// A pp_slab_t is "over-allocated": slot0 is merely a placeholder
// address which marks the start of the slots.
// The size of a slab is calculated as:
// sizeof(pp_slab_t) - 1 + (BITMASK_BIT * slot_size)
struct _pp_slab_t {
bitmask_t bitmask;
uint8_t first_free_index;
size_t slot_size;
byte_t slot0[1];
};
typedef struct _pp_slab_t pp_slab_t;
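// Worked example of slot addressing (illustrative, assuming slot_size == 16 and the
// default BITMASK_BIT == 64): the slab is malloc'd as one block of
// sizeof(pp_slab_t) - 1 + (64 * 16) bytes, slot i begins at &slot0[i * 16], so slot 3
// starts 48 bytes past slot0, and bit 3 of 'bitmask' tracks whether that slot is in use.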
// Allocate, initialize and return a slab.
pp_slab_t* pp_slab_create(size_t slot_size) {
pp_slab_t* slab = (pp_slab_t*)malloc(sizeof(pp_slab_t) - 1 + BITMASK_BIT * slot_size);
assert(slab != NULL);
slab->bitmask = 0;
slab->first_free_index = 0;
slab->slot_size = slot_size;
assert(slab->slot0 != NULL);
return slab;
}
// Return a slab's memory to the operating system.
void pp_slab_free(pp_slab_t** slab) {
assert(slab != NULL && *slab != NULL);
free(*slab);
*slab = NULL;
}
// Check whether a slab is full.
bool pp_slab_is_full(pp_slab_t *slab) {
assert(slab != NULL);
return _bitmask_is_full(slab->bitmask);
}
// Return the index of the first free slot in a slab.
// Note: It is an error to call this function when the slab is full.
static uint8_t _pp_slab_next_free_index(pp_slab_t* slab) {
assert(slab != NULL);
return slab->first_free_index;
}
// Return a pointer to the slot at the given index in a slab.
static void* _pp_slab_slot(pp_slab_t* slab, uint8_t slot_index) {
assert(slab != NULL);
byte_t* addr = &(slab->slot0[slot_index * slab->slot_size]);
return (void*)addr;
}
// Mark the slot at the given index in a slab as used.
static void _pp_slab_mark_used(pp_slab_t* slab, uint8_t slot_index) {
assert(slab != NULL);
_set_bit(&(slab->bitmask), slot_index);
slab->first_free_index = _bitmask_index_of_first_unset_bit(slab->bitmask, slot_index);
}
// Mark the slot at the given index in a slab as available.
static void _pp_slab_mark_free(pp_slab_t* slab, uint8_t slot_index) {
assert(slab != NULL);
_clear_bit(&(slab->bitmask), slot_index);
if (slot_index < slab->first_free_index) {
slab->first_free_index = slot_index;
}
}
// Find the next available slot in a slab, mark it as used, and return its address.
// Note: It is an error to call this function on a full slab.
void* pp_slab_claim_next(pp_slab_t* slab) {
assert(slab != NULL);
assert(pp_slab_is_full(slab) == false);
uint8_t i = _pp_slab_next_free_index(slab);
_pp_slab_mark_used(slab, i);
return _pp_slab_slot(slab, i);
}
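// For example (illustrative only): on a fresh slab, three consecutive calls to
// pp_slab_claim_next() return the addresses of slots 0, 1 and 2, leaving
// bitmask == 0b111 and first_free_index == 3.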
// Return the slot index of the given pointer (assuming it lies within this slab).
static uint8_t _pp_slab_slot_index_of_ptr(pp_slab_t* slab, void* ptr) {
assert(slab != NULL);
assert(ptr >= (void*)(slab->slot0));
assert(ptr < (void*)(slab->slot0) + (slab->slot_size * BITMASK_BIT));
// The byte offset of ptr from the start of the slots (slot0).
size_t byte_offset = (size_t)((byte_t*)ptr - slab->slot0);
assert(byte_offset % slab->slot_size == 0);
return (uint8_t)(byte_offset / slab->slot_size);
}
// If the given pointer lies within this slab, mark its slot free and return true.
bool pp_slab_release(pp_slab_t* slab, void** ptr) {
assert(slab != NULL);
void* first = (void*)(slab->slot0);
void* last = (void*)(slab->slot0) + (slab->slot_size * BITMASK_BIT);
if (*ptr < first || *ptr >= last) {
return false;
}
uint8_t i = _pp_slab_slot_index_of_ptr(slab, *ptr);
_pp_slab_mark_free(slab, i);
*ptr = NULL;
return true;
}
// MARK: - pp_slabtree_t implementation
// A union to access the children of a slabtree as either slabtrees or slabs.
// "leaf" nodes (height == 1) will use 'as_slabs', while "internal" nodes
// (height > 1) will use 'as_slabtrees'.
union _pp_slabtree_children_t {
struct _pp_slabtree_t* as_slabtrees[BITMASK_BIT];
struct _pp_slab_t* as_slabs[BITMASK_BIT];
};
typedef union _pp_slabtree_children_t pp_slabtree_children_t;
// pp_slabtree_t: a tree to organize slabs.
struct _pp_slabtree_t {
bitmask_t bitmask;
// The index of the first slot which isn't full.
// (This is a cache to avoid scanning the bitmask).
uint8_t first_free_index;
// The address range of all slabs within this (sub)tree.
// Note: this is going to cause a performance problem because
// slab ranges will overlap as a tree grows, eventually forcing
// a search of (most of) the entire tree to find a pointer's slab.
void* addr_range_first;
void* addr_range_last;
// The height of this tree node.
// slabtrees which have slabs as children have a height of 1.
// "internal" slabtrees have a height > 1.
uint16_t height;
union _pp_slabtree_children_t children;
};
typedef struct _pp_slabtree_t pp_slabtree_t;
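// Worked capacity example (illustrative, with the default BITMASK_BIT of 64): a node of
// height 1 holds up to 64 slabs, i.e. 64 * 64 = 4,096 slots; a node of height 2 holds up
// to 64 such subtrees, i.e. 64^3 = 262,144 slots; in general a tree of height h holds up
// to 64^(h+1) slots.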
// Allocate, initialize and return a slabtree.
pp_slabtree_t* pp_slabtree_create(void) {
pp_slabtree_t* tree = (pp_slabtree_t*)malloc(sizeof(pp_slabtree_t));
assert(tree != NULL);
bzero(tree, sizeof(pp_slabtree_t));
tree->height = 1;
return tree;
}
// Return a slabtree's memory to the operating system.
// Recursively frees its entire subtree, including slabs.
void pp_slabtree_free(pp_slabtree_t** ptr) {
assert(ptr != NULL && *ptr != NULL);
pp_slabtree_t* t = *ptr;
if (t->height > 1) {
for (uint8_t i = 0; i < BITMASK_BIT; i++) {
if (t->children.as_slabtrees[i] != NULL) {
pp_slabtree_free(&(t->children.as_slabtrees[i]));
}
}
} else {
for (uint8_t i = 0; i < BITMASK_BIT; i++) {
if (t->children.as_slabs[i] != NULL) {
pp_slab_free(&(t->children.as_slabs[i]));
}
}
}
// Free this node itself (its children have already been freed above).
free(t);
*ptr = NULL;
}
// Check if all of the slabs within this (sub)tree are full.
bool pp_slabtree_is_full(pp_slabtree_t* tree) {
assert(tree != NULL);
return _bitmask_is_full(tree->bitmask);
}
// Grow a slabtree up and to the right.
static void pp_slabtree_grow(pp_slabtree_t** tree) {
assert(tree != NULL && *tree != NULL);
pp_slabtree_t* t = *tree;
pp_slabtree_t* parent = pp_slabtree_create();
parent->height = t->height + 1;
parent->children.as_slabtrees[0] = t;
parent->bitmask = 1; // child 0 (the old root) is full; that is why we are growing.
parent->first_free_index = 1;
// Carry the old root's address range up into the new root.
parent->addr_range_first = t->addr_range_first;
parent->addr_range_last = t->addr_range_last;
*tree = parent;
}
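// For example (illustrative): if the root is a full height-1 node (4,096 slots),
// pp_slabtree_grow() allocates a new height-2 root, installs the old root as child 0,
// and marks that child as full, so the next pp_slabtree_claim_next() lands in child 1.
// Each grow multiplies the tree's capacity by 64.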
// Find the next available slot, mark it as used, and return its pointer.
// Note: It is an error to call this function on a full tree.
void* pp_slabtree_claim_next(pp_slabtree_t* tree, size_t object_size) {
assert(tree != NULL);
uint8_t i = tree->first_free_index;
void* ret;
// This is an internal node.
if (tree->height > 1) {
// Grab the first non-full subtree.
pp_slabtree_t* child = tree->children.as_slabtrees[i];
// ...which might not have been allocated yet.
if (child == NULL) {
// ...so create one and stitch it into the tree.
child = pp_slabtree_create();
child->height = tree->height - 1;
tree->children.as_slabtrees[i] = child;
}
// Recurse down into the subtree until you hit a leaf node.
ret = pp_slabtree_claim_next(child, object_size);
// If the subtree's bitmask has changed, update our first_free_index.
if (_bitmask_is_full(child->bitmask)) {
_set_bit(&(tree->bitmask), i);
tree->first_free_index = _bitmask_index_of_first_unset_bit(tree->bitmask, i);
}
// Update this node's address range book-keeping.
if (ret < tree->addr_range_first || ret >= tree->addr_range_last) {
// Scan our subtrees, adjusting our total address range as needed.
// FIXME: actually, this is broken. this implements a one-way
// ratcheting mechanism which never recedes. Fix this by setting
// first to MAX and last to 0 before starting the scan.
for (uint8_t i = 0; i < BITMASK_BIT; i++) {
child = tree->children.as_slabtrees[i];
if (child == NULL) {
continue;
}
// Check both ends independently: a child can extend the range in both directions.
if (tree->addr_range_first == NULL || child->addr_range_first < tree->addr_range_first) {
tree->addr_range_first = child->addr_range_first;
}
if (child->addr_range_last > tree->addr_range_last) {
tree->addr_range_last = child->addr_range_last;
}
}
}
}
// This is a leaf node.
else {
// Grab the first non-full slab.
pp_slab_t* slab = tree->children.as_slabs[i];
// ...which might not have been allocated yet.
if (slab == NULL) {
// ...so create one and stitch it into the tree.
slab = pp_slab_create(object_size);
tree->children.as_slabs[i] = slab;
// Update this node's address range book-keeping.
// Check both ends independently, treating a NULL (unset) lower bound as "always update".
if (tree->addr_range_first == NULL || (void*)slab < tree->addr_range_first) {
tree->addr_range_first = slab;
}
void* slab_end = (void*)(slab->slot0) + (slab->slot_size * BITMASK_BIT);
if (slab_end > tree->addr_range_last) {
tree->addr_range_last = slab_end;
}
}
// Claim the next available slot in the slab.
ret = pp_slab_claim_next(slab);
// If the slab's bitmask has changed, update our first_free_index.
if (_bitmask_is_full(slab->bitmask)) {
_set_bit(&(tree->bitmask), i);
tree->first_free_index = _bitmask_index_of_first_unset_bit(tree->bitmask, i);
}
}
// Bubble the recursive result back up.
return ret;
}
// Find the slot of the given pointer and mark it as available.
bool pp_slabtree_release(pp_slabtree_t* tree, void** ptr) {
assert(tree != NULL);
// If ptr was outside of this subtree's range, return false.
if (*ptr < tree->addr_range_first || *ptr > tree->addr_range_last) {
return false;
}
// If this is an internal tree node...
if (tree->height > 1) {
// Scan through all (64) subtrees, looking for ptr's subtree.
// The fact that slabs are not guaranteed to be in increasing
// order of address space is what necessitates this complete
// scan of all 64 subtrees, because the slab ranges of the
// subtrees can overlap. This is problematic for performance.
// To address this issue, I will likely need to move to a
// tree which maintains slab sort order (which isn't so bad
// if we pre-allocate all 64 slabs of a leaf node, because then
// we just have to shuffle the tree nodes, rather than shuffling
// individual slabs).
for (uint8_t i = 0; i < BITMASK_BIT; i++) {
pp_slabtree_t* child = tree->children.as_slabtrees[i];
if (child == NULL) {
continue;
}
// If ptr was in this subtree, end the scan.
bool result = pp_slabtree_release(child, ptr);
if (result) {
return result;
}
}
// The scan completed and we didn't find ptr within our subtree.
return false;
}
// Otherwise, this is a leaf node.
else {
// Scan the slabs, looking for ptr.
for (uint8_t i = 0; i < BITMASK_BIT; i++) {
pp_slab_t* slab = tree->children.as_slabs[i];
if (slab == NULL) {
continue;
}
// If ptr was in this slab, end the scan.
bool result = pp_slab_release(slab, ptr);
if (result) {
return result;
}
}
// The scan completed and we didn't find ptr in any of our slabs.
return false;
}
}
// MARK: - pp_malloc_t implementation
// pp_malloc_t: a user-space managed memory pool.
struct _pp_malloc_t {
pp_slabtree_t* tree;
size_t object_size;
};
// Allocate, initialize and return a user-space malloc pool.
pp_malloc_t* pp_malloc_create(size_t object_size) {
pp_malloc_t* self = (pp_malloc_t*)malloc(sizeof(pp_malloc_t));
assert(self != NULL);
self->object_size = object_size;
self->tree = pp_slabtree_create();
pp_slab_t* slab = pp_slab_create(object_size);
self->tree->children.as_slabs[0] = slab;
self->tree->addr_range_first = slab;
void* last = (void*)(slab->slot0) + (slab->slot_size * BITMASK_BIT);
self->tree->addr_range_last = last;
return self;
}
// Return the memory of a user-space malloc pool to the operating system.
void pp_malloc_free(pp_malloc_t** ptr) {
pp_slabtree_free(&((*ptr)->tree));
free(*ptr);
*ptr = NULL;
}
// A user-space malloc wrapper, designed to decrease malloc overhead
// by batching (64) calls to malloc.
// Initial (trivial) testing shows this to be 1.5x to 2x as fast
// as unwrapped malloc calls. However, I have not yet tested a workload
// involving heavy use of 'pp_free', so performance might not be 2x
// for long-lived processes.
void* pp_malloc(pp_malloc_t* self) {
assert(self != NULL);
if (pp_slabtree_is_full(self->tree)) {
pp_slabtree_grow(&(self->tree));
}
return pp_slabtree_claim_next(self->tree, self->object_size);
}
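// For example (illustrative): allocating 4,096 objects through pp_malloc touches libc
// malloc roughly 66 times (64 slabs, one tree node, one pp_malloc_t), versus 4,096 times
// with naive malloc, which is where the speedup comes from.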
// This is the corresponding free wrapper.
void pp_free(pp_malloc_t* self, void** ptr) {
assert(self != NULL);
assert(ptr != NULL && *ptr != NULL);
// Don't perform the release inside assert(): it would be skipped if NDEBUG were defined.
bool released = pp_slabtree_release(self->tree, ptr);
assert(released);
(void)released;
}
#endif // PP_MALLOC_IMPLEMENTATION
// MARK: - Tests
#ifdef PP_MALLOC_TESTS
static void test_bitmask_is_full() {
bitmask_t bitmask = 0;
assert(_bitmask_is_full(bitmask) == false);
bitmask--;
assert(_bitmask_is_full(bitmask) == true);
bitmask = 12345;
assert(_bitmask_is_full(bitmask) == false);
}
static void test_bit_is_set() {
bitmask_t bitmask = 0;
assert(_bit_is_set(bitmask, 0) == false);
assert(_bit_is_set(bitmask, 1) == false);
assert(_bit_is_set(bitmask, 2) == false);
bitmask = 1;
assert(_bit_is_set(bitmask, 0) == true);
assert(_bit_is_set(bitmask, 1) == false);
assert(_bit_is_set(bitmask, 2) == false);
bitmask = 2;
assert(_bit_is_set(bitmask, 0) == false);
assert(_bit_is_set(bitmask, 1) == true);
assert(_bit_is_set(bitmask, 2) == false);
bitmask = 3;
assert(_bit_is_set(bitmask, 0) == true);
assert(_bit_is_set(bitmask, 1) == true);
assert(_bit_is_set(bitmask, 2) == false);
bitmask = 4;
assert(_bit_is_set(bitmask, 0) == false);
assert(_bit_is_set(bitmask, 1) == false);
assert(_bit_is_set(bitmask, 2) == true);
}
static void test_bitmask_index_of_first_unset_bit() {
bitmask_t bitmask = 0;
assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 0);
bitmask = 1;
assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 1);
bitmask = 3;
assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 2);
bitmask = 7;
assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == 3);
bitmask = -1;
assert(_bitmask_index_of_first_unset_bit(bitmask, 0) == UINT8_MAX);
}
static void test_set_bit() {
bitmask_t bitmask = 0;
_set_bit(&bitmask, 0);
assert(bitmask == 1);
_set_bit(&bitmask, 1);
assert(bitmask == 3);
_set_bit(&bitmask, 2);
assert(bitmask == 7);
bitmask = 0;
_set_bit(&bitmask, 31);
assert(bitmask == 2147483648UL);
bitmask = 0;
_set_bit(&bitmask, 63);
// hmm, "error: integer literal is too large to be represented in any integer type"
// assert(bitmask == 18446744073709551616UL);
// thanks to https://stackoverflow.com/questions/35136202/how-do-i-mute-this-error-integer-literal-is-too-large-to-be-represented-in-a#comment57995212_35136303
// turns out integer constants aren't actually unsigned?
assert(bitmask == 0x8000000000000000);
}
static void test_clear_bit() {
bitmask_t bitmask = 7;
_clear_bit(&bitmask, 0);
assert(bitmask == 6);
_clear_bit(&bitmask, 1);
assert(bitmask == 4);
_clear_bit(&bitmask, 2);
assert(bitmask == 0);
}
static void test_pp_slab_claim_next() {
size_t slot_size = 1;
pp_slab_t* slab = pp_slab_create(slot_size);
assert(slab->bitmask == 0);
void* ret = pp_slab_claim_next(slab);
assert(ret != NULL);
assert(slab->bitmask == 1);
for (int i = 1; i < BITMASK_BIT; i++) {
pp_slab_claim_next(slab);
}
assert(pp_slab_is_full(slab));
pp_slab_free(&slab);
}
static void test_pp_slab_is_full() {
size_t slot_size = 1;
pp_slab_t* slab = pp_slab_create(slot_size);
for (int i = 0; i < BITMASK_BIT; i++) {
pp_slab_claim_next(slab);
}
assert(pp_slab_is_full(slab));
pp_slab_free(&slab);
}
static void test_pp_slabtree_is_full() {
pp_malloc_t* md = pp_malloc_create(1);
size_t n = BITMASK_BIT - 1;
for (size_t i = 0; i < n; i++) {
pp_malloc(md);
}
assert(!pp_slab_is_full(md->tree->children.as_slabs[0]));
assert(md->tree->bitmask == 0);
pp_malloc(md);
assert(pp_slab_is_full(md->tree->children.as_slabs[0]));
assert(md->tree->bitmask == 1);
pp_malloc_free(&md);
}
static void test_slab_create_and_free() {
pp_slab_t* s = pp_slab_create(1);
pp_slab_free(&s);
assert(s == NULL);
}
static void test_slabtree_create_and_free() {
pp_slabtree_t* s = pp_slabtree_create();
pp_slabtree_free(&s);
assert(s == NULL);
}
static void test_malloc_create_and_free() {
pp_malloc_t* m = pp_malloc_create(1);
pp_malloc_free(&m);
assert(m == NULL);
m = pp_malloc_create(1);
// create a tree of depth 1:
size_t n = BITMASK_BIT;
for (size_t i = 0; i < n; i++) {
pp_malloc(m);
}
pp_malloc_free(&m);
assert(m == NULL);
m = pp_malloc_create(1);
// create a tree of depth 2:
n = BITMASK_BIT * BITMASK_BIT;
for (size_t i = 0; i < n; i++) {
pp_malloc(m);
}
pp_malloc_free(&m);
assert(m == NULL);
m = pp_malloc_create(1);
// create a tree of depth 3:
n = BITMASK_BIT * BITMASK_BIT * BITMASK_BIT;
for (size_t i = 0; i < n; i++) {
pp_malloc(m);
}
pp_malloc_free(&m);
assert(m == NULL);
}
static void test_malloc_and_free() {
pp_malloc_t* m = pp_malloc_create(1);
void* ptr = pp_malloc(m);
pp_free(m, &ptr);
assert(ptr == NULL);
pp_malloc_free(&m);
assert(m == NULL);
}
/*
To TDD this file, implement a main.c:
#define PP_MALLOC_IMPLEMENTATION
#define PP_MALLOC_TESTS
#include "pp_malloc.h"
int main() { test(); return 0; }
Then run this in a shell:
watch -d -n1 'gcc -std=c99 -Wall main.c && ./a.out'
Or, if you don't have 'watch' installed:
while true ; do sleep 1 ; gcc -std=c99 -Wall main.c && ./a.out ; done
Each time you edit and save, look at the terminal to see if the tests are
still passing.
*/
#include <stdio.h>
void test() {
test_bitmask_is_full();
test_bit_is_set();
test_bitmask_index_of_first_unset_bit();
test_set_bit();
test_clear_bit();
test_pp_slab_claim_next();
test_pp_slab_is_full();
test_pp_slabtree_is_full();
test_slab_create_and_free();
test_slabtree_create_and_free();
test_malloc_create_and_free();
test_malloc_and_free();
printf("All tests passed.\n");
}
#endif // PP_MALLOC_TESTS
// MARK: - Performance testing
#ifdef PP_MALLOC_BENCHMARK
#include <stdio.h>
#include <sys/time.h> // for gettimeofday()
#include <locale.h>
static unsigned long benchmark_naive_malloc(size_t object_size, size_t n) {
unsigned long sum = 0;
for (size_t i = 0; i < n; i++) {
sum = sum + (unsigned long)malloc(object_size);
}
return sum;
}
static void benchmark_batched_malloc(size_t object_size, size_t n) {
pp_malloc_t* md = pp_malloc_create(object_size);
for (size_t i = 0; i < n; i++) {
pp_malloc(md);
}
}
void benchmark() {
setlocale(LC_NUMERIC, "");
printf("benchmarking...\n");
struct timeval then;
struct timeval now;
double elapsed1;
double elapsed2;
double rate1;
double rate2;
size_t object_size = 1;
size_t n = 1000000;
// this is just to avoid having the optimizer turn the naive test into a no-op.
unsigned long sum;
gettimeofday(&then, NULL);
sum = benchmark_naive_malloc(object_size, n);
gettimeofday(&now, NULL);
// thanks to https://stackoverflow.com/a/2150334/7543271
elapsed1 = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) / 1000000.0;
rate1 = n / elapsed1;
printf("naive malloc: elapsed: %'0.3fms, rate: %'0.0fm/s, n: %'lu (%i)\n", elapsed1 * 1000, rate1, n, (bool)sum);
gettimeofday(&then, NULL);
benchmark_batched_malloc(object_size, n);
gettimeofday(&now, NULL);
elapsed2 = (now.tv_sec - then.tv_sec) + (now.tv_usec - then.tv_usec) / 1000000.0;
rate2 = n / elapsed2;
printf("pp_malloc: elapsed: %'0.3fms, rate: %'0.0fm/s, n: %'lu\n", elapsed2 * 1000, rate2, n);
// factor = 100.0 / ((double)elapsed2 / (double)elapsed1);
// printf("speedup: %0.1f%% faster\n", factor);
}
#endif // PP_MALLOC_BENCHMARK
pp_malloc_benchmark.c:
#define PP_MALLOC_IMPLEMENTATION
#define PP_MALLOC_BENCHMARK
#include "pp_malloc.h"
int main() { benchmark(); return 0; }
pp_malloc_test.c:
#define PP_MALLOC_IMPLEMENTATION
#define PP_MALLOC_TESTS
#include "pp_malloc.h"
int main() { test(); return 0; }
build script:
#!/bin/bash
set -e
gcc -O3 -std=c99 -Wall pp_malloc_test.c && ./a.out
gcc -O3 -std=c99 -Wall pp_malloc_benchmark.c && ./a.out