/*
 * Thread-local cache for meshoptimizer allocations. Planned for inclusion into future meshoptimizer versions.
 *
 * Copyright (C) 2016-2025, by Arseny Kapoulkine ([email protected])
 * This code is distributed under the MIT License.
 */
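#include "meshoptimizer.h" // meshopt_setAllocator, MESHOPTIMIZER_ALLOC_CALLCONV

#include <algorithm>
#include <atomic>
#include <cassert>
#include <new>

#include <stddef.h>
#include <stdint.h>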
// reconfigure the thread cache for meshopt_ allocations: N threads x M bytes per thread
// must not be called concurrently with meshopt_* or clod* functions
void clodUseThreadCache(size_t thread_count, size_t size_per_thread)
{
	struct Global
	{
		void* data;
		size_t block_size;
		uint64_t all_blocks;
		std::atomic<uint64_t> blocks{0};
	};
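	// each bit of all_blocks marks one per-thread block carved out of data; blocks tracks which of those are currently free to claim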
	struct Local
	{
		void* block;
		size_t offset;
		uint64_t block_mask;
	};

	static Global global;
	thread_local Local local;

	// reset prior global state
	// note: all previously allocated blocks must have been returned at this point; this is guaranteed by the absence of concurrent execution with meshopt_/clod functions
	assert(global.blocks.load() == global.all_blocks);

	::operator delete(global.data);
	global.data = NULL;
	global.block_size = 0;
	global.all_blocks = 0;
	global.blocks = 0;
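	// the availability mask is a single 64-bit word, so the cache is capped at 64 blocks;
	// the per-thread size is rounded down to a multiple of 16 so allocations stay 16b-aligned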
	thread_count = std::min(thread_count, size_t(64));
	size_per_thread &= ~size_t(15);

	if (thread_count == 0 || size_per_thread == 0)
	{
		meshopt_setAllocator(::operator new, ::operator delete);
		return;
	}

	// allocate a block for each thread and mark each block as available
	global.data = ::operator new(thread_count * size_per_thread);
	global.block_size = size_per_thread;
	global.blocks = global.all_blocks = (thread_count == 64) ? ~0ull : (1ull << thread_count) - 1;

	// override allocation callbacks
	void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t) = [](size_t size) -> void*
	{
		// try to grab an available block for this thread
		if (local.block == NULL && global.blocks.load() != 0 && size < global.block_size)
		{
			uint64_t blocks, mask;

			do
			{
				blocks = global.blocks.load();
				// prefer the block this thread used last for cache coherency, but settle for the lowest available bit otherwise
				mask = (blocks & local.block_mask) ? local.block_mask : blocks & -blocks;

				// no available block, unlikely to get one soon; mask is 0 in this case
				if (blocks == 0)
					break;
			} while (!global.blocks.compare_exchange_weak(blocks, blocks & ~mask));
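			// mask is nonzero only if the compare-exchange above succeeded and we claimed a block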
			if (mask)
			{
				// extract block index from mask (must only have one bit set)
				int index = -1;
				for (int i = 0; i < 64; ++i)
					if (mask & (1ull << i))
					{
						index = i;
						break;
					}

				assert(index >= 0);
				assert(mask && (mask & (mask - 1)) == 0);
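				// local.offset is guaranteed to be 0 here: it only becomes nonzero while local.block is set, and the block is only released after offset returns to 0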
				local.block = static_cast<char*>(global.data) + index * global.block_size;
				local.block_mask = mask;
			}
		}

		// allocate from the local block if we have one and the request fits
		if (local.block && size < global.block_size && local.offset < global.block_size - size)
		{
			void* ptr = static_cast<char*>(local.block) + local.offset;
			local.offset += size;
			local.offset = (local.offset + 15) & ~size_t(15); // align future allocations to 16b
			return ptr;
		}

		// fall back to the system allocator
		return ::operator new(size);
	};
	void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*) = [](void* ptr)
	{
		// did this allocation come from the thread cache?
		if (local.block && ptr >= local.block && ptr < static_cast<char*>(local.block) + global.block_size)
		{
			// meshopt allocations are guaranteed to be stack ordered
			assert(ptr <= static_cast<char*>(local.block) + local.offset);
			local.offset = static_cast<char*>(ptr) - static_cast<char*>(local.block);

			// return the local block to the pool once it's fully freed
			if (local.offset == 0)
			{
				assert(local.block_mask);
				global.blocks |= local.block_mask;
				local.block = NULL;
				// keep block_mask as an affinity hint for the next allocation
			}
		}
		else
			::operator delete(ptr);
	};

	meshopt_setAllocator(allocate, deallocate);
}
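
// Usage sketch (illustrative, not part of the original file): size the cache for a
// worker pool before running meshopt_*/clod* work in parallel, and revert to the
// default allocator once the pool is idle. kWorkerCount and the 1 MB per-thread
// budget are hypothetical values to adapt to your workload.
void clodThreadCacheExample()
{
	const size_t kWorkerCount = 8; // hypothetical: match your thread pool size

	clodUseThreadCache(kWorkerCount, 1 << 20); // 1 MB of scratch space per thread

	// ... run meshopt_*/clod* work on up to kWorkerCount threads ...

	// safe only once no meshopt_/clod calls are in flight and all cached blocks have been returned
	clodUseThreadCache(0, 0); // restores ::operator new / ::operator delete
}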