Last active
February 8, 2021 22:30
-
-
Save lorenzhs/0a2a67b669779ab7a60e34fa0c566227 to your computer and use it in GitHub Desktop.
Arrays that are transparently distributed onto multiple NUMA nodes using Silo (https://github.com/stanford-mast/Silo) and make use of transparent hugepages, with a fallback for non-NUMA systems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************************************************************************* | |
* numa_array.hpp | |
* | |
* Arrays that are transparently distributed onto multiple NUMA nodes using Silo | |
* (https://github.com/stanford-mast/Silo) and make use of transparent huge | |
* pages, with a fallback for non-NUMA systems | |
* | |
* Copyright (C) 2018 Lorenz Hübschle-Schneider <[email protected]> | |
* | |
* All rights reserved. Published under the BSD-2 license. | |
******************************************************************************/ | |
#pragma once | |
#ifndef NUMA_ARRAY_HEADER | |
#define NUMA_ARRAY_HEADER | |
// https://github.com/tlx/tlx, but only used for logging. You can get rid of | |
// this dependency by removing all the lines with "LOG" / "sLOG" statements. | |
#include <tlx/logger.hpp> | |
/* | |
* You should detect whether libnuma is installed using your build system. | |
* With cmake, you can do something like: | |
find_package(Numa) | |
if(NUMA_FOUND) | |
list(APPEND MY_DEFINITIONS "HAVE_LIBNUMA") | |
else() | |
message(STATUS "Could not find libnuma, disabling NUMA awareness") | |
endif() | |
# use MY_DEFINITIONS as follows: | |
target_compile_definitions(my_target PUBLIC ${MY_DEFINITIONS}) | |
*/ | |
#ifdef HAVE_LIBNUMA | |
#include <silo.h> | |
#include <topo.h> | |
#endif // HAVE_LIBNUMA | |
#include <sys/mman.h> // madvise

#include <algorithm> // std::min
#include <cassert> // assert
#include <cstddef> // size_t
#include <cstdlib> // aligned_alloc
#include <limits> // std::numeric_limits
#include <memory>
#include <thread> // std::thread::hardware_concurrency
// Round `size` up to the next multiple of `alignment`. Done by hand because
// Silo has been observed to round sizes down instead (reason unknown to the
// original author).
constexpr size_t align_size(size_t size, size_t alignment) {
    // Number of `alignment`-sized chunks needed to cover `size`
    const size_t num_chunks = (size + (alignment - 1)) / alignment;
    return num_chunks * alignment;
}
// Allocate `size` bytes and tell the Linux kernel that it would be a good idea | |
// to use hugepages for this. Will align the size to 2MB. | |
void* alloc_hugepage(size_t size) { | |
constexpr size_t alignment = 2 * 1024 * 1024; | |
size_t bytes = align_size(size, alignment); | |
void* ptr = aligned_alloc(alignment, bytes); | |
madvise(ptr, bytes, MADV_HUGEPAGE); | |
return ptr; | |
} | |
// Allocate memory, automatically switching to 2MB-aligned allocations with | |
// support for transparent huge pages if `size` exceeds 1MB. You can use this | |
// as a replacement for `malloc`. | |
void* allocate(size_t size) { | |
if (size >= 1024 * 1024) { | |
return alloc_hugepage(size); | |
} else { | |
return malloc(size); | |
} | |
} | |
// Allocate an array distributed over the available NUMA nodes | |
void* numa_alloc(size_t bytes, bool align = true) { | |
#ifndef HAVE_LIBNUMA | |
(void) align; | |
return allocate(bytes); | |
#else | |
constexpr bool debug = true; | |
// the code below is designed to handle *fewer* threads than available, | |
// especially for the case where num_numa_nodes does not divide num_threads. | |
// In that case, the first NUMA nodes are assumed to get an additional | |
// thread each, and the portion of the array allocated on each NUMA node is | |
// distributed the same way (i.e., more memory on the first few nodes). | |
// This is irrelevant if using all threads. | |
int num_threads = std::thread::hardware_concurrency(); | |
int num_numa_nodes = topoGetSystemNUMANodeCount(); | |
int threads_per_node = (num_threads + num_numa_nodes - 1) / | |
num_numa_nodes; | |
size_t bytes_per_thread = (bytes + num_threads - 1) / num_threads; | |
// Fallback to normal allocations if this is a non-NUMA system | |
if (num_numa_nodes == 1) { | |
return allocate(bytes); | |
} | |
sLOG << "Allocating" << bytes << "bytes on" << num_numa_nodes | |
<< "NUMA nodes"; | |
SSiloMemorySpec* specs = (SSiloMemorySpec*)malloc( | |
sizeof(SSiloMemorySpec) * num_numa_nodes); | |
if (specs == nullptr) { // abort | |
assert(false); | |
return nullptr; | |
} | |
// Initialize the specifications. | |
int min = 0, max = threads_per_node; | |
for (int i = 0; i < num_numa_nodes; ++i) { | |
// Align sizes to 2MB | |
size_t size = bytes_per_thread * (max - min); | |
if (align) size = align_size(size, 2048*1024); | |
specs[i].size = size; | |
specs[i].numaNode = i; | |
sLOG << "Thereof" << specs[i].size << "bytes on node" << i; | |
min = max; | |
max = std::min(num_threads, max + threads_per_node); | |
} | |
// Allocate the multi-node array. Uses transparent hugepages. | |
void* buffer = siloMultinodeArrayAlloc(num_numa_nodes, specs); | |
// If for some reason the multi-NUMA-node allocation failed, fallback to | |
// the simple version | |
if (buffer == nullptr) { | |
LOG << "failed to allocate; fallback to simple"; | |
buffer = allocate(bytes); | |
assert(buffer != nullptr); | |
} | |
return buffer; | |
#endif | |
} | |
// Release memory obtained from `numa_alloc`. Null pointers are ignored.
//
// With HAVE_LIBNUMA the pointer is passed to `siloFree`; per the original
// author this also frees the SSiloMemorySpec object (Silo tracks these
// internally). Without HAVE_LIBNUMA plain `free` is used.
//
// NOTE(review): with HAVE_LIBNUMA, `numa_alloc` can still hand out pointers
// from `allocate` (non-NUMA system or failed Silo allocation), and those end
// up in `siloFree` here too -- confirm Silo copes with pointers it did not
// allocate.
//
// Defined `inline` so that including this header from several translation
// units does not produce multiple definitions.
inline void numa_free(void* ptr) {
    if (ptr == nullptr) {
        return;
    }
// `#ifdef` rather than `#if`, matching the other HAVE_LIBNUMA checks in this
// file; `#if` breaks when the macro is defined without a value.
#ifdef HAVE_LIBNUMA
    siloFree(ptr);
#else
    free(ptr);
#endif
}
// A struct that fulfills the deleter requirements of std::unique_ptr | |
struct numa_deleter { | |
template <typename T> | |
void operator()(T* ptr) { | |
numa_free((void*)ptr); | |
} | |
}; | |
// A type definition for easy use | |
template <typename T> | |
using numa_arr_ptr = std::unique_ptr<T[], numa_deleter>; | |
// Helper function to create a numa_arr_ptr akin to std::make_unique | |
template <typename T> | |
numa_arr_ptr<T> make_numa_arr(size_t num_elems) { | |
T* ptr = static_cast<T*>(numa_alloc(num_elems * sizeof(T))); | |
return numa_arr_ptr<T>(ptr); | |
} | |
// Usage example; remove this from the header before actual use...
//
// Defined `inline` so that, while it is still here, including this header
// from several translation units does not produce multiple definitions.
inline void example() {
    const size_t size = 1000000; // size of the array
    auto array = make_numa_arr<double>(size);
    if (!array) {
        return; // allocation failed, nothing to demonstrate
    }
    // do something with the array
    array[0] = 1.234;
    // the array is automatically deallocated through numa_free when it goes
    // out of scope, no need to free it manually
}
#endif // NUMA_ARRAY_HEADER |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment