Created
December 30, 2024 19:29
-
-
Save forrestthewoods/fd091dd16fc1de029d2ddec0395baae9 to your computer and use it in GitHub Desktop.
Microbench for VirtualAlloc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// virtual_alloc.cpp : This file contains the 'main' function. Program execution begins and ends there. | |
// | |
#if _WIN32 | |
#define WIN32_LEAN_AND_MEAN | |
#define NOMINMAX | |
#include <Windows.h> | |
#include <psapi.h> | |
#else | |
#include <sys/mman.h> | |
#include <sys/sysinfo.h> | |
#include <sys/types.h> | |
#include <sys/wait.h> | |
#include <unistd.h> | |
#endif | |
#include <algorithm> | |
#include <assert.h> | |
#include <charconv> | |
#include <chrono> | |
#include <cstdint> | |
#include <format> | |
#include <iostream> | |
#include <vector> | |
#include <type_traits> | |
#define ENABLE_SUPERLUMINAL 0 | |
#if ENABLE_SUPERLUMINAL | |
#include <Superluminal/PerformanceAPI_capi.h> | |
#else | |
#define PerformanceAPI_BeginEvent(a,b,c) | |
#define PerformanceAPI_EndEvent() | |
#endif | |
using namespace std::chrono; | |
using Clock = high_resolution_clock; | |
static const size_t ONE_KB = 1024; | |
static const size_t ONE_MB = 1024 * 1024; | |
static const size_t ONE_GB = 1024 * 1024 * 1024; | |
static const size_t ONE_TB = 1024llu * 1024 * 1024 * 1024; | |
static const size_t PAGE_SIZE = []() -> size_t { | |
#if _WIN32 | |
SYSTEM_INFO systemInfo; | |
GetSystemInfo(&systemInfo); | |
return systemInfo.dwPageSize; | |
#else | |
return sysconf(_SC_PAGESIZE); | |
#endif | |
}(); | |
static const size_t RESERVE_LIMIT = []() -> size_t { | |
#ifdef _WIN32 | |
MEMORYSTATUSEX memStatus; | |
memStatus.dwLength = sizeof(memStatus); | |
GlobalMemoryStatusEx(&memStatus); | |
return memStatus.ullTotalVirtual; | |
#else | |
struct sysinfo si; | |
if (sysinfo(&si) == 0) { | |
// Total virtual memory = RAM + swap | |
return (si.totalram + si.totalswap) * si.mem_unit; | |
} | |
return 0; | |
#endif | |
}(); | |
static const size_t COMMIT_LIMIT = []() -> size_t { | |
#if _WIN32 | |
PERFORMANCE_INFORMATION perfInfo; | |
perfInfo.cb = sizeof(perfInfo); | |
GetPerformanceInfo(&perfInfo, sizeof(perfInfo)); | |
size_t commitLimit = perfInfo.CommitLimit * perfInfo.PageSize; | |
return commitLimit; | |
#else | |
struct sysinfo si; | |
if (sysinfo(&si) == 0) { | |
// On Linux, commit limit is typically RAM + swap | |
return (si.totalram + si.totalswap) * si.mem_unit; | |
} | |
return 0; | |
#endif | |
}(); | |
// Small Helpers | |
size_t roundUpToPage(size_t size) { return (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); } | |
size_t roundDownToPage(size_t size) { return size & ~(PAGE_SIZE - 1); } | |
void* offsetPtr(void* ptr, size_t offset) { return (void*)((char*)ptr + offset); } | |
std::string prettyTime(nanoseconds ns) { | |
if (ns.count() < 1000) return std::format("{}ns", ns.count()); | |
else if (ns.count() < 1000 * 1000) return std::format("{}us", ns.count() / 1000); | |
else if (ns.count() < 1000llu * 1000llu * 1000llu * 10llu) return std::format("{}ms", ns.count() / 1000 / 1000); | |
else return std::format("{}s", ns.count() / 1000 / 1000 / 1000); | |
} | |
std::string prettyBytes(size_t bytes) { | |
if (bytes < 1024llu) return std::format("{}bytes", bytes); | |
else if (bytes < 1024llu * 1024) return std::format("{}KiB", bytes / 1024); | |
else if (bytes < 1024llu * 1024 * 1024) return std::format("{}MiB", bytes / 1024 / 1024); | |
else if (bytes < 1024llu * 1024 * 1024 * 1024) return std::format("{}GiB", bytes / 1024 / 1024 / 1024); | |
else return std::format("{}TiB", bytes / 1024 / 1024 / 1024 / 1024); | |
} | |
enum class CommitStrategy { Page, Multiplier, All }; | |
const double CommitMultiplier = 1.5; | |
template<typename T, CommitStrategy commitStrategy> | |
struct VirtualVec { | |
T* data = nullptr; | |
size_t count = 0; | |
size_t numVirtualBytes = 0; | |
void* nextPage = nullptr; | |
void* endPage = nullptr; // one byte past end of last page | |
VirtualVec() = default; | |
~VirtualVec() { | |
if (data) { | |
if constexpr (!std::is_trivially_destructible_v<T>) { | |
for (size_t i = 0; i < count; ++i) { | |
data[i].~T(); // destroy! | |
} | |
} | |
#if _WIN32 | |
VirtualFree(data, 0, MEM_RELEASE); | |
#else | |
munmap(data, numVirtualBytes); // Note: needs original size used in mmap | |
#endif | |
} | |
} | |
void init(size_t numElements) { | |
#if !_WIN32 | |
// Linux only supports ALL | |
assert(commitStrategy == CommitStrategy::All); | |
#endif | |
PerformanceAPI_BeginEvent("VirtualVec::init", nullptr, PERFORMANCEAPI_DEFAULT_COLOR); | |
assert(data == nullptr); // don't handle re-init | |
numVirtualBytes = roundUpToPage(numElements * sizeof(T)); // align to page size | |
#if _WIN32 | |
if constexpr (commitStrategy == CommitStrategy::All) { | |
// reserve AND commit | |
void* raw = VirtualAlloc(nullptr, numVirtualBytes, MEM_COMMIT, PAGE_READWRITE); | |
assert(raw != nullptr); | |
data = static_cast<T*>(raw); | |
endPage = offsetPtr(data, numVirtualBytes); | |
nextPage = endPage; | |
} | |
else { | |
// reserve but do NOT commit | |
void* raw = VirtualAlloc(nullptr, numVirtualBytes, MEM_RESERVE, PAGE_READWRITE); | |
assert(raw != nullptr); | |
data = static_cast<T*>(raw); | |
nextPage = data; | |
endPage = offsetPtr(data, numVirtualBytes); | |
} | |
#else | |
void* raw = mmap(nullptr, numVirtualBytes, | |
PROT_READ | PROT_WRITE, // Protection flags | |
MAP_PRIVATE | MAP_ANONYMOUS, // Mapping flags | |
-1, // File descriptor (not used) | |
0); // Offset (not used) | |
assert(raw != nullptr); | |
data = static_cast<T*>(raw); | |
endPage = offsetPtr(data, numVirtualBytes); | |
nextPage = endPage; | |
#endif | |
PerformanceAPI_EndEvent(); | |
} | |
void push_back(T const& v) { | |
assert(data != nullptr); // must call init first | |
T* nextSlot = data + count; | |
while (nextSlot >= nextPage) { | |
reserveMore(); | |
} | |
*nextSlot = v; | |
count += 1; | |
} | |
void reserveMore() { | |
#if _WIN32 | |
assert(nextPage < endPage); | |
if constexpr (commitStrategy == CommitStrategy::Page) { | |
VirtualAlloc(nextPage, PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE); // commit first page | |
nextPage = static_cast<void*>(static_cast<char*>(nextPage) + PAGE_SIZE); | |
} | |
else if constexpr (commitStrategy == CommitStrategy::Multiplier) { | |
size_t currCommitedBytes = ((char*)nextPage - (char*)data); | |
void* targetPage = nullptr; | |
if (currCommitedBytes == 0) { | |
targetPage = offsetPtr(data, PAGE_SIZE); | |
} | |
else { | |
targetPage = offsetPtr(data, size_t(double(currCommitedBytes) * CommitMultiplier)); | |
} | |
targetPage = std::min(targetPage, endPage); | |
while (nextPage < targetPage && nextPage < endPage) { | |
VirtualAlloc(nextPage, PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE); // commit first page | |
nextPage = static_cast<void*>(static_cast<char*>(nextPage) + PAGE_SIZE); | |
} | |
} | |
else { | |
} | |
#else | |
// Linux doesn't have to deal with this | |
assert(false); | |
#endif | |
} | |
}; | |
void run_subprocess(const char* exePath, size_t test, size_t subtest, size_t extra) { | |
PerformanceAPI_BeginEvent("run_subprocess", nullptr, PERFORMANCEAPI_DEFAULT_COLOR); | |
#if _WIN32 | |
HANDLE pipe_read, pipe_write; | |
SECURITY_ATTRIBUTES sa = { | |
.nLength = sizeof(SECURITY_ATTRIBUTES), | |
.lpSecurityDescriptor = NULL, | |
.bInheritHandle = TRUE, | |
}; | |
// Create pipe for stdout | |
if (!CreatePipe(&pipe_read, &pipe_write, &sa, 0)) { | |
// Handle error | |
} | |
// Ensure the read handle is not inherited | |
SetHandleInformation(pipe_read, HANDLE_FLAG_INHERIT, 0); | |
STARTUPINFOA si = { | |
.cb = sizeof(STARTUPINFO), | |
.dwFlags = STARTF_USESTDHANDLES, | |
.hStdOutput = pipe_write, | |
.hStdError = pipe_write, | |
}; | |
PROCESS_INFORMATION pi; | |
std::string cli = std::format("{} {} {} {}", exePath, test, subtest, extra); | |
if (!CreateProcessA( | |
NULL, // Application name | |
cli.data(), // Command line | |
NULL, // Process security attributes | |
NULL, // Thread security attributes | |
TRUE, // Inherit handles | |
0, // Creation flags | |
NULL, // Use parent's environment | |
NULL, // Use parent's directory | |
&si, // STARTUPINFO pointer | |
&pi // PROCESS_INFORMATION pointer | |
)) { | |
// Handle error | |
} | |
// Close the write end of pipe in parent | |
CloseHandle(pipe_write); | |
// Read from pipe | |
char buffer[4096]; | |
DWORD bytes_read; | |
while (ReadFile(pipe_read, buffer, sizeof(buffer) - 1, &bytes_read, NULL)) { | |
if (bytes_read == 0) break; | |
buffer[bytes_read] = '\0'; | |
// Process output here | |
printf("%s", buffer); | |
} | |
// Wait for process to complete | |
WaitForSingleObject(pi.hProcess, INFINITE); | |
// Cleanup | |
CloseHandle(pipe_read); | |
CloseHandle(pi.hProcess); | |
CloseHandle(pi.hThread); | |
#else | |
int pipe_fd[2]; | |
if (pipe(pipe_fd) == -1) { | |
perror("pipe"); | |
return; | |
} | |
pid_t pid = fork(); | |
if (pid == -1) { | |
perror("fork"); | |
close(pipe_fd[0]); | |
close(pipe_fd[1]); | |
return; | |
} | |
if (pid == 0) { // Child process | |
// Close read end of pipe | |
close(pipe_fd[0]); | |
// Redirect stdout and stderr to pipe | |
dup2(pipe_fd[1], STDOUT_FILENO); | |
dup2(pipe_fd[1], STDERR_FILENO); | |
close(pipe_fd[1]); | |
// Convert arguments to strings | |
std::string test_str = std::to_string(test); | |
std::string subtest_str = std::to_string(subtest); | |
std::string extra_str = std::to_string(extra); | |
// Execute the program | |
execl(exePath, exePath, | |
test_str.c_str(), | |
subtest_str.c_str(), | |
extra_str.c_str(), | |
(char*)nullptr); | |
// If execl returns, there was an error | |
perror("execl"); | |
exit(1); | |
} else { // Parent process | |
// Close write end of pipe | |
close(pipe_fd[1]); | |
// Read from pipe | |
char buffer[4096]; | |
ssize_t bytes_read; | |
while ((bytes_read = read(pipe_fd[0], buffer, sizeof(buffer) - 1)) > 0) { | |
buffer[bytes_read] = '\0'; | |
printf("%s", buffer); | |
} | |
// Close read end of pipe | |
close(pipe_fd[0]); | |
// Wait for child process to complete | |
int status; | |
waitpid(pid, &status, 0); | |
} | |
#endif | |
PerformanceAPI_EndEvent(); | |
} | |
enum Tests { | |
VirtualPageCommit, | |
VirtualGrowCommit, | |
VirtualAllCommit, | |
AllocCost | |
}; | |
int main(int argc, char** argv) { | |
using NumType = int; | |
const std::vector<size_t> testBytes = { ONE_MB, ONE_MB * 10, ONE_MB * 100, ONE_GB, ONE_GB * 4, ONE_GB * 16, ONE_GB * 128, ONE_TB }; | |
const std::vector<std::string> testBytesStr = { "1 MiB", "10 MiB", "100 MiB", "1 GiB", "4 GiB", "16GiB", "128GiB", "1TiB" }; | |
const size_t COMMIT_SAFE_BYTES = 6; | |
const std::vector<size_t> numAllocs = { 1, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000, 25000, 50000, 75000, 100000 }; | |
const std::vector<size_t> allocSizes = { ONE_MB, ONE_MB * 5, ONE_MB * 10, ONE_MB * 50, ONE_MB * 100, ONE_MB * 500, ONE_GB, ONE_GB * 4, ONE_GB * 8, ONE_GB * 16 }; | |
if (argc == 1) { | |
std::cout << std::format("Page Size: {}\n", PAGE_SIZE); | |
std::cout << std::format("Reserve Limit: {}\n", prettyBytes(RESERVE_LIMIT)); // ~127TB | |
std::cout << std::format("Commit Limit: {}\n\n", prettyBytes(COMMIT_LIMIT)); // ~100GB | |
#if _WIN32 | |
std::cout << "VirtualVec, reserve, no commit, no write\n"; | |
for (int i = 0; i < testBytes.size(); ++i) { run_subprocess(argv[0], VirtualPageCommit, i, 0); } | |
std::cout << "\n"; | |
#endif | |
std::cout << "VirtualVec, commit all, no write\n"; | |
for (int i = 0; i < COMMIT_SAFE_BYTES; ++i) { run_subprocess(argv[0], VirtualAllCommit, i, 0); } | |
std::cout << "\n"; | |
#if _WIN32 | |
std::cout << "VirtualVec, commit page at a time, write all\n"; | |
for (int i = 0; i < COMMIT_SAFE_BYTES; ++i) { run_subprocess(argv[0], VirtualPageCommit, i, 1); } | |
std::cout << "\n"; | |
std::cout << "VirtualVec, commit by 1.5x, write all\n"; | |
for (int i = 0; i < COMMIT_SAFE_BYTES; ++i) { run_subprocess(argv[0], VirtualGrowCommit, i, 1); } | |
std::cout << "\n"; | |
#endif | |
std::cout << "VirtualVec, commit all, write all\n"; | |
for (int i = 0; i < COMMIT_SAFE_BYTES; ++i) { run_subprocess(argv[0], VirtualAllCommit, i, 1); } | |
std::cout << "\n"; | |
for (int j = 0; j < allocSizes.size(); ++j) { | |
size_t allocSize = allocSizes[j]; | |
std::cout << std::format("Call VirtualAlloc({}) N times, no commit, no write\n", prettyBytes(allocSize)); | |
for (int i = 0; i < numAllocs.size(); ++i) { | |
size_t virtualSize = allocSize * numAllocs[i]; | |
if (virtualSize < RESERVE_LIMIT / 2) { | |
run_subprocess(argv[0], AllocCost, i, allocSize); | |
} | |
} | |
std::cout << "\n"; | |
} | |
return 0; | |
} | |
size_t test = 0, subtest = 0, extra = 0; | |
std::from_chars(argv[1], argv[1] + strlen(argv[1]), test); | |
std::from_chars(argv[2], argv[2] + strlen(argv[2]), subtest); | |
std::from_chars(argv[3], argv[3] + strlen(argv[3]), extra); | |
// Virtual, Page Commit | |
if (test == VirtualPageCommit) { | |
auto start = Clock::now(); | |
VirtualVec<NumType, CommitStrategy::Page> nums; | |
size_t numBytes = testBytes[subtest]; | |
size_t numElements = numBytes / sizeof(NumType); | |
nums.init(numElements); | |
if (extra == 1) { | |
for (int i = 0; i < numElements; ++i) { | |
nums.push_back(i); | |
} | |
} | |
auto end = Clock::now(); | |
std::cout << " Bytes: " << testBytesStr[subtest] << " Time: " << prettyTime(end - start) << "\n"; | |
} | |
// Virtual, 1.5x Commit | |
if (test == VirtualGrowCommit) { | |
auto start = Clock::now(); | |
VirtualVec<NumType, CommitStrategy::Multiplier> nums; | |
size_t numBytes = testBytes[subtest]; | |
size_t numElements = numBytes / sizeof(NumType); | |
nums.init(numElements); | |
if (extra == 1) { | |
for (int i = 0; i < numElements; ++i) { | |
nums.push_back(i); | |
} | |
} | |
auto end = Clock::now(); | |
std::cout << " Bytes: " << testBytesStr[subtest] << " Time: " << prettyTime(end - start) << "\n"; | |
} | |
// Virtual, Full Commit | |
if (test == VirtualAllCommit) { | |
auto start = Clock::now(); | |
VirtualVec<NumType, CommitStrategy::All> nums; | |
size_t numBytes = testBytes[subtest]; | |
size_t numElements = numBytes / sizeof(NumType); | |
nums.init(numElements); | |
if (extra == 1) { | |
for (int i = 0; i < numElements; ++i) { | |
nums.push_back(i); | |
} | |
} | |
auto end = Clock::now(); | |
std::cout << " Bytes: " << testBytesStr[subtest] << " Time: " << prettyTime(end - start) << "\n"; | |
} | |
// std::vector, no reserve | |
// std::vector, reserve | |
// Virtual Alloc cost | |
if (test == AllocCost) { | |
const size_t num_bytes = extra; | |
const size_t num_elements = num_bytes / sizeof(NumType); | |
size_t numAlloc = numAllocs[subtest]; | |
#if _WIN32 | |
std::vector<VirtualVec<NumType, CommitStrategy::Page>> vecs; | |
#else | |
std::vector<VirtualVec<NumType, CommitStrategy::All>> vecs; | |
#endif | |
vecs.resize(numAlloc); | |
auto start = Clock::now(); | |
duration<long long, std::nano> lastAlloc = {}; | |
for (int j = 0; j < vecs.size(); ++j) { | |
if (j == vecs.size() - 1) { | |
auto s = Clock::now(); | |
vecs[j].init(num_elements); | |
auto e = Clock::now(); | |
lastAlloc = e - s; | |
} | |
else { | |
vecs[j].init(num_elements); | |
} | |
} | |
auto end = Clock::now(); | |
auto elapsedTotal = duration_cast<nanoseconds>(end - start); | |
auto elapsedPer = elapsedTotal / numAlloc; | |
size_t totalBytes = num_bytes * numAlloc; | |
std::cout << " N: " << numAllocs[subtest] | |
<< " TotalTime: " << prettyTime(elapsedTotal) | |
<< " TotalReserved: " << prettyBytes(totalBytes) | |
<< " PerVirtualAlloc: " << elapsedPer << " / " << prettyTime(elapsedPer) | |
<< " PerVirtualMib: " << prettyTime(elapsedTotal / (totalBytes / ONE_MB)) | |
<< "\n"; | |
} | |
} | |
// https://devblogs.microsoft.com/oldnewthing/20160318-00/?p=93181 | |
// https://alwaysprocessing.blog/2022/02/20/size-matters | |
// over 100,000 virtual allocs and it sometimes hangs | |
// Linux: clang++ main.cpp -std=c++20 -stdlib=libc++ -lc++ -lc++abi -o x64/va |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment