Last active
January 30, 2024 18:37
-
-
Save jakubtomsu/177af38fa8a905bd60fca2b6634f1c51 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "core:fmt" | |
import "core:mem" | |
import "core:os" | |
import "core:runtime" | |
import "core:strconv" | |
import "core:sys/windows" | |
import "core:time" | |
// This example is based on the following program by Casey Muratori:
// https://github.com/cmuratori/largepages
//
// Relevant docs:
// https://learn.microsoft.com/en-us/windows/win32/memory/large-page-support
// Element type of the test buffer that main fills.
// The choice of variants is fairly arbitrary; it only needs to give the
// benchmark something non-trivial to write into freshly allocated memory.
Item :: union {
	Item_A,
	Item_B,
}
// Small variant of Item: two 32-bit floats.
Item_A :: struct {
	foo, bar: f32,
}
// Large variant of Item: sixteen 64-bit integers split over two arrays.
Item_B :: struct {
	bar: [4]u64,
	baz: [12]u64,
}
//
// Win32 API declarations missing from core:sys/windows
//
// Bindings for the two Advapi32 procedures used by windows_enable_large_pages.
foreign import advapi32 "system:Advapi32.lib"

@(default_calling_convention = "stdcall")
foreign advapi32 {
	// Enables or disables privileges in the given access token.
	AdjustTokenPrivileges :: proc(
		TokenHandle: windows.HANDLE,
		DisableAllPrivileges: windows.BOOL,
		NewState: ^TOKEN_PRIVILEGES,
		BufferLength: windows.DWORD,
		PreviousState: ^TOKEN_PRIVILEGES,
		ReturnLength: ^windows.DWORD,
	) -> windows.BOOL ---

	// Looks up the LUID that represents the named privilege (ANSI variant).
	LookupPrivilegeValueA :: proc(
		lpSystemName: cstring,
		lpName: cstring,
		lpLuid: ^LUID,
	) -> windows.BOOL ---
}
// Win32 LUID: a 64-bit locally unique identifier stored as two 32-bit halves.
LUID :: struct {
	LowPart:  windows.DWORD,
	HighPart: windows.LONG,
}
// Win32 LUID_AND_ATTRIBUTES: a privilege LUID paired with its SE_PRIVILEGE_*
// attribute flags.
LUID_AND_ATTRIBUTES :: struct {
	Luid:       LUID,
	Attributes: windows.DWORD,
}
// Declared length of the trailing Privileges array, as in the Win32 headers.
ANYSIZE_ARRAY :: 1

// Win32 TOKEN_PRIVILEGES: a count followed by that many privilege entries.
// This program only ever passes a single entry.
TOKEN_PRIVILEGES :: struct {
	PrivilegeCount: windows.DWORD,
	Privileges:     [ANYSIZE_ARRAY]LUID_AND_ATTRIBUTES,
}
// GetLastError code checked after AdjustTokenPrivileges in
// windows_enable_large_pages.
// (The C headers wrap this value in __MSABI_LONG; a plain constant is used here.)
ERROR_NOT_ALL_ASSIGNED :: 1300

// Attribute flags for LUID_AND_ATTRIBUTES.Attributes.
SE_PRIVILEGE_ENABLED_BY_DEFAULT :: 0x00000001
SE_PRIVILEGE_ENABLED            :: 0x00000002
SE_PRIVILEGE_REMOVED            :: 0x00000004
SE_PRIVILEGE_USED_FOR_ACCESS    :: 0x80000000
// Privilege names accepted by LookupPrivilegeValueA.
// WARNING: these are ASCII strings; convert to UTF-16 before passing them to
// the *W variants of the Win32 procedures.
SE_CREATE_TOKEN_NAME                      :: "SeCreateTokenPrivilege"
SE_ASSIGNPRIMARYTOKEN_NAME                :: "SeAssignPrimaryTokenPrivilege"
SE_LOCK_MEMORY_NAME                       :: "SeLockMemoryPrivilege"
SE_INCREASE_QUOTA_NAME                    :: "SeIncreaseQuotaPrivilege"
SE_UNSOLICITED_INPUT_NAME                 :: "SeUnsolicitedInputPrivilege"
SE_MACHINE_ACCOUNT_NAME                   :: "SeMachineAccountPrivilege"
SE_TCB_NAME                               :: "SeTcbPrivilege"
SE_SECURITY_NAME                          :: "SeSecurityPrivilege"
SE_TAKE_OWNERSHIP_NAME                    :: "SeTakeOwnershipPrivilege"
SE_LOAD_DRIVER_NAME                       :: "SeLoadDriverPrivilege"
SE_SYSTEM_PROFILE_NAME                    :: "SeSystemProfilePrivilege"
SE_SYSTEMTIME_NAME                        :: "SeSystemtimePrivilege"
SE_PROF_SINGLE_PROCESS_NAME               :: "SeProfileSingleProcessPrivilege"
SE_INC_BASE_PRIORITY_NAME                 :: "SeIncreaseBasePriorityPrivilege"
SE_CREATE_PAGEFILE_NAME                   :: "SeCreatePagefilePrivilege"
SE_CREATE_PERMANENT_NAME                  :: "SeCreatePermanentPrivilege"
SE_BACKUP_NAME                            :: "SeBackupPrivilege"
SE_RESTORE_NAME                           :: "SeRestorePrivilege"
SE_SHUTDOWN_NAME                          :: "SeShutdownPrivilege"
SE_DEBUG_NAME                             :: "SeDebugPrivilege"
SE_AUDIT_NAME                             :: "SeAuditPrivilege"
SE_SYSTEM_ENVIRONMENT_NAME                :: "SeSystemEnvironmentPrivilege"
SE_CHANGE_NOTIFY_NAME                     :: "SeChangeNotifyPrivilege"
SE_REMOTE_SHUTDOWN_NAME                   :: "SeRemoteShutdownPrivilege"
SE_UNDOCK_NAME                            :: "SeUndockPrivilege"
SE_SYNC_AGENT_NAME                        :: "SeSyncAgentPrivilege"
SE_ENABLE_DELEGATION_NAME                 :: "SeEnableDelegationPrivilege"
SE_MANAGE_VOLUME_NAME                     :: "SeManageVolumePrivilege"
SE_IMPERSONATE_NAME                       :: "SeImpersonatePrivilege"
SE_CREATE_GLOBAL_NAME                     :: "SeCreateGlobalPrivilege"
SE_TRUSTED_CREDMAN_ACCESS_NAME            :: "SeTrustedCredManAccessPrivilege"
SE_RELABEL_NAME                           :: "SeRelabelPrivilege"
SE_INC_WORKING_SET_NAME                   :: "SeIncreaseWorkingSetPrivilege"
SE_TIME_ZONE_NAME                         :: "SeTimeZonePrivilege"
SE_CREATE_SYMBOLIC_LINK_NAME              :: "SeCreateSymbolicLinkPrivilege"
SE_DELEGATE_SESSION_USER_IMPERSONATE_NAME :: "SeDelegateSessionUserImpersonatePrivilege"
// Access-right bits for OpenProcessToken's DesiredAccess parameter.
TOKEN_ASSIGN_PRIMARY    :: 0x0001
TOKEN_DUPLICATE         :: 0x0002
TOKEN_IMPERSONATE       :: 0x0004
TOKEN_QUERY             :: 0x0008
TOKEN_QUERY_SOURCE      :: 0x0010
TOKEN_ADJUST_PRIVILEGES :: 0x0020
TOKEN_ADJUST_GROUPS     :: 0x0040
TOKEN_ADJUST_DEFAULT    :: 0x0080
TOKEN_ADJUST_SESSIONID  :: 0x0100

// MEMORY_BASIC_INFORMATION.Type value compared against in
// windows_print_info_about_memory_address_range.
MEM_IMAGE :: 0x1000000
//
// RIO - Registered Input/Output
//
// Opaque handle returned by RIORegisterBuffer.
RIO_BUFFERID :: rawptr

// TODO: give these real procedure types. They are never called by this
// program, so they are declared as untyped pointers purely to keep the layout
// of RIO_EXTENSION_FUNCTION_TABLE correct.
LPFN_RIORECEIVE               :: rawptr
LPFN_RIORECEIVEEX             :: rawptr
LPFN_RIOSEND                  :: rawptr
LPFN_RIOSENDEX                :: rawptr
LPFN_RIOCLOSECOMPLETIONQUEUE  :: rawptr
LPFN_RIOCREATECOMPLETIONQUEUE :: rawptr
LPFN_RIOCREATEREQUESTQUEUE    :: rawptr
LPFN_RIODEQUEUECOMPLETION     :: rawptr
LPFN_RIONOTIFY                :: rawptr
LPFN_RIORESIZECOMPLETIONQUEUE :: rawptr
LPFN_RIORESIZEREQUESTQUEUE    :: rawptr

// The two entry points main actually calls.
LPFN_RIOREGISTERBUFFER   :: #type proc "stdcall" (DataBuffer: rawptr, DataLength: windows.DWORD) -> RIO_BUFFERID
LPFN_RIODEREGISTERBUFFER :: #type proc "stdcall" (BufferId: RIO_BUFFERID)
// Table of RIO entry points that main fills in via WSAIoctl with
// SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER.
// The field order is part of the binary layout and must not be changed.
RIO_EXTENSION_FUNCTION_TABLE :: struct {
	cbSize:                   windows.DWORD,
	RIOReceive:               LPFN_RIORECEIVE,
	RIOReceiveEx:             LPFN_RIORECEIVEEX,
	RIOSend:                  LPFN_RIOSEND,
	RIOSendEx:                LPFN_RIOSENDEX,
	RIOCloseCompletionQueue:  LPFN_RIOCLOSECOMPLETIONQUEUE,
	RIOCreateCompletionQueue: LPFN_RIOCREATECOMPLETIONQUEUE,
	RIOCreateRequestQueue:    LPFN_RIOCREATEREQUESTQUEUE,
	RIODequeueCompletion:     LPFN_RIODEQUEUECOMPLETION,
	RIODeregisterBuffer:      LPFN_RIODEREGISTERBUFFER,
	RIONotify:                LPFN_RIONOTIFY,
	RIORegisterBuffer:        LPFN_RIOREGISTERBUFFER,
	RIOResizeCompletionQueue: LPFN_RIORESIZECOMPLETIONQUEUE,
	RIOResizeRequestQueue:    LPFN_RIORESIZEREQUESTQUEUE,
}
//
// WinSock2 declarations needed for RIO
//
// Bindings for the two WinSock2 procedures main uses to obtain the RIO table.
foreign import ws2_32 "system:Ws2_32.lib"

@(default_calling_convention = "stdcall")
foreign ws2_32 {
	// Initialises WinSock for the calling process.
	WSAStartup :: proc(
		wVersionRequired: windows.WORD,
		lpWSAData: ^WSADATA,
	) -> i32 ---

	// Issues a control code on a socket; used here to fetch extension
	// function pointers.
	WSAIoctl :: proc(
		s: windows.SOCKET,
		dwIoControlCode: windows.DWORD,
		lpvInBuffer: windows.LPVOID,
		cbInBuffer: windows.DWORD,
		lpvOutBuffer: windows.LPVOID,
		cbOutBuffer: windows.DWORD,
		lpcbBytesReturned: windows.LPDWORD,
		lpOverlapped: rawptr,
		lpCompletionRoutine: rawptr,
	) -> i32 ---
}
// Placeholder for the Win32 WSADATA struct. Its contents are never read here,
// so it is treated as an opaque buffer: callers pass a pointer to scratch
// storage that is large enough for the real struct.
WSADATA :: struct {}
// GUID identifying the RIO extension function table:
// 8509e081-96dd-4005-b165-9e2ee8c79e3f
WSAID_MULTIPLE_RIO :: windows.GUID{0x8509e081, 0x96dd, 0x4005, {0xb1, 0x65, 0x9e, 0x2e, 0xe8, 0xc7, 0x9e, 0x3f}}

// Direction bits of a socket ioctl code.
IOC_OUT   :: 0x40000000 // copy out parameters
IOC_IN    :: 0x80000000 // copy in parameters
IOC_INOUT :: (IOC_IN | IOC_OUT)

// Family bits of a socket ioctl code.
IOC_UNIX     :: 0x00000000
IOC_WS2      :: 0x08000000
IOC_PROTOCOL :: 0x10000000
IOC_VENDOR   :: 0x18000000

// Equivalent of the C macro _WSAIORW(IOC_WS2, 36).
SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER :: IOC_INOUT | IOC_WS2 | 36
// Entry point: allocates a buffer of `Item`s using the strategy selected on
// the command line, prints what Windows reports about the resulting address
// range, then measures how long it takes to write every item once.
//
// Command line flags:
//   -large              allocate with MEM_LARGE_PAGES (requires SeLockMemoryPrivilege)
//   -rio                register and immediately deregister the buffer with RIO
//   -lock               lock the buffer with VirtualLock
//   -default-allocator  allocate with mem.alloc instead of VirtualAlloc
//   [number]            number of items (must be positive)
//
// Every failure is reported and aborts the run, so a timing is never printed
// for a buffer that was not set up as requested.
main :: proc() {
	use_rio := false
	use_large_pages := false
	use_default_allocator := false
	use_lock := false
	num_items := 10_000_000

	for arg in os.args[1:] {
		switch arg {
		case "-large":
			use_large_pages = true
		case "-rio":
			use_rio = true
		case "-default-allocator":
			use_default_allocator = true
		case "-lock":
			use_lock = true
		case "-help", "help", "--help", "-h", "?":
			fmt.println("This is a program for testing performance when touching freshly allocated memory.\n")
			fmt.println("Args:")
			fmt.println("\t-large use large (2Mb) pages. Default pages are 4k")
			fmt.println("\t-rio locks the pages into physical memory with RIORegisterBuffer. Can be combined")
			fmt.println("\t-lock locks the pages into physical memory with VirtualLock")
			fmt.println("\t-default-allocator allocate memory with the default memory allocator, which calls HeapAlloc")
			// printf, not println: println would print the "{}" placeholder verbatim.
			fmt.printf("\t[number] specify number of items for the test. default is {}\n", num_items)
			fmt.println("\nUsage examples:")
			fmt.printf("\tlargepages test with {} items, uses 4k pages\n", num_items)
			fmt.printf("\tlargepages -large test with {} items, uses large pages\n", num_items)
			fmt.println("\tlargepages -default-allocator -rio 1000 test with 1000 items, uses default allocator and RIO")
			return
		case:
			// Accept only counts that are positive and whose byte size fits in
			// an int; anything else would wrap in the buffer_size computation.
			n, ok := strconv.parse_int(arg)
			if ok && n > 0 && n <= max(int) / size_of(Item) {
				num_items = n
			} else {
				fmt.eprintf("Invalid argument: \"{}\" (use -help to print help info)\n", arg)
			}
		}
	}

	buffer_size := uint(num_items * size_of(Item))

	if use_large_pages {
		if !windows_enable_large_pages() {
			return
		}
		min_large_page_size := windows.GetLargePageMinimum()
		if min_large_page_size == 0 {
			fmt.eprintf("GetLargePageMinimum failed: The CPU doesn't support large pages\n")
			// Aligning to 0 is meaningless and the allocation below could not succeed.
			return
		}
		fmt.printf("Minimum large page size: %M ({} bytes)\n", min_large_page_size, min_large_page_size)
		// Large-page allocations must be a multiple of the large page size.
		buffer_size = mem.align_forward_uint(buffer_size, min_large_page_size)
	}

	rio: RIO_EXTENSION_FUNCTION_TABLE
	if use_rio {
		// RIORegisterBuffer takes the length as a 32-bit DWORD.
		if buffer_size > uint(max(u32)) {
			fmt.eprintf("Buffer of {} bytes is too large for RIORegisterBuffer (limit is {} bytes)\n", buffer_size, max(u32))
			return
		}
		rio_ok: bool
		rio, rio_ok = load_rio_function_table()
		if !rio_ok {
			return
		}
	}

	fmt.printf("Allocating buffer of size %M ({} bytes)...\n", buffer_size, buffer_size)

	buffer_addr: rawptr
	if use_default_allocator {
		buffer_addr = mem.alloc(size = int(buffer_size), alignment = align_of(Item)) or_else nil
		if buffer_addr == nil {
			fmt.eprintln("mem.alloc failed")
			return
		}
	} else {
		virtual_alloc_flags: windows.DWORD = windows.MEM_COMMIT | windows.MEM_RESERVE
		if use_large_pages {
			virtual_alloc_flags |= windows.MEM_LARGE_PAGES
		}
		buffer_addr = windows.VirtualAlloc(
			lpAddress = nil,
			dwSize = buffer_size,
			flAllocationType = virtual_alloc_flags,
			flProtect = windows.PAGE_READWRITE,
		)
		if buffer_addr == nil {
			fmt.eprintln("VirtualAlloc failed")
			windows_print_last_error()
			return
		}
	}

	if use_rio {
		// Value RIORegisterBuffer returns on failure (RIO_INVALID_BUFFERID in the C headers).
		RIO_INVALID_BUFFERID :: 0xFFFFFFFF

		// Registering and immediately deregistering is enough for this test;
		// the registration itself is what touches the pages.
		buffer_id := rio.RIORegisterBuffer(buffer_addr, u32(buffer_size))
		if uintptr(buffer_id) == RIO_INVALID_BUFFERID {
			fmt.eprintln("RIORegisterBuffer failed")
			windows_print_last_error()
			return
		}
		rio.RIODeregisterBuffer(buffer_id)
	}

	if use_lock {
		// VirtualLock returns a nonzero BOOL on success.
		if !windows.VirtualLock(buffer_addr, buffer_size) {
			fmt.eprintln("VirtualLock failed")
			windows_print_last_error()
			return
		}
	}

	windows_print_info_about_memory_address_range(uintptr(buffer_addr), buffer_size)

	// View the raw buffer as a dynamic array of Items. The nil allocator makes
	// sure the array can never try to grow or free memory it does not own.
	items := transmute([dynamic]Item)runtime.Raw_Dynamic_Array {
		data = buffer_addr,
		len = int(buffer_size / size_of(Item)),
		cap = int(buffer_size / size_of(Item)),
		allocator = mem.nil_allocator(),
	}

	start_tick := time.tick_now()
	// Alternate between the two variants so that every item is written.
	for &item, i in items {
		if i % 2 == 0 {
			item = Item_A {
				foo = f32(i),
			}
		} else {
			item = Item_B {
				bar = {1, 2, 3, 4},
			}
		}
	}
	fmt.printf("Fill array duration: %v\n", time.duration_milliseconds(time.tick_since(start_tick)))
}

// Fetches the RIO extension function table from WinSock.
//
// The table is requested with SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER on a
// throwaway socket. Returns ok = false, after printing a message to stderr,
// when any step fails or when the two entry points main calls are missing.
load_rio_function_table :: proc() -> (rio: RIO_EXTENSION_FUNCTION_TABLE, ok: bool) {
	MAKEWORD :: proc(a, b: uint) -> windows.WORD {
		return windows.WORD((a & 0xff) | ((b & 0xff) << 8))
	}

	// This should be large enough for the real WSADATA struct; the contents are not needed.
	dummy_wsadata: [1024]u8
	startup_err := WSAStartup(wVersionRequired = MAKEWORD(2, 2), lpWSAData = transmute(^WSADATA)&dummy_wsadata[0])
	if startup_err != 0 {
		fmt.eprintf("WSAStartup failed with error code {}\n", startup_err)
		return
	}

	// NOTE by mmozeiko: need to get function pointers to RIO functions, and that requires dummy socket
	sock := windows.socket(af = windows.AF_UNSPEC, type = windows.SOCK_STREAM, protocol = windows.IPPROTO_TCP)
	if sock == windows.INVALID_SOCKET {
		fmt.eprintln("socket failed")
		windows_print_last_error()
		return
	}
	defer windows.closesocket(sock)

	guid := WSAID_MULTIPLE_RIO
	rio_bytes: windows.DWORD
	ioctl_err := WSAIoctl(
		s = sock,
		dwIoControlCode = SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER,
		lpvInBuffer = &guid,
		cbInBuffer = size_of(guid),
		lpvOutBuffer = cast(windows.LPVOID)&rio,
		cbOutBuffer = size_of(rio),
		lpcbBytesReturned = &rio_bytes,
		lpOverlapped = nil,
		lpCompletionRoutine = nil,
	)
	if ioctl_err != 0 {
		fmt.eprintln("WSAIoctl failed to get the RIO function table")
		windows_print_last_error()
		return
	}
	if rio.RIORegisterBuffer == nil || rio.RIODeregisterBuffer == nil {
		fmt.eprintln("RIO function table is missing RIORegisterBuffer or RIODeregisterBuffer")
		return
	}
	return rio, true
}
// Tries to enable SeLockMemoryPrivilege on the current process token, which
// Windows requires before VirtualAlloc may be called with MEM_LARGE_PAGES.
//
// Returns true when the privilege was enabled. On any failure a message is
// written to stderr and false is returned.
windows_enable_large_pages :: proc() -> bool {
	process_token: windows.HANDLE
	token_opened := windows.OpenProcessToken(
		ProcessHandle = windows.GetCurrentProcess(),
		DesiredAccess = TOKEN_ADJUST_PRIVILEGES,
		TokenHandle = &process_token,
	)
	if !token_opened {
		fmt.eprintln("OpenProcessToken failed")
		windows_print_last_error()
		return false
	}
	defer windows.CloseHandle(process_token)

	// Request exactly one privilege, marked as enabled; its LUID is filled in below.
	new_state: TOKEN_PRIVILEGES
	new_state.PrivilegeCount = 1
	new_state.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED

	if !LookupPrivilegeValueA(nil, SE_LOCK_MEMORY_NAME, &new_state.Privileges[0].Luid) {
		fmt.eprintln("LookupPrivilegeValueA failed")
		windows_print_last_error()
		return false
	}

	AdjustTokenPrivileges(process_token, false, &new_state, 0, nil, nil)

	// The outcome is read from GetLastError rather than the return value:
	// ERROR_NOT_ALL_ASSIGNED is handled separately from ERROR_SUCCESS below.
	adjust_err := windows.GetLastError()
	if adjust_err == ERROR_NOT_ALL_ASSIGNED {
		fmt.eprintln("AdjustTokenPrivileges denied (user doesn't have group policy set?)")
		return false
	}
	if adjust_err != windows.ERROR_SUCCESS {
		fmt.eprintln("AdjustTokenPrivileges failed")
		windows_print_last_error()
		return false
	}

	// Privileges adjusted successfully.
	return true
}
// Prints the system message for the calling thread's last Win32 error code.
//
// Prints "No error" when the last error is 0, and "No error message" when
// FormatMessageW cannot produce any text for the code.
windows_print_last_error :: proc() {
	err := windows.GetLastError()
	if err == 0 {
		fmt.println("No error")
		return
	}

	buf: [1024]u16
	num_chars := windows.FormatMessageW(
		flags = windows.FORMAT_MESSAGE_FROM_SYSTEM | windows.FORMAT_MESSAGE_IGNORE_INSERTS,
		lpSrc = nil,
		msgId = err,
		langId = 0,
		buf = &buf[0],
		// nsize is the buffer capacity in UTF-16 characters, not in bytes.
		// size_of(buf) would claim twice the space that is really available.
		nsize = windows.DWORD(len(buf)),
		args = nil,
	)
	if num_chars == 0 {
		fmt.println("No error message")
		return
	}

	// num_chars is the number of characters written, excluding the terminator.
	fmt.printf("%s", windows.wstring_to_utf8(&buf[0], int(num_chars)) or_else "Invalid")
}
// Prints what VirtualQuery reports for every memory region that overlaps
// the range [addr, addr + num_bytes).
//
// addr:      first address of the range.
// num_bytes: length of the range in bytes.
//
// The walk stops early, with a message on stderr, if VirtualQuery fails.
windows_print_info_about_memory_address_range :: proc(addr: uintptr, num_bytes: uint) {
	// Builds a comma-terminated list of the PAGE_* flags set in `protect`.
	// Each match is prepended, so flags tested later appear first. The string
	// is built with fmt.tprintf.
	protect_tstr :: proc(protect: windows.DWORD) -> (result: string) {
		if 0 != (protect & windows.PAGE_NOACCESS) do result = fmt.tprintf("PAGE_NOACCESS,%s", result)
		if 0 != (protect & windows.PAGE_READONLY) do result = fmt.tprintf("PAGE_READONLY,%s", result)
		if 0 != (protect & windows.PAGE_READWRITE) do result = fmt.tprintf("PAGE_READWRITE,%s", result)
		if 0 != (protect & windows.PAGE_WRITECOPY) do result = fmt.tprintf("PAGE_WRITECOPY,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE) do result = fmt.tprintf("PAGE_EXECUTE,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_READ) do result = fmt.tprintf("PAGE_EXECUTE_READ,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_READWRITE) do result = fmt.tprintf("PAGE_EXECUTE_READWRITE,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_WRITECOPY) do result = fmt.tprintf("PAGE_EXECUTE_WRITECOPY,%s", result)
		if 0 != (protect & windows.PAGE_GUARD) do result = fmt.tprintf("PAGE_GUARD,%s", result)
		if 0 != (protect & windows.PAGE_NOCACHE) do result = fmt.tprintf("PAGE_NOCACHE,%s", result)
		if 0 != (protect & windows.PAGE_WRITECOMBINE) do result = fmt.tprintf("PAGE_WRITECOMBINE,%s", result)
		return
	}

	for offset: uint; offset < num_bytes; {
		info: windows.MEMORY_BASIC_INFORMATION
		info_buf_bytes := windows.VirtualQuery(
			lpAddress = rawptr(addr + uintptr(offset)),
			lpBuffer = &info,
			dwLength = size_of(info),
		)
		if info_buf_bytes == 0 {
			fmt.eprintln("VirtualQuery failed")
			windows_print_last_error()
			return
		}

		// Left empty when the value is none of the known constants.
		state_str: string
		switch info.State {
		case windows.MEM_COMMIT:
			state_str = "COMMIT"
		case windows.MEM_RESERVE:
			state_str = "RESERVE"
		case windows.MEM_FREE:
			state_str = "FREE"
		}

		type_str: string
		switch info.Type {
		case MEM_IMAGE:
			type_str = "IMAGE"
		case windows.MEM_MAPPED:
			type_str = "MAPPED"
		case windows.MEM_PRIVATE:
			type_str = "PRIVATE"
		}

		fmt.printf("REGION: BaseAddress: {}, RegionSize: %M ({} bytes)\n", info.BaseAddress, info.RegionSize, info.RegionSize)
		fmt.printf(" AllocationBase: %p\n", info.AllocationBase)
		fmt.printf(" State: {}\n", state_str)
		fmt.printf(" Type: {}\n", type_str)
		fmt.printf(" AllocationProtect: {}\n", protect_tstr(info.AllocationProtect))
		fmt.printf(" Protect: {}\n", protect_tstr(info.Protect))
		fmt.printf(" PartitionId: {}\n", info.PartitionId)

		// RegionSize is measured from info.BaseAddress, which can lie below the
		// queried address when `addr` is not page-aligned. Continue from the
		// real end of the region so that no part of the range is skipped.
		region_end := uintptr(info.BaseAddress) + uintptr(info.RegionSize)
		offset = uint(region_end - addr)
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment