Skip to content

Instantly share code, notes, and snippets.

@jakubtomsu
Last active January 30, 2024 18:37
Show Gist options
  • Save jakubtomsu/177af38fa8a905bd60fca2b6634f1c51 to your computer and use it in GitHub Desktop.
Save jakubtomsu/177af38fa8a905bd60fca2b6634f1c51 to your computer and use it in GitHub Desktop.
package main
import "core:fmt"
import "core:mem"
import "core:os"
import "core:runtime"
import "core:strconv"
import "core:sys/windows"
import "core:time"
// This example is based on this program by Casey Muratori:
// https://github.com/cmuratori/largepages
//
// Relevant docs:
// https://learn.microsoft.com/en-us/windows/win32/memory/large-page-support
// This is fairly arbitrary
// Element type of the benchmark buffer: a union that holds either the small Item_A
// variant or the much larger Item_B variant. main uses size_of(Item) to size the buffer.
Item :: union {
Item_A,
Item_B,
}
// Small Item variant made of two 32-bit floats. main only ever assigns `foo`.
Item_A :: struct {
foo: f32,
bar: f32,
}
// Large Item variant: sixteen u64 values in total, split across two fixed-size arrays.
// main only ever assigns `bar`.
Item_B :: struct {
bar: [4]u64,
baz: [12]u64,
}
//
// Win32 API stuff missing from core:sys/windows
//
// Bindings for the two Advapi32 procedures that windows_enable_large_pages uses to
// look up a privilege by name and enable it on the process token.
foreign import advapi32 "system:Advapi32.lib"
@(default_calling_convention = "stdcall")
foreign advapi32 {
// Enables or disables the privileges listed in NewState on the given access token.
AdjustTokenPrivileges :: proc(TokenHandle: windows.HANDLE, DisableAllPrivileges: windows.BOOL, NewState: ^TOKEN_PRIVILEGES, BufferLength: windows.DWORD, PreviousState: ^TOKEN_PRIVILEGES, ReturnLength: ^windows.DWORD) -> windows.BOOL ---
// ANSI variant: resolves a privilege name (see the SE_*_NAME constants) to its LUID.
LookupPrivilegeValueA :: proc(lpSystemName: cstring, lpName: cstring, lpLuid: ^LUID) -> windows.BOOL ---
}
// Identifier written by LookupPrivilegeValueA, stored as a low and a high 32-bit part.
LUID :: struct {
LowPart: windows.DWORD,
HighPart: windows.LONG,
}
// One privilege entry: the privilege's LUID plus its SE_PRIVILEGE_* attribute flags.
LUID_AND_ATTRIBUTES :: struct {
Luid: LUID,
Attributes: windows.DWORD,
}
// Declared length of the Privileges array below. This program only ever passes a
// single privilege, so one inline entry is sufficient.
ANYSIZE_ARRAY :: 1
// Privilege list handed to AdjustTokenPrivileges: a count followed by the entries.
TOKEN_PRIVILEGES :: struct {
PrivilegeCount: windows.DWORD,
Privileges: [ANYSIZE_ARRAY]LUID_AND_ATTRIBUTES,
}
// Error code checked after AdjustTokenPrivileges: not every requested privilege was granted.
ERROR_NOT_ALL_ASSIGNED :: 1300 // NOTE: the C headers wrap this literal in __MSABI_LONG; a plain integer is used here
// Attribute flags for LUID_AND_ATTRIBUTES.Attributes. Only SE_PRIVILEGE_ENABLED is used in this file.
SE_PRIVILEGE_ENABLED_BY_DEFAULT :: 0x00000001
SE_PRIVILEGE_ENABLED :: 0x00000002
SE_PRIVILEGE_REMOVED :: 0x00000004
SE_PRIVILEGE_USED_FOR_ACCESS :: 0x80000000
// Privilege names accepted by LookupPrivilegeValueA.
// Only SE_LOCK_MEMORY_NAME is used in this file (it is the privilege enabled for large pages).
// WARNING: ASCII only! Convert to UTF-16 before using these with the *W procedures.
SE_CREATE_TOKEN_NAME :: "SeCreateTokenPrivilege"
SE_ASSIGNPRIMARYTOKEN_NAME :: "SeAssignPrimaryTokenPrivilege"
SE_LOCK_MEMORY_NAME :: "SeLockMemoryPrivilege"
SE_INCREASE_QUOTA_NAME :: "SeIncreaseQuotaPrivilege"
SE_UNSOLICITED_INPUT_NAME :: "SeUnsolicitedInputPrivilege"
SE_MACHINE_ACCOUNT_NAME :: "SeMachineAccountPrivilege"
SE_TCB_NAME :: "SeTcbPrivilege"
SE_SECURITY_NAME :: "SeSecurityPrivilege"
SE_TAKE_OWNERSHIP_NAME :: "SeTakeOwnershipPrivilege"
SE_LOAD_DRIVER_NAME :: "SeLoadDriverPrivilege"
SE_SYSTEM_PROFILE_NAME :: "SeSystemProfilePrivilege"
SE_SYSTEMTIME_NAME :: "SeSystemtimePrivilege"
SE_PROF_SINGLE_PROCESS_NAME :: "SeProfileSingleProcessPrivilege"
SE_INC_BASE_PRIORITY_NAME :: "SeIncreaseBasePriorityPrivilege"
SE_CREATE_PAGEFILE_NAME :: "SeCreatePagefilePrivilege"
SE_CREATE_PERMANENT_NAME :: "SeCreatePermanentPrivilege"
SE_BACKUP_NAME :: "SeBackupPrivilege"
SE_RESTORE_NAME :: "SeRestorePrivilege"
SE_SHUTDOWN_NAME :: "SeShutdownPrivilege"
SE_DEBUG_NAME :: "SeDebugPrivilege"
SE_AUDIT_NAME :: "SeAuditPrivilege"
SE_SYSTEM_ENVIRONMENT_NAME :: "SeSystemEnvironmentPrivilege"
SE_CHANGE_NOTIFY_NAME :: "SeChangeNotifyPrivilege"
SE_REMOTE_SHUTDOWN_NAME :: "SeRemoteShutdownPrivilege"
SE_UNDOCK_NAME :: "SeUndockPrivilege"
SE_SYNC_AGENT_NAME :: "SeSyncAgentPrivilege"
SE_ENABLE_DELEGATION_NAME :: "SeEnableDelegationPrivilege"
SE_MANAGE_VOLUME_NAME :: "SeManageVolumePrivilege"
SE_IMPERSONATE_NAME :: "SeImpersonatePrivilege"
SE_CREATE_GLOBAL_NAME :: "SeCreateGlobalPrivilege"
SE_TRUSTED_CREDMAN_ACCESS_NAME :: "SeTrustedCredManAccessPrivilege"
SE_RELABEL_NAME :: "SeRelabelPrivilege"
SE_INC_WORKING_SET_NAME :: "SeIncreaseWorkingSetPrivilege"
SE_TIME_ZONE_NAME :: "SeTimeZonePrivilege"
SE_CREATE_SYMBOLIC_LINK_NAME :: "SeCreateSymbolicLinkPrivilege"
SE_DELEGATE_SESSION_USER_IMPERSONATE_NAME :: "SeDelegateSessionUserImpersonatePrivilege"
// Access-right bits for OpenProcessToken's DesiredAccess argument.
// Only TOKEN_ADJUST_PRIVILEGES is used in this file.
TOKEN_ASSIGN_PRIMARY :: 0x0001
TOKEN_DUPLICATE :: 0x0002
TOKEN_IMPERSONATE :: 0x0004
TOKEN_QUERY :: 0x0008
TOKEN_QUERY_SOURCE :: 0x0010
TOKEN_ADJUST_PRIVILEGES :: 0x0020
TOKEN_ADJUST_GROUPS :: 0x0040
TOKEN_ADJUST_DEFAULT :: 0x0080
TOKEN_ADJUST_SESSIONID :: 0x0100
// Value compared against MEMORY_BASIC_INFORMATION.Type when printing region info.
MEM_IMAGE :: 0x1000000
//
// RIO - Registered Input/Output
//
// Opaque handle returned by RIORegisterBuffer and consumed by RIODeregisterBuffer.
RIO_BUFFERID :: rawptr
// TODO
// The entries below are untyped placeholders: this program never calls them, they
// only occupy pointer-sized slots in RIO_EXTENSION_FUNCTION_TABLE.
LPFN_RIORECEIVE :: rawptr
LPFN_RIORECEIVEEX :: rawptr
LPFN_RIOSEND :: rawptr
LPFN_RIOSENDEX :: rawptr
LPFN_RIOCLOSECOMPLETIONQUEUE :: rawptr
LPFN_RIOCREATECOMPLETIONQUEUE :: rawptr
LPFN_RIOCREATEREQUESTQUEUE :: rawptr
LPFN_RIODEQUEUECOMPLETION :: rawptr
LPFN_RIONOTIFY :: rawptr
LPFN_RIORESIZECOMPLETIONQUEUE :: rawptr
LPFN_RIORESIZEREQUESTQUEUE :: rawptr
// The only two entries with real signatures, because main calls them through the table.
LPFN_RIOREGISTERBUFFER :: #type proc "stdcall" (DataBuffer: rawptr, DataLength: windows.DWORD) -> RIO_BUFFERID
LPFN_RIODEREGISTERBUFFER :: #type proc "stdcall" (BufferId: RIO_BUFFERID)
// Table of RIO entry points. main passes it to WSAIoctl as the output buffer for
// SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER and then calls RIORegisterBuffer and
// RIODeregisterBuffer through it.
// NOTE(review): the field order presumably has to match the Win32 struct of the same
// name, since WinSock writes into it directly - do not reorder without checking.
RIO_EXTENSION_FUNCTION_TABLE :: struct {
cbSize: windows.DWORD,
RIOReceive: LPFN_RIORECEIVE,
RIOReceiveEx: LPFN_RIORECEIVEEX,
RIOSend: LPFN_RIOSEND,
RIOSendEx: LPFN_RIOSENDEX,
RIOCloseCompletionQueue: LPFN_RIOCLOSECOMPLETIONQUEUE,
RIOCreateCompletionQueue: LPFN_RIOCREATECOMPLETIONQUEUE,
RIOCreateRequestQueue: LPFN_RIOCREATEREQUESTQUEUE,
RIODequeueCompletion: LPFN_RIODEQUEUECOMPLETION,
RIODeregisterBuffer: LPFN_RIODEREGISTERBUFFER,
RIONotify: LPFN_RIONOTIFY,
RIORegisterBuffer: LPFN_RIOREGISTERBUFFER,
RIOResizeCompletionQueue: LPFN_RIORESIZECOMPLETIONQUEUE,
RIOResizeRequestQueue: LPFN_RIORESIZEREQUESTQUEUE,
}
//
// WinSock2 for RIO
//
// Bindings for the two WinSock procedures main needs to obtain the RIO function table.
foreign import ws2_32 "system:Ws2_32.lib"
@(default_calling_convention = "stdcall")
foreign ws2_32 {
// Initializes WinSock for the process; the result is an i32 status code.
WSAStartup :: proc(wVersionRequired: windows.WORD, lpWSAData: ^WSADATA) -> i32 ---
// Socket control call; main uses it with SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER.
WSAIoctl :: proc(s: windows.SOCKET, dwIoControlCode: windows.DWORD, lpvInBuffer: windows.LPVOID, cbInBuffer: windows.DWORD, lpvOutBuffer: windows.LPVOID, cbOutBuffer: windows.DWORD, lpcbBytesReturned: windows.LPDWORD, lpOverlapped: rawptr, lpCompletionRoutine: rawptr) -> i32 ---
}
// Placeholder type for WSAStartup's output parameter. main backs the pointer with a
// 1024-byte scratch array instead of a real value of this type.
WSADATA :: struct {} // we don't need the contents of this struct, so just treat it as an opaque buffer
// GUID passed as the WSAIoctl input buffer to select the RIO extension table.
// 8509e081-96dd-4005-b165-9e2ee8c79e3f
WSAID_MULTIPLE_RIO :: windows.GUID{0x8509e081, 0x96dd, 0x4005, {0xb1, 0x65, 0x9e, 0x2e, 0xe8, 0xc7, 0x9e, 0x3f}}
// Bit fields used to compose WinSock ioctl control codes.
IOC_OUT :: 0x40000000 /* copy out parameters */
IOC_IN :: 0x80000000 /* copy in parameters */
IOC_INOUT :: (IOC_IN | IOC_OUT)
IOC_UNIX :: 0x00000000
IOC_WS2 :: 0x08000000
IOC_PROTOCOL :: 0x10000000
IOC_VENDOR :: 0x18000000
// Control code main passes to WSAIoctl to fetch RIO_EXTENSION_FUNCTION_TABLE.
SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER :: IOC_INOUT | IOC_WS2 | 36 // _WSAIORW(IOC_WS2,36)
// Entry point of the benchmark.
//
// Parses the command line, allocates one buffer of `Item`s using the selected strategy
// (VirtualAlloc with regular or large pages, or the default allocator), optionally
// registers it with RIO and/or locks it with VirtualLock, prints the layout of the
// resulting address range and finally times a first-touch write over every item.
//
// Every setup failure prints a message to stderr and returns without running the test.
main :: proc() {
	use_rio := false
	use_large_pages := false
	use_default_allocator := false
	use_lock := false
	num_items := 10_000_000

	for arg in os.args[1:] {
		switch arg {
		case "-large":
			use_large_pages = true
		case "-rio":
			use_rio = true
		case "-default-allocator":
			use_default_allocator = true
		case "-lock":
			use_lock = true
		case "-help", "help", "--help", "-h", "?":
			fmt.println("This is a program for testing performance when touching freshly allocated memory.\n")
			fmt.println("Args:")
			fmt.println("\t-large use large (2Mb) pages. Default pages are 4k")
			fmt.println("\t-rio locks the pages into physical memory with RIORegisterBuffer. Can be combined")
			fmt.println("\t-lock locks the pages into physical memory with VirtualLock")
			fmt.println("\t-default-allocator allocate memory with the default memory allocator, which calls HeapAlloc")
			// printf (not println) so that the `{}` placeholders are actually substituted.
			fmt.printf("\t[number] specify number of items for the test. default is {}\n", num_items)
			fmt.println("\nUsage examples:")
			fmt.printf("\tlargepages test with {} items, uses 4k pages\n", num_items)
			fmt.printf("\tlargepages -large test with {} items, uses large pages\n", num_items)
			fmt.println("\tlargepages -default-allocator -rio 1000 test with 1000 items, uses default allocator and RIO")
			return
		case:
			// Anything else must be a positive item count. Zero or negative values are
			// rejected because they would wrap around in the uint conversion below.
			if n, ok := strconv.parse_int(arg); ok && n > 0 {
				num_items = n
			} else {
				fmt.eprintf("Invalid argument: \"{}\" (use -help to print help info)\n", arg)
			}
		}
	}

	buffer_size := uint(num_items * size_of(Item))

	if use_large_pages {
		if !windows_enable_large_pages() {
			return
		}
		min_large_page_size := windows.GetLargePageMinimum()
		if min_large_page_size == 0 {
			fmt.eprintf("GetLargePageMinimum failed: The CPU doesn't support large pages\n")
			// A zero alignment is not usable for the rounding below, so stop here.
			return
		}
		fmt.printf("Minimum large page size: %M ({} bytes)\n", min_large_page_size, min_large_page_size)
		// Large-page allocations must be a multiple of the minimum large page size.
		buffer_size = mem.align_forward_uint(buffer_size, min_large_page_size)
	}

	rio: RIO_EXTENSION_FUNCTION_TABLE
	if use_rio {
		MAKEWORD :: proc(a, b: uint) -> windows.WORD {
			return windows.WORD((a & 0xff) | ((b & 0xff) << 8))
		}

		dummy_wsadata: [1024]u8 // this should be large enough for the struct, we don't need the contents

		// NOTE by mmozeiko: need to get function pointers to RIO functions, and that requires dummy socket
		startup_result := WSAStartup(wVersionRequired = MAKEWORD(2, 2), lpWSAData = transmute(^WSADATA)&dummy_wsadata[0])
		if startup_result != 0 {
			fmt.eprintf("WSAStartup failed with error {}\n", startup_result)
			return
		}

		guid := WSAID_MULTIPLE_RIO
		rio_bytes: windows.DWORD
		sock := windows.socket(af = windows.AF_UNSPEC, type = windows.SOCK_STREAM, protocol = windows.IPPROTO_TCP)
		ioctl_result := WSAIoctl(
			s = sock,
			dwIoControlCode = SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER,
			lpvInBuffer = &guid,
			cbInBuffer = size_of(guid),
			lpvOutBuffer = cast(windows.LPVOID)&rio,
			cbOutBuffer = size_of(rio),
			lpcbBytesReturned = &rio_bytes,
			lpOverlapped = nil,
			lpCompletionRoutine = nil,
		)
		windows.closesocket(sock)

		// Without this check a failed query would leave nil function pointers in `rio`,
		// which are called further below.
		if ioctl_result != 0 || rio.RIORegisterBuffer == nil || rio.RIODeregisterBuffer == nil {
			fmt.eprintf("Failed to query the RIO extension function table\n")
			return
		}
	}

	fmt.printf("Allocating buffer of size %M ({} bytes)...\n", buffer_size, buffer_size)

	buffer_addr: rawptr
	if use_default_allocator {
		buffer_addr = mem.alloc(size = int(buffer_size), alignment = align_of(Item)) or_else nil
		if buffer_addr == nil {
			fmt.eprintf("mem.alloc failed")
			// Nothing to test without a buffer.
			return
		}
	} else {
		virtual_alloc_flags: windows.DWORD = windows.MEM_COMMIT | windows.MEM_RESERVE
		if use_large_pages {
			virtual_alloc_flags |= windows.MEM_LARGE_PAGES
		}
		buffer_addr = windows.VirtualAlloc(
			lpAddress = nil,
			dwSize = buffer_size,
			flAllocationType = virtual_alloc_flags,
			flProtect = windows.PAGE_READWRITE,
		)
		if buffer_addr == nil {
			fmt.eprintf("VirtualAlloc failed")
			windows_print_last_error()
			return
		}
	}

	if use_rio {
		// RIORegisterBuffer takes a 32-bit length; refuse to silently register only
		// part of the buffer.
		if buffer_size > uint(max(u32)) {
			fmt.eprintf("Buffer of {} bytes is too large to register with RIO\n", buffer_size)
			return
		}
		// Registering and immediately deregistering is the whole point of the -rio mode.
		rio.RIODeregisterBuffer(rio.RIORegisterBuffer(buffer_addr, u32(buffer_size)))
	}

	if use_lock {
		// VirtualLock returns a nonzero value on success, so failure is the negated result.
		if !windows.VirtualLock(buffer_addr, buffer_size) {
			fmt.eprintf("VirtualLock failed")
			windows_print_last_error()
			return
		}
	}

	windows_print_info_about_memory_address_range(uintptr(buffer_addr), buffer_size)

	// View the raw buffer as a dynamic array. The nil allocator keeps the array from
	// ever trying to grow or free memory it does not own.
	items := transmute([dynamic]Item)runtime.Raw_Dynamic_Array {
		data = buffer_addr,
		len = int(buffer_size / size_of(Item)),
		cap = int(buffer_size / size_of(Item)),
		allocator = mem.nil_allocator(),
	}

	// The measured section: write every item once, alternating between the two variants.
	start_tick := time.tick_now()
	for &item, i in items {
		if i % 2 == 0 {
			item = Item_A {
				foo = f32(i),
			}
		} else {
			item = Item_B {
				bar = {1, 2, 3, 4},
			}
		}
	}
	fmt.printf("Fill array duration: %v\n", time.duration_milliseconds(time.tick_since(start_tick)))
}
// Enables SeLockMemoryPrivilege on the current process token, which the large-page
// allocation path in this program asks for before calling VirtualAlloc.
//
// Returns true when the privilege was enabled. On any failure a diagnostic is written
// to stderr and false is returned.
windows_enable_large_pages :: proc() -> bool {
	process_token: windows.HANDLE
	if !windows.OpenProcessToken(windows.GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &process_token) {
		fmt.eprintln("OpenProcessToken failed")
		windows_print_last_error()
		return false
	}
	defer windows.CloseHandle(process_token)

	// Request a single privilege, to be switched on.
	new_state: TOKEN_PRIVILEGES
	new_state.PrivilegeCount = 1
	new_state.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED

	if !LookupPrivilegeValueA(nil, SE_LOCK_MEMORY_NAME, &new_state.Privileges[0].Luid) {
		fmt.eprintln("LookupPrivilegeValueA failed")
		windows_print_last_error()
		return false
	}

	AdjustTokenPrivileges(process_token, false, &new_state, 0, nil, nil)

	// The outcome is judged by the last-error value rather than by the return value.
	last_error := windows.GetLastError()
	if last_error == windows.ERROR_SUCCESS {
		// Privileges adjusted successfully
		return true
	}
	if last_error == ERROR_NOT_ALL_ASSIGNED {
		fmt.eprintln("AdjustTokenPrivileges denied (user doesn't have group policy set?)")
		return false
	}

	fmt.eprintln("AdjustTokenPrivileges failed")
	windows_print_last_error()
	return false
}
// Prints the system message for the calling thread's current GetLastError() value.
//
// Prints "No error" when the error code is 0 and "No error message" when the system
// has no text for the code. Output goes to stdout.
windows_print_last_error :: proc() {
	err := windows.GetLastError()
	if err == 0 {
		fmt.println("No error")
		return
	}

	buf: [1024]u16
	num_chars := windows.FormatMessageW(
		flags = windows.FORMAT_MESSAGE_FROM_SYSTEM | windows.FORMAT_MESSAGE_IGNORE_INSERTS,
		lpSrc = nil,
		msgId = err,
		langId = 0,
		buf = &buf[0],
		// The size is expressed in UTF-16 code units, not bytes. Passing size_of(buf)
		// would claim twice the real capacity and allow a write past the end of `buf`.
		nsize = len(buf),
		args = nil,
	)
	if num_chars == 0 {
		fmt.println("No error message")
		return
	}

	fmt.printf("%s", windows.wstring_to_utf8(&buf[0], int(num_chars)) or_else "Invalid")
}
// Walks the address range [addr, addr + num_bytes) with VirtualQuery and prints one
// report per memory region: base address, size, state, type and protection flags.
//
// addr:      first address of the range; it does not have to be page-aligned.
// num_bytes: length of the range in bytes.
//
// Stops early, after printing "Error" to stderr, if VirtualQuery fails.
windows_print_info_about_memory_address_range :: proc(addr: uintptr, num_bytes: uint) {
	// Renders a protection bit mask as a comma-separated list of flag names.
	// Names are prepended, so later flags appear first and the list ends with a comma.
	protect_tstr :: proc(protect: windows.DWORD) -> (result: string) {
		if 0 != (protect & windows.PAGE_NOACCESS) do result = fmt.tprintf("PAGE_NOACCESS,%s", result)
		if 0 != (protect & windows.PAGE_READONLY) do result = fmt.tprintf("PAGE_READONLY,%s", result)
		if 0 != (protect & windows.PAGE_READWRITE) do result = fmt.tprintf("PAGE_READWRITE,%s", result)
		if 0 != (protect & windows.PAGE_WRITECOPY) do result = fmt.tprintf("PAGE_WRITECOPY,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE) do result = fmt.tprintf("PAGE_EXECUTE,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_READ) do result = fmt.tprintf("PAGE_EXECUTE_READ,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_READWRITE) do result = fmt.tprintf("PAGE_EXECUTE_READWRITE,%s", result)
		if 0 != (protect & windows.PAGE_EXECUTE_WRITECOPY) do result = fmt.tprintf("PAGE_EXECUTE_WRITECOPY,%s", result)
		if 0 != (protect & windows.PAGE_GUARD) do result = fmt.tprintf("PAGE_GUARD,%s", result)
		if 0 != (protect & windows.PAGE_NOCACHE) do result = fmt.tprintf("PAGE_NOCACHE,%s", result)
		if 0 != (protect & windows.PAGE_WRITECOMBINE) do result = fmt.tprintf("PAGE_WRITECOMBINE,%s", result)
		return
	}

	for offset: uint; offset < num_bytes; {
		query_addr := addr + uintptr(offset)

		info: windows.MEMORY_BASIC_INFORMATION
		info_buf_bytes := windows.VirtualQuery(
			lpAddress = rawptr(query_addr),
			lpBuffer = &info,
			dwLength = size_of(info),
		)
		if info_buf_bytes == 0 {
			fmt.eprintf("Error")
			return
		}

		state_str: string
		switch info.State {
		case windows.MEM_COMMIT:
			state_str = "COMMIT"
		case windows.MEM_RESERVE:
			state_str = "RESERVE"
		case windows.MEM_FREE:
			state_str = "FREE"
		}

		type_str: string
		switch info.Type {
		case MEM_IMAGE:
			type_str = "IMAGE"
		case windows.MEM_MAPPED:
			type_str = "MAPPED"
		case windows.MEM_PRIVATE:
			type_str = "PRIVATE"
		}

		fmt.printf("REGION: BaseAddress: {}, RegionSize: %M ({} bytes)\n", info.BaseAddress, info.RegionSize, info.RegionSize)
		fmt.printf(" AllocationBase: %p\n", info.AllocationBase)
		fmt.printf(" State: {}\n", state_str)
		fmt.printf(" Type: {}\n", type_str)
		fmt.printf(" AllocationProtect: {}\n", protect_tstr(info.AllocationProtect))
		fmt.printf(" Protect: {}\n", protect_tstr(info.Protect))
		fmt.printf(" PartitionId: {}\n", info.PartitionId)

		// VirtualQuery rounds the queried address down to a page boundary, so RegionSize
		// is measured from info.BaseAddress, which can lie below query_addr when the
		// buffer is not page-aligned. Continue from the real end of the region rather
		// than adding RegionSize to the offset, which would skip past the next region.
		region_end := uintptr(info.BaseAddress) + uintptr(info.RegionSize)
		if region_end <= query_addr {
			// No forward progress is possible; bail out rather than loop forever.
			break
		}
		offset = uint(region_end - addr)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment