Skip to content

Instantly share code, notes, and snippets.

@Wowfunhappy
Last active September 8, 2024 19:50
Show Gist options
  • Save Wowfunhappy/8212f5bea4c601ac9a6297789f232321 to your computer and use it in GitHub Desktop.
Save Wowfunhappy/8212f5bea4c601ac9a6297789f232321 to your computer and use it in GitHub Desktop.
KQueueScanContinuePatch
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>wowfunhappy.$(PRODUCT_NAME:rfc1034identifier)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>KEXT</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>OSBundleLibraries</key>
<dict>
<key>com.apple.kpi.iokit</key>
<string>10.0.0</string>
<key>com.apple.kpi.libkern</key>
<string>10.0.0</string>
<key>com.apple.kpi.unsupported</key>
<string>10.0.0</string>
</dict>
</dict>
</plist>
//
// KQueueScanContinuePatch.c
// KQueueScanContinuePatch
//
// Created by Wowfunhappy with assistance from krackers.
//
#include <IOKit/IOLib.h>
#include <mach-o/loader.h>
#include <i386/proc_reg.h>
#define LENGTH(arr) sizeof(arr) / sizeof(*arr)
// Copied from github.com/leiless/ksymresolver/blob/master/ksymresolver/ksymresolver.h
#define KERN_TEXT_BASE ((vm_offset_t) 0xffffff8000200000ULL)
#define CR0_WP 0x00010000
static size_t KASLRAlignment = 0x100000;
#if __LP64__
#define NUM_SUPPORTED_KERNELS 5
#else
#define NUM_SUPPORTED_KERNELS 1
#endif
// Taken from Hopper
static vm_offset_t possible_kqueue_scan_continue_panic_start_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
0xffffff800053b590, //10.7.5 11G63, xnu-1699.32.7
0xffffff8000557afe, //10.8.4 12F2560, xnu-2050.48.19
0xffffff80005c7a9c, //10.9.5 13F1911, xnu-2422.115.15
0xffffff8000615928, //10.9.5 Bronya-Ryzen
0xffffff80005bf9bc, //10.9.5 IPCA
#else
0x0055bb7e, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
static vm_offset_t possible_kqueue_scan_continue_panic_end_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
0xffffff800053b5a2, //10.7.5 11G63, xnu-1699.32.7
0xffffff8000557b11, //10.8.4 12F2560, xnu-2050.48.19
0xffffff80005c7ab7, //10.9.5 13F1911, xnu-2422.115.15
0xffffff8000615942, //10.9.5 Bronya-Ryzen
0xffffff80005bf9d7, //10.9.5 IPCA
#else
0x0055bb90, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
static char possible_search_bytes[NUM_SUPPORTED_KERNELS][30] = {
#if __LP64__
//10.7.5 11G63, xnu-1699.32.7
{
0x48, 0x8d, 0x3d, 0x69, 0xf4, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
0x30, 0xc0, //xor al,al
0x89, 0xde, //mov esi,ebx
0xe8, 0xa0, 0x4f, 0xce, 0xff, //call _panic
0x31, 0xdb, 0x89, //xor ebx,ebx
0xda, 0x49, 0x8b, 0xb7, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xf7,
},
//10.8.4 12F2560, xnu-2050.48.19
{
0x48, 0x8d, 0x3d, 0x77, 0xe9, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
0x89, 0xde, //mov esi,ebx
0x30, 0xc0, //xor al,al
0xe8, 0x02, 0x5b, 0xcc, 0xff, //call _panic
0x45, 0x31, 0xed, //xor r13d,r13d
0x49, 0x8b, 0xb6, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xff, 0x44,
},
//10.9.5 13F1911, xnu-2422.115.15
{
0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
0x45, 0x31, 0xe4, //xor r12d,r12d
0x49, 0x8b, 0xb5,
},
//10.9.5 Bronya-Ryzen
{
0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
0x49, 0x8b, 0x47, 0x70, 0x49, 0x8b,
},
//10.9.5 IPCA
{
0x48, 0x8d, 0x3d, 0x7c, 0x08, 0x1a, 0x00, //lea rdi,qword [aSInvalidWaitre]
0x48, 0x8d, 0x35, 0xdd, 0x08, 0x1a, 0x00, //lea rsi, qword [aKqueuescancont]
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xdc, 0x34, 0xc6, 0xff, //call _panic
0x45, 0x31, 0xe4, //xor r12d, r12d
0x49, 0x8b, 0xb5,
},
#else
//10.7.5 11G63, xnu-1699.32.7 (32 bit)
{
0x89, 0x4c, 0x24, 0x04, //mov dword[esp+0x28+var_24],ecx
0xc7, 0x04, 0x24, 0xa4, 0x06, 0x70, 0x00, //mov dword[esp+0x28+var_28],aKeventscancont
0xe8, 0x32, 0x46, 0xcc, 0xff, //call _panic
0x31, 0xf6, //xor esi,esi
0x8b, 0x45, 0xec, 0x8b, 0x88, 0x9c, 0x00, 0x00, 0x00, 0x89, 0x74, 0x24,
},
#endif
};
static char possible_replacement_bytes[NUM_SUPPORTED_KERNELS][7] = {
#if __LP64__
{0x48, 0xc7, 0xc3, 0x09, 0x00, 0x00, 0x00}, // mov rbx,0x9 | 10.7.5 11G63, xnu-1699.32.7
{0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.8.4 12F2560, xnu-2050.48.19
{0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 13F1911, xnu-2422.115.15
{0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.9.5 Bronya-Ryzen
{0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 IPCA
#else
{0xbe, 0x09, 0x00, 0x00, 0x00, 0x90, 0x90}, // mov esi,0x9 | 10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
// Adapted from github.com/acidanthera/Lilu/blob/137b4d9deb7022bb97fa9899303934534ff20ec7/Lilu/Sources/kern_mach.cpp
static vm_offset_t get_kernel_base() {
#if __LP64__
// The function choice is mostly arbitrary, but IOLog frequently has a low address.
vm_offset_t tmp = (vm_offset_t)(IOLog);
// Align the address
tmp &= ~(KASLRAlignment - 1);
// Search backwards for the kernel base address (mach-o header)
while (tmp >= KASLRAlignment) {
if (*(uint32_t *)(tmp) == MH_MAGIC_64) {
// make sure it's the header and not some reference to the MAGIC number
struct segment_command_64 segmentCommand = *(struct segment_command_64 *)(tmp + sizeof(struct mach_header_64));
if (!strncmp(segmentCommand.segname, "__TEXT", sizeof(segmentCommand.segname))) {
printf("KQueueScanContinuePatch: found kernel mach-o header address at %lx\n", tmp);
break;
}
}
tmp -= KASLRAlignment;
}
return tmp - KERN_TEXT_BASE;
#else
// No 32-bit XNU kernels have KASLR
return 0;
#endif
}
boolean_t write_protection_is_enabled() {
return (get_cr0() & CR0_WP) != 0;
}
kern_return_t KQueueScanContinuePatch_start(kmod_info_t * ki, void *d)
{
printf("KQueueScanContinuePatch START\n");
vm_offset_t kernel_base = get_kernel_base();
vm_offset_t kqueue_scan_continue_panic_start_location = 0;
vm_offset_t kqueue_scan_continue_panic_end_location = 0;
char search_bytes[sizeof(possible_search_bytes[0])];
char replacement_bytes[sizeof(possible_replacement_bytes[0])];
uint8_t *kscpb = NULL;
for (int i = 0; i < LENGTH(possible_kqueue_scan_continue_panic_start_locations); i++) {
kqueue_scan_continue_panic_start_location = kernel_base + possible_kqueue_scan_continue_panic_start_locations[i];
kqueue_scan_continue_panic_end_location = kernel_base + possible_kqueue_scan_continue_panic_end_locations[i];
memcpy(search_bytes, possible_search_bytes[i], sizeof(search_bytes));
memcpy(replacement_bytes, possible_replacement_bytes[i], sizeof(replacement_bytes));
kscpb = (uint8_t*) kqueue_scan_continue_panic_start_location;
if (memcmp(kscpb, search_bytes, sizeof(search_bytes)) == 0) {
break;
}
if (i == LENGTH(possible_kqueue_scan_continue_panic_start_locations) - 1) {
printf("KQueueScanContinuePatch: Memory region not found. You are probably using an unsupported kernel, or your kernel has already been patched.\n");
return KERN_FAILURE;
}
}
printf("KQueueScanContinuePatch: Pre-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9], kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19], kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29], kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);
unsigned long extra_space_to_fill = kqueue_scan_continue_panic_end_location - kqueue_scan_continue_panic_start_location - sizeof(replacement_bytes);
if (kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes) + extra_space_to_fill != kqueue_scan_continue_panic_end_location) {
printf("kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes) + extra_space_to_fill != kqueue_scan_continue_panic_end_location\n");
return KERN_FAILURE;
}
boolean_t interrupts_were_enabled = ml_get_interrupts_enabled();
if (interrupts_were_enabled) {
ml_set_interrupts_enabled(false);
if (! ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Disabled interrupts\n");
} else {
printf("KQueueScanContinuePatch: Failed to disable interrupts\n");
return KERN_FAILURE;
}
}
boolean_t write_protection_was_enabled = write_protection_is_enabled();
if (write_protection_was_enabled) {
set_cr0(get_cr0() & ~CR0_WP); // disable write protection
if (!write_protection_is_enabled()) {
printf("KQueueScanContinuePatch: Disabled write protection\n");
} else {
printf("KQueueScanContinuePatch: Failed to disable write protection\n");
//Re-enable interrupts before exiting.
if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
ml_set_interrupts_enabled(true);
if (ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled interrupts after failing to disable write protection.\n");
} else {
panic("KQueueScanContinuePatch: Failed to re-enable interrupts after failing to disable write protection!\n");
}
}
return KERN_FAILURE;
}
}
//commence memory rewriting
memcpy((void *)kqueue_scan_continue_panic_start_location, replacement_bytes, sizeof(replacement_bytes));
memset((void *)kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes), 0x90 /*nop*/, extra_space_to_fill);
printf("KQueueScanContinuePatch: Memory rewritten\n");
//conclude memory rewriting
if (write_protection_was_enabled && !write_protection_is_enabled()) {
set_cr0(get_cr0() | CR0_WP); // re-enable write protection
if (write_protection_is_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled write protection\n");
} else {
panic("KQueueScanContinuePatch: failed to re-enable write protection!\n");
}
}
if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
ml_set_interrupts_enabled(true);
if (ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled interrupts\n");
} else {
panic("KQueueScanContinuePatch: Failed to re-enable interrupts!\n");
}
}
printf("KQueueScanContinuePatch: Post-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9], kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19], kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29], kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);
return KERN_SUCCESS;
}
kern_return_t KQueueScanContinuePatch_stop(kmod_info_t *ki, void *d)
{
printf("KQueueScanContinuePatch STOP\n");
return KERN_SUCCESS;
}
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>wowfunhappy.KQueueScanContinuePatch-loader</string>
<key>ProgramArguments</key>
<array>
<string>/bin/sh</string>
<string>-c</string>
<string>/bin/cp -R /Library/Extensions/KQueueScanContinuePatch.kext /tmp/ &amp;&amp; /sbin/kextload /tmp/KQueueScanContinuePatch.kext ; /bin/rm -r /tmp/KQueueScanContinuePatch.kext</string>
</array>
<key>RunAtLoad</key>
<true/>
</dict>
</plist>
@Wowfunhappy
Copy link
Author

The issue is closed as fixed btw?!

Yeah, but that's an issue in mainline Chromium.

I suspect the way Google fixed it broke Chromium Legacy. IE it's broken on old OSs, or otherwise causes a race condition when used alongside Bluebox's other patches.

@krackers
Copy link

See the discussion in the github issue for things we tried to investigate. I think the first step if you wanted to go about fixing would be to create a minimal repro. It's still not clear to me under what conditions could lead to THREAD_RESTART while waiting on a kqueue. I'm assuming this is some sort of race condition that only appears when waiting on a gazillion descriptors or something, and only Chrome exhibits it because of how they use IPC for everything.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@krackers
Copy link

Chrome still uses Mojo IPC. Seems like kqueue on mac can watch mach ports

Introduce MessagePumpKqueue and make it the MessagePumpForIO on macOS.

MessagePumpKqueue is a WatchableIOMessagePumpPosix capable of watching
FDs, like the MessagePumpLibevent it is replacing (note that Libevent is
still used on other POSIX systems). However MessagePumpKqueue can also
watch Mach port receive rights for notification of new Mach messages,
which libevent cannot do.

The underlying wait primitive for both is a kqueue, so the performance
characteristics are expected to be similar. MessagePumpLibevent uses a
pipe as as the ScheduleWork signal source, while MessagePumpKqueue uses
a Mach port, so MessagePumpKqueue will consume two fewer FDs per
instance.

information to a log

Hard to do this when hotpatching kernel, but we can easily do this when compiling our own. I believe Wowfunhappy did try this, and we tried to insert some logs at possible places that could have signalled THREAD_RESTART. We didn't find anything.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@krackers
Copy link

would that be possible with a hotpatch?

Everything is possible to do via patching, with enough time and effort. But for something like this one-off debugging it's easier to just compile your own kernel.

To notify the culprit process of what happened, would that be possible with a hotpatch?

Doesn't returning EBADF already do that? I believe when this is returned the relevant chromium process crashes, and we have the trace log. I believe we do know where the kqueue call happens from in chromium, but the issue is trying to understand why the THREAD_RESTART was signaled in the first place. And like all race-condition debugging, there's no really easy way to get this info.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@Wowfunhappy
Copy link
Author

Heisenbug? ;)

I wouldn't say that, we aren't able to reproduce the crash consistently but it happens often enough. However, none of us understand enough about Chromium internals to really dig into the cause.

We know the crash happens when Chromium encounters a bad file descriptor, because it always hits this PCHECK, in MessagePumpKqueue::DoInternalWork, right before the crash:

Which prints out as:

[22670:14595:0403/121124.951621:FATAL:message_pump_kqueue.cc(442)] Check failed: . kevent64: Bad file descriptor (9)

And of course the backtrace also implicates this function, I've attached a few backtraces here on the off-chance you'd like to see them: Chromium Legacy IO Thread Crashes (change the file extension from .png to .zip, Github is being a pain.)

But we have no idea why the file descriptor is invalid.

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@krackers
Copy link

work for checking the FD before handing it off to kevent64()?

But this wouldn't help if the FD is closed while we're blocked on the kqueue call though, as seems to be the case? Basically from what i understand in our setup the userspace chromium process makes the blocking call into kqueue, then on the kernel side of things it does it spin loop or whatever it does, and somewhere while it's busy doing the thread receives a THREAD_RESTART signal from somewhere (which previously was not handled and caused panic, now returns EBADF).

It's not even clear to me what exactly THREAD_RESTART means or under what cases it get signalled, especially in the context of kqueue. Docs only say for the code operation should be restarted entirely (only if PCATCH). The most likely source is that it originates from wait_queue_assert_wait64

	/* If it is an invalid wait queue, you cant wait on it */
	if (!wait_queue_is_valid(wq))
		return (thread->wait_result = THREAD_RESTART);

which is called from kqueue_scan_continue, but note that this is part of the continue portion of the kqueue processing loop. I.e. checking that the FD was valid before handing it off to the kernel wouldn't do any good, because some other process meddles with it while the kernel is doing it's thing.

@krackers
Copy link

Under what circumstances is a wait_queue made invalid?

https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html
https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html

Based on the definitions an invalid wait queue still has a valid pointer but it's wq_type is reset.

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@krackers
Copy link

doesn't cause instability elsewhere.

Returning EBADF is what newer XNU does, and from then on it's up to userspace to handle the code properly. So it should not cause any instability. But interestingly on newer xnu it seems we never hit that THREAD_RESTART case anyway, so it's possible they reworked some other part of the kqueue code.

mechanism spawn dedicated threads, for whatever reason/operation

Just by skimming the code I don't think so, there's no separate thread spawned for the wait loop on the kernel side. From the comment in wait_queue_assert_wait64 I get the impression THREAD_RESTART here is being signaled to say "something changed from beneath our feet, just retry the entire thing again". The thing I can't figure out is why you'd ever have a situation where a previously valid kqueue suddenly becomes invalid. Closing the fd from another process while one process is waiting on the kqueue shouldn't do anything because close syscall should only free it after all references are gone.

@krackers
Copy link

Actually osx docs say kqueue fd isn't even inherited by child processes

  The kqueue() system call creates a new kernel event queue and returns a
     descriptor.  The queue is not inherited by a child created with fork(2).

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@Wowfunhappy
Copy link
Author

...you know, there was that one report of odd stuff happening after the kqueue crash:

blueboxd/chromium-legacy#97 (comment)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment