-
-
Save Wowfunhappy/8212f5bea4c601ac9a6297789f232321 to your computer and use it in GitHub Desktop.
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
<plist version="1.0"> | |
<dict> | |
<key>CFBundleDevelopmentRegion</key> | |
<string>en</string> | |
<key>CFBundleExecutable</key> | |
<string>$(EXECUTABLE_NAME)</string> | |
<key>CFBundleIdentifier</key> | |
<string>wowfunhappy.$(PRODUCT_NAME:rfc1034identifier)</string> | |
<key>CFBundleInfoDictionaryVersion</key> | |
<string>6.0</string> | |
<key>CFBundleName</key> | |
<string>$(PRODUCT_NAME)</string> | |
<key>CFBundlePackageType</key> | |
<string>KEXT</string> | |
<key>CFBundleShortVersionString</key> | |
<string>1.0</string> | |
<key>CFBundleSignature</key> | |
<string>????</string> | |
<key>CFBundleVersion</key> | |
<string>1</string> | |
<key>OSBundleLibraries</key> | |
<dict> | |
<key>com.apple.kpi.iokit</key> | |
<string>10.0.0</string> | |
<key>com.apple.kpi.libkern</key> | |
<string>10.0.0</string> | |
<key>com.apple.kpi.unsupported</key> | |
<string>10.0.0</string> | |
</dict> | |
</dict> | |
</plist> |
// | |
// KQueueScanContinuePatch.c | |
// KQueueScanContinuePatch | |
// | |
// Created by Wowfunhappy with assistance from krackers. | |
// | |
#include <IOKit/IOLib.h> | |
#include <mach-o/loader.h> | |
#include <i386/proc_reg.h> | |
// Number of elements in a true array (not a pointer). The expansion and the
// argument are fully parenthesized so the macro is safe inside larger
// expressions such as `x / LENGTH(a)` or `LENGTH(a) - 1`.
#define LENGTH(arr) (sizeof(arr) / sizeof((arr)[0]))
// Copied from github.com/leiless/ksymresolver/blob/master/ksymresolver/ksymresolver.h
#define KERN_TEXT_BASE ((vm_offset_t) 0xffffff8000200000ULL)
// Bit mask applied to the value of CR0 to test/clear/set write protection.
#define CR0_WP 0x00010000
// Step (and alignment) used when scanning backwards for the kernel mach-o header.
static size_t KASLRAlignment = 0x100000;
// Number of rows in each of the per-kernel lookup tables below.
#if __LP64__
#define NUM_SUPPORTED_KERNELS 5
#else
#define NUM_SUPPORTED_KERNELS 1
#endif
// Taken from Hopper | |
// Unslid address, per supported kernel, of the first byte of the instruction
// sequence that gets overwritten. Row i pairs with row i of the end-location,
// search-byte and replacement-byte tables. Read-only lookup data, hence const.
static const vm_offset_t possible_kqueue_scan_continue_panic_start_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
    0xffffff800053b590, //10.7.5 11G63, xnu-1699.32.7
    0xffffff8000557afe, //10.8.4 12F2560, xnu-2050.48.19
    0xffffff80005c7a9c, //10.9.5 13F1911, xnu-2422.115.15
    0xffffff8000615928, //10.9.5 Bronya-Ryzen
    0xffffff80005bf9bc, //10.9.5 IPCA
#else
    0x0055bb7e, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
// Unslid address, per supported kernel, one past the last byte that gets
// overwritten (the region [start, end) is filled with the replacement bytes
// followed by NOPs). Row i pairs with row i of the start-location table.
// Read-only lookup data, hence const.
static const vm_offset_t possible_kqueue_scan_continue_panic_end_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
    0xffffff800053b5a2, //10.7.5 11G63, xnu-1699.32.7
    0xffffff8000557b11, //10.8.4 12F2560, xnu-2050.48.19
    0xffffff80005c7ab7, //10.9.5 13F1911, xnu-2422.115.15
    0xffffff8000615942, //10.9.5 Bronya-Ryzen
    0xffffff80005bf9d7, //10.9.5 IPCA
#else
    0x0055bb90, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
// Expected machine code (30 bytes per kernel) at the corresponding start
// location. The patch is only applied when these bytes match exactly, which
// guards against patching an unsupported or already-patched kernel.
// Stored as uint8_t because many values exceed 0x7f and the signedness of
// plain char is implementation-defined; const because the table is never written.
static const uint8_t possible_search_bytes[NUM_SUPPORTED_KERNELS][30] = {
#if __LP64__
    //10.7.5 11G63, xnu-1699.32.7
    {
        0x48, 0x8d, 0x3d, 0x69, 0xf4, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
        0x30, 0xc0, //xor al,al
        0x89, 0xde, //mov esi,ebx
        0xe8, 0xa0, 0x4f, 0xce, 0xff, //call _panic
        0x31, 0xdb, //xor ebx,ebx
        0x89, 0xda, 0x49, 0x8b, 0xb7, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xf7,
    },
    //10.8.4 12F2560, xnu-2050.48.19
    {
        0x48, 0x8d, 0x3d, 0x77, 0xe9, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
        0x89, 0xde, //mov esi,ebx
        0x30, 0xc0, //xor al,al
        0xe8, 0x02, 0x5b, 0xcc, 0xff, //call _panic
        0x45, 0x31, 0xed, //xor r13d,r13d
        0x49, 0x8b, 0xb6, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xff, 0x44,
    },
    //10.9.5 13F1911, xnu-2422.115.15
    {
        0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
        0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
        0x44, 0x89, 0xfa, //mov edx,r15d
        0x30, 0xc0, //xor al,al
        0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
        0x45, 0x31, 0xe4, //xor r12d,r12d
        0x49, 0x8b, 0xb5,
    },
    //10.9.5 Bronya-Ryzen
    // NOTE(review): unlike the other rows, the bytes captured after the call
    // contain no xor, so the register targeted by this kernel's replacement
    // bytes cannot be cross-checked from this table - confirm against the disassembly.
    {
        0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
        0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
        0x44, 0x89, 0xfa, //mov edx,r15d
        0x30, 0xc0, //xor al,al
        0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
        0x49, 0x8b, 0x47, 0x70, 0x49, 0x8b,
    },
    //10.9.5 IPCA
    {
        0x48, 0x8d, 0x3d, 0x7c, 0x08, 0x1a, 0x00, //lea rdi,qword [aSInvalidWaitre]
        0x48, 0x8d, 0x35, 0xdd, 0x08, 0x1a, 0x00, //lea rsi, qword [aKqueuescancont]
        0x44, 0x89, 0xfa, //mov edx,r15d
        0x30, 0xc0, //xor al,al
        0xe8, 0xdc, 0x34, 0xc6, 0xff, //call _panic
        0x45, 0x31, 0xe4, //xor r12d, r12d
        0x49, 0x8b, 0xb5,
    },
#else
    //10.7.5 11G63, xnu-1699.32.7 (32 bit)
    {
        0x89, 0x4c, 0x24, 0x04, //mov dword[esp+0x28+var_24],ecx
        0xc7, 0x04, 0x24, 0xa4, 0x06, 0x70, 0x00, //mov dword[esp+0x28+var_28],aKeventscancont
        0xe8, 0x32, 0x46, 0xcc, 0xff, //call _panic
        0x31, 0xf6, //xor esi,esi
        0x8b, 0x45, 0xec, 0x8b, 0x88, 0x9c, 0x00, 0x00, 0x00, 0x89, 0x74, 0x24,
    },
#endif
};
// Replacement machine code (7 bytes per kernel) written at the start location;
// the rest of the region up to the end location is filled with NOPs by the
// start routine. Shorter encodings are already padded to 7 bytes with 0x90.
// Stored as const uint8_t: values exceed 0x7f (plain char signedness is
// implementation-defined) and the table is never written.
static const uint8_t possible_replacement_bytes[NUM_SUPPORTED_KERNELS][7] = {
#if __LP64__
    {0x48, 0xc7, 0xc3, 0x09, 0x00, 0x00, 0x00}, // mov rbx,0x9 | 10.7.5 11G63, xnu-1699.32.7
    {0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.8.4 12F2560, xnu-2050.48.19
    {0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 13F1911, xnu-2422.115.15
    {0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.9.5 Bronya-Ryzen
    {0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 IPCA
#else
    {0xbe, 0x09, 0x00, 0x00, 0x00, 0x90, 0x90}, // mov esi,0x9 | 10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
// Adapted from github.com/acidanthera/Lilu/blob/137b4d9deb7022bb97fa9899303934534ff20ec7/Lilu/Sources/kern_mach.cpp | |
static vm_offset_t get_kernel_base() { | |
#if __LP64__ | |
// The function choice is mostly arbitrary, but IOLog frequently has a low address. | |
vm_offset_t tmp = (vm_offset_t)(IOLog); | |
// Align the address | |
tmp &= ~(KASLRAlignment - 1); | |
// Search backwards for the kernel base address (mach-o header) | |
while (tmp >= KASLRAlignment) { | |
if (*(uint32_t *)(tmp) == MH_MAGIC_64) { | |
// make sure it's the header and not some reference to the MAGIC number | |
struct segment_command_64 segmentCommand = *(struct segment_command_64 *)(tmp + sizeof(struct mach_header_64)); | |
if (!strncmp(segmentCommand.segname, "__TEXT", sizeof(segmentCommand.segname))) { | |
printf("KQueueScanContinuePatch: found kernel mach-o header address at %lx\n", tmp); | |
break; | |
} | |
} | |
tmp -= KASLRAlignment; | |
} | |
return tmp - KERN_TEXT_BASE; | |
#else | |
// No 32-bit XNU kernels have KASLR | |
return 0; | |
#endif | |
} | |
boolean_t write_protection_is_enabled() { | |
return (get_cr0() & CR0_WP) != 0; | |
} | |
/*
 * Kext start routine: hot-patch the running kernel.
 *
 * For each supported kernel, the 30 bytes at (kernel offset + start location)
 * are compared with that kernel's expected search bytes. On the first match,
 * the region [start, end) is overwritten with that kernel's 7 replacement
 * bytes followed by NOPs (0x90). While writing, interrupts are disabled and
 * the CR0 write-protect bit is cleared; both are restored afterwards.
 *
 * ki, d: unused.
 *
 * Returns KERN_SUCCESS after a successful patch. Returns KERN_FAILURE when no
 * supported kernel matches, when the patch region is smaller than the
 * replacement bytes, or when interrupts / write protection cannot be disabled.
 * Panics if write protection or interrupts cannot be restored.
 */
kern_return_t KQueueScanContinuePatch_start(kmod_info_t * ki, void *d)
{
    printf("KQueueScanContinuePatch START\n");

    const vm_offset_t kernel_base = get_kernel_base();
    const size_t candidate_count = LENGTH(possible_kqueue_scan_continue_panic_start_locations);
    const size_t search_size = sizeof(possible_search_bytes[0]);
    const size_t replacement_size = sizeof(possible_replacement_bytes[0]);

    vm_offset_t kqueue_scan_continue_panic_start_location = 0;
    vm_offset_t kqueue_scan_continue_panic_end_location = 0;
    const void *replacement_bytes = NULL;
    uint8_t *kscpb = NULL;
    int found = 0;

    // Find the table row whose expected bytes are present in the running kernel.
    for (size_t i = 0; i < candidate_count; i++) {
        const vm_offset_t candidate_start = kernel_base + possible_kqueue_scan_continue_panic_start_locations[i];
        if (memcmp((const void *)candidate_start, possible_search_bytes[i], search_size) != 0) {
            continue;
        }
        kqueue_scan_continue_panic_start_location = candidate_start;
        kqueue_scan_continue_panic_end_location = kernel_base + possible_kqueue_scan_continue_panic_end_locations[i];
        replacement_bytes = possible_replacement_bytes[i];
        kscpb = (uint8_t *)candidate_start;
        found = 1;
        break;
    }
    if (!found) {
        printf("KQueueScanContinuePatch: Memory region not found. You are probably using an unsupported kernel, or your kernel has already been patched.\n");
        return KERN_FAILURE;
    }

    printf("KQueueScanContinuePatch: Pre-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
           kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9],
           kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19],
           kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29],
           kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);

    // Validate the region size BEFORE subtracting: the operands are unsigned,
    // so a region shorter than the replacement would wrap to a huge NOP-fill
    // length and the memset below would overwrite far more than intended.
    if (kqueue_scan_continue_panic_end_location < kqueue_scan_continue_panic_start_location ||
        kqueue_scan_continue_panic_end_location - kqueue_scan_continue_panic_start_location < replacement_size) {
        printf("KQueueScanContinuePatch: Patch region is smaller than the replacement bytes; refusing to patch.\n");
        return KERN_FAILURE;
    }
    const size_t extra_space_to_fill =
        kqueue_scan_continue_panic_end_location - kqueue_scan_continue_panic_start_location - replacement_size;

    boolean_t interrupts_were_enabled = ml_get_interrupts_enabled();
    if (interrupts_were_enabled) {
        ml_set_interrupts_enabled(false);
        if (! ml_get_interrupts_enabled()) {
            printf("KQueueScanContinuePatch: Disabled interrupts\n");
        } else {
            printf("KQueueScanContinuePatch: Failed to disable interrupts\n");
            return KERN_FAILURE;
        }
    }

    boolean_t write_protection_was_enabled = write_protection_is_enabled();
    if (write_protection_was_enabled) {
        set_cr0(get_cr0() & ~CR0_WP); // disable write protection
        if (!write_protection_is_enabled()) {
            printf("KQueueScanContinuePatch: Disabled write protection\n");
        } else {
            printf("KQueueScanContinuePatch: Failed to disable write protection\n");
            //Re-enable interrupts before exiting.
            if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
                ml_set_interrupts_enabled(true);
                if (ml_get_interrupts_enabled()) {
                    printf("KQueueScanContinuePatch: Re-enabled interrupts after failing to disable write protection.\n");
                } else {
                    panic("KQueueScanContinuePatch: Failed to re-enable interrupts after failing to disable write protection!\n");
                }
            }
            return KERN_FAILURE;
        }
    }

    //commence memory rewriting
    memcpy(kscpb, replacement_bytes, replacement_size);
    // Byte-pointer arithmetic (arithmetic on void * is a compiler extension).
    memset(kscpb + replacement_size, 0x90 /*nop*/, extra_space_to_fill);
    printf("KQueueScanContinuePatch: Memory rewritten\n");
    //conclude memory rewriting

    if (write_protection_was_enabled && !write_protection_is_enabled()) {
        set_cr0(get_cr0() | CR0_WP); // re-enable write protection
        if (write_protection_is_enabled()) {
            printf("KQueueScanContinuePatch: Re-enabled write protection\n");
        } else {
            panic("KQueueScanContinuePatch: failed to re-enable write protection!\n");
        }
    }

    if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
        ml_set_interrupts_enabled(true);
        if (ml_get_interrupts_enabled()) {
            printf("KQueueScanContinuePatch: Re-enabled interrupts\n");
        } else {
            panic("KQueueScanContinuePatch: Failed to re-enable interrupts!\n");
        }
    }

    printf("KQueueScanContinuePatch: Post-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
           kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9],
           kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19],
           kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29],
           kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);
    return KERN_SUCCESS;
}
/*
 * Kext stop routine: logs a message and reports success. It does not undo
 * the patch applied by the start routine.
 *
 * ki, d: unused.
 */
kern_return_t KQueueScanContinuePatch_stop(kmod_info_t *ki, void *d)
{
    const kern_return_t status = KERN_SUCCESS;

    printf("KQueueScanContinuePatch STOP\n");
    return status;
}
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
<plist version="1.0"> | |
<dict> | |
<key>Label</key> | |
<string>wowfunhappy.KQueueScanContinuePatch-loader</string> | |
<key>ProgramArguments</key> | |
<array> | |
<string>/bin/sh</string> | |
<string>-c</string> | |
<string>/bin/cp -R /Library/Extensions/KQueueScanContinuePatch.kext /tmp/ && /sbin/kextload /tmp/KQueueScanContinuePatch.kext ; /bin/rm -r /tmp/KQueueScanContinuePatch.kext</string> | |
</array> | |
<key>RunAtLoad</key> | |
<true/> | |
</dict> | |
</plist> |
RJVB
commented
Feb 17, 2023
via email
The issue is closed as fixed btw?!
Yeah, but that's an issue in mainline Chromium.
I suspect the way Google fixed it broke Chromium Legacy. IE it's broken on old OSs, or otherwise causes a race condition when used alongside Bluebox's other patches.
See the discussion in the github issue for things we tried to investigate. I think the first step if you wanted to go about fixing it would be to create a minimal repro. It's still not clear to me what conditions could lead to THREAD_RESTART while waiting on a kqueue. I'm assuming this is some sort of race condition that only appears when waiting on a gazillion descriptors or something, and only Chrome exhibits it because of how they use IPC for everything.
Chrome still uses Mojo IPC. Seems like kqueue on mac can watch mach ports
Introduce MessagePumpKqueue and make it the MessagePumpForIO on macOS.
MessagePumpKqueue is a WatchableIOMessagePumpPosix capable of watching
FDs, like the MessagePumpLibevent it is replacing (note that Libevent is
still used on other POSIX systems). However MessagePumpKqueue can also
watch Mach port receive rights for notification of new Mach messages,
which libevent cannot do.
The underlying wait primitive for both is a kqueue, so the performance
characteristics are expected to be similar. MessagePumpLibevent uses a
pipe as the ScheduleWork signal source, while MessagePumpKqueue uses
a Mach port, so MessagePumpKqueue will consume two fewer FDs per
instance.
information to a log
Hard to do this when hotpatching kernel, but we can easily do this when compiling our own. I believe Wowfunhappy did try this, and we tried to insert some logs at possible places that could have signalled THREAD_RESTART
. We didn't find anything.
would that be possible with a hotpatch?
Everything is possible to do via patching, with enough time and effort. But for something like this one-off debugging it's easier to just compile your own kernel.
To notify the culprit process of what happened, would that be possible with a hotpatch?
Doesn't returning EBADF already do that? I believe when this is returned the relevant chromium process crashes, and we have the trace log. I believe we do know where the kqueue call happens from in chromium, but the issue is trying to understand why the THREAD_RESTART was signaled in the first place. And like all race-condition debugging, there's no really easy way to get this info.
Heisenbug? ;)
I wouldn't say that, we aren't able to reproduce the crash consistently but it happens often enough. However, none of us understand enough about Chromium internals to really dig into the cause.
We know the crash happens when Chromium encounters a bad file descriptor, because it always hits this PCHECK, in MessagePumpKqueue::DoInternalWork
, right before the crash:
Which prints out as:
[22670:14595:0403/121124.951621:FATAL:message_pump_kqueue.cc(442)] Check failed: . kevent64: Bad file descriptor (9)
And of course the backtrace also implicates this function, I've attached a few backtraces here on the off-chance you'd like to see them: (change the file extension from .png to .zip, Github is being a pain.)
But we have no idea why the file descriptor is invalid.
work for checking the FD before handing it off to kevent64()?
But this wouldn't help if the FD is closed while we're blocked on the kqueue call though, as seems to be the case? Basically from what i understand in our setup the userspace chromium process makes the blocking call into kqueue
, then on the kernel side of things it does its spin loop or whatever it does, and somewhere while it's busy doing that, the thread receives a THREAD_RESTART
signal from somewhere (which previously was not handled and caused panic, now returns EBADF
).
It's not even clear to me what exactly THREAD_RESTART
means or under what cases it get signalled, especially in the context of kqueue. Docs only say for the code operation should be restarted entirely (only if PCATCH)
. The most likely source is that it originates from wait_queue_assert_wait64
/* If it is an invalid wait queue, you cant wait on it */
if (!wait_queue_is_valid(wq))
return (thread->wait_result = THREAD_RESTART);
which is called from kqueue_scan_continue
, but note that this is part of the continue
portion of the kqueue processing loop. I.e. checking that the FD was valid before handing it off to the kernel wouldn't do any good, because some other process meddles with it while the kernel is doing its thing.
Under what circumstances is a wait_queue
made invalid?
https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html
https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html
Based on the definitions, an invalid wait queue still has a valid pointer but its wq_type
is reset.
doesn't cause instability elsewhere.
Returning EBADF
is what newer XNU does, and from then on it's up to userspace to handle the code properly. So it should not cause any instability. But interestingly on newer xnu it seems we never hit that THREAD_RESTART
case anyway, so it's possible they reworked some other part of the kqueue code.
mechanism spawn dedicated threads, for whatever reason/operation
Just by skimming the code I don't think so, there's no separate thread spawned for the wait loop on the kernel side. From the comment in wait_queue_assert_wait64
I get the impression THREAD_RESTART
here is being signaled to say "something changed from beneath our feet, just retry the entire thing again". The thing I can't figure out is why you'd ever have a situation where a previously valid kqueue suddenly becomes invalid. Closing the fd from another process while one process is waiting on the kqueue shouldn't do anything because close syscall should only free it after all references are gone.
Actually osx docs say kqueue fd isn't even inherited by child processes
The kqueue() system call creates a new kernel event queue and returns a
descriptor. The queue is not inherited by a child created with fork(2).
...you know, there was that one report of odd stuff happening after the kqueue crash: