Skip to content

Instantly share code, notes, and snippets.

@Wowfunhappy
Last active September 8, 2024 19:50
Show Gist options
  • Save Wowfunhappy/8212f5bea4c601ac9a6297789f232321 to your computer and use it in GitHub Desktop.
Save Wowfunhappy/8212f5bea4c601ac9a6297789f232321 to your computer and use it in GitHub Desktop.
KQueueScanContinuePatch
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>wowfunhappy.$(PRODUCT_NAME:rfc1034identifier)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>KEXT</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>OSBundleLibraries</key>
<dict>
<key>com.apple.kpi.iokit</key>
<string>10.0.0</string>
<key>com.apple.kpi.libkern</key>
<string>10.0.0</string>
<key>com.apple.kpi.unsupported</key>
<string>10.0.0</string>
</dict>
</dict>
</plist>
//
// KQueueScanContinuePatch.c
// KQueueScanContinuePatch
//
// Created by Wowfunhappy with assistance from krackers.
//
#include <IOKit/IOLib.h>
#include <mach-o/loader.h>
#include <i386/proc_reg.h>
#define LENGTH(arr) sizeof(arr) / sizeof(*arr)
// Copied from github.com/leiless/ksymresolver/blob/master/ksymresolver/ksymresolver.h
#define KERN_TEXT_BASE ((vm_offset_t) 0xffffff8000200000ULL)
#define CR0_WP 0x00010000
static size_t KASLRAlignment = 0x100000;
#if __LP64__
#define NUM_SUPPORTED_KERNELS 5
#else
#define NUM_SUPPORTED_KERNELS 1
#endif
// Taken from Hopper
static vm_offset_t possible_kqueue_scan_continue_panic_start_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
0xffffff800053b590, //10.7.5 11G63, xnu-1699.32.7
0xffffff8000557afe, //10.8.4 12F2560, xnu-2050.48.19
0xffffff80005c7a9c, //10.9.5 13F1911, xnu-2422.115.15
0xffffff8000615928, //10.9.5 Bronya-Ryzen
0xffffff80005bf9bc, //10.9.5 IPCA
#else
0x0055bb7e, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
static vm_offset_t possible_kqueue_scan_continue_panic_end_locations[NUM_SUPPORTED_KERNELS] = {
#if __LP64__
0xffffff800053b5a2, //10.7.5 11G63, xnu-1699.32.7
0xffffff8000557b11, //10.8.4 12F2560, xnu-2050.48.19
0xffffff80005c7ab7, //10.9.5 13F1911, xnu-2422.115.15
0xffffff8000615942, //10.9.5 Bronya-Ryzen
0xffffff80005bf9d7, //10.9.5 IPCA
#else
0x0055bb90, //10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
static char possible_search_bytes[NUM_SUPPORTED_KERNELS][30] = {
#if __LP64__
//10.7.5 11G63, xnu-1699.32.7
{
0x48, 0x8d, 0x3d, 0x69, 0xf4, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
0x30, 0xc0, //xor al,al
0x89, 0xde, //mov esi,ebx
0xe8, 0xa0, 0x4f, 0xce, 0xff, //call _panic
0x31, 0xdb, 0x89, //xor ebx,ebx
0xda, 0x49, 0x8b, 0xb7, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xf7,
},
//10.8.4 12F2560, xnu-2050.48.19
{
0x48, 0x8d, 0x3d, 0x77, 0xe9, 0x19, 0x00, //lea rdi,qword [aKeventscancont]
0x89, 0xde, //mov esi,ebx
0x30, 0xc0, //xor al,al
0xe8, 0x02, 0x5b, 0xcc, 0xff, //call _panic
0x45, 0x31, 0xed, //xor r13d,r13d
0x49, 0x8b, 0xb6, 0xc0, 0x00, 0x00, 0x00, 0x4c, 0x89, 0xff, 0x44,
},
//10.9.5 13F1911, xnu-2422.115.15
{
0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
0x45, 0x31, 0xe4, //xor r12d,r12d
0x49, 0x8b, 0xb5,
},
//10.9.5 Bronya-Ryzen
{
0x48, 0x8d, 0x3d, 0xbb, 0xf3, 0x19, 0x00, //lea rdi,qword
0x48, 0x8d, 0x35, 0x11, 0xf4, 0x19, 0x00, //lea rsi,qword
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xbc, 0xb5, 0xc5, 0xff, //call _panic
0x49, 0x8b, 0x47, 0x70, 0x49, 0x8b,
},
//10.9.5 IPCA
{
0x48, 0x8d, 0x3d, 0x7c, 0x08, 0x1a, 0x00, //lea rdi,qword [aSInvalidWaitre]
0x48, 0x8d, 0x35, 0xdd, 0x08, 0x1a, 0x00, //lea rsi, qword [aKqueuescancont]
0x44, 0x89, 0xfa, //mov edx,r15d
0x30, 0xc0, //xor al,al
0xe8, 0xdc, 0x34, 0xc6, 0xff, //call _panic
0x45, 0x31, 0xe4, //xor r12d, r12d
0x49, 0x8b, 0xb5,
},
#else
//10.7.5 11G63, xnu-1699.32.7 (32 bit)
{
0x89, 0x4c, 0x24, 0x04, //mov dword[esp+0x28+var_24],ecx
0xc7, 0x04, 0x24, 0xa4, 0x06, 0x70, 0x00, //mov dword[esp+0x28+var_28],aKeventscancont
0xe8, 0x32, 0x46, 0xcc, 0xff, //call _panic
0x31, 0xf6, //xor esi,esi
0x8b, 0x45, 0xec, 0x8b, 0x88, 0x9c, 0x00, 0x00, 0x00, 0x89, 0x74, 0x24,
},
#endif
};
static char possible_replacement_bytes[NUM_SUPPORTED_KERNELS][7] = {
#if __LP64__
{0x48, 0xc7, 0xc3, 0x09, 0x00, 0x00, 0x00}, // mov rbx,0x9 | 10.7.5 11G63, xnu-1699.32.7
{0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.8.4 12F2560, xnu-2050.48.19
{0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 13F1911, xnu-2422.115.15
{0x41, 0xbd, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r13d,0x9 | 10.9.5 Bronya-Ryzen
{0x41, 0xbc, 0x09, 0x00, 0x00, 0x00, 0x90}, // mov r12d,0x9 | 10.9.5 IPCA
#else
{0xbe, 0x09, 0x00, 0x00, 0x00, 0x90, 0x90}, // mov esi,0x9 | 10.7.5 11G63, xnu-1699.32.7 (32 bit)
#endif
};
// Adapted from github.com/acidanthera/Lilu/blob/137b4d9deb7022bb97fa9899303934534ff20ec7/Lilu/Sources/kern_mach.cpp
static vm_offset_t get_kernel_base() {
#if __LP64__
// The function choice is mostly arbitrary, but IOLog frequently has a low address.
vm_offset_t tmp = (vm_offset_t)(IOLog);
// Align the address
tmp &= ~(KASLRAlignment - 1);
// Search backwards for the kernel base address (mach-o header)
while (tmp >= KASLRAlignment) {
if (*(uint32_t *)(tmp) == MH_MAGIC_64) {
// make sure it's the header and not some reference to the MAGIC number
struct segment_command_64 segmentCommand = *(struct segment_command_64 *)(tmp + sizeof(struct mach_header_64));
if (!strncmp(segmentCommand.segname, "__TEXT", sizeof(segmentCommand.segname))) {
printf("KQueueScanContinuePatch: found kernel mach-o header address at %lx\n", tmp);
break;
}
}
tmp -= KASLRAlignment;
}
return tmp - KERN_TEXT_BASE;
#else
// No 32-bit XNU kernels have KASLR
return 0;
#endif
}
boolean_t write_protection_is_enabled() {
return (get_cr0() & CR0_WP) != 0;
}
kern_return_t KQueueScanContinuePatch_start(kmod_info_t * ki, void *d)
{
printf("KQueueScanContinuePatch START\n");
vm_offset_t kernel_base = get_kernel_base();
vm_offset_t kqueue_scan_continue_panic_start_location = 0;
vm_offset_t kqueue_scan_continue_panic_end_location = 0;
char search_bytes[sizeof(possible_search_bytes[0])];
char replacement_bytes[sizeof(possible_replacement_bytes[0])];
uint8_t *kscpb = NULL;
for (int i = 0; i < LENGTH(possible_kqueue_scan_continue_panic_start_locations); i++) {
kqueue_scan_continue_panic_start_location = kernel_base + possible_kqueue_scan_continue_panic_start_locations[i];
kqueue_scan_continue_panic_end_location = kernel_base + possible_kqueue_scan_continue_panic_end_locations[i];
memcpy(search_bytes, possible_search_bytes[i], sizeof(search_bytes));
memcpy(replacement_bytes, possible_replacement_bytes[i], sizeof(replacement_bytes));
kscpb = (uint8_t*) kqueue_scan_continue_panic_start_location;
if (memcmp(kscpb, search_bytes, sizeof(search_bytes)) == 0) {
break;
}
if (i == LENGTH(possible_kqueue_scan_continue_panic_start_locations) - 1) {
printf("KQueueScanContinuePatch: Memory region not found. You are probably using an unsupported kernel, or your kernel has already been patched.\n");
return KERN_FAILURE;
}
}
printf("KQueueScanContinuePatch: Pre-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9], kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19], kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29], kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);
unsigned long extra_space_to_fill = kqueue_scan_continue_panic_end_location - kqueue_scan_continue_panic_start_location - sizeof(replacement_bytes);
if (kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes) + extra_space_to_fill != kqueue_scan_continue_panic_end_location) {
printf("kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes) + extra_space_to_fill != kqueue_scan_continue_panic_end_location\n");
return KERN_FAILURE;
}
boolean_t interrupts_were_enabled = ml_get_interrupts_enabled();
if (interrupts_were_enabled) {
ml_set_interrupts_enabled(false);
if (! ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Disabled interrupts\n");
} else {
printf("KQueueScanContinuePatch: Failed to disable interrupts\n");
return KERN_FAILURE;
}
}
boolean_t write_protection_was_enabled = write_protection_is_enabled();
if (write_protection_was_enabled) {
set_cr0(get_cr0() & ~CR0_WP); // disable write protection
if (!write_protection_is_enabled()) {
printf("KQueueScanContinuePatch: Disabled write protection\n");
} else {
printf("KQueueScanContinuePatch: Failed to disable write protection\n");
//Re-enable interrupts before exiting.
if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
ml_set_interrupts_enabled(true);
if (ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled interrupts after failing to disable write protection.\n");
} else {
panic("KQueueScanContinuePatch: Failed to re-enable interrupts after failing to disable write protection!\n");
}
}
return KERN_FAILURE;
}
}
//commence memory rewriting
memcpy((void *)kqueue_scan_continue_panic_start_location, replacement_bytes, sizeof(replacement_bytes));
memset((void *)kqueue_scan_continue_panic_start_location + sizeof(replacement_bytes), 0x90 /*nop*/, extra_space_to_fill);
printf("KQueueScanContinuePatch: Memory rewritten\n");
//conclude memory rewriting
if (write_protection_was_enabled && !write_protection_is_enabled()) {
set_cr0(get_cr0() | CR0_WP); // re-enable write protection
if (write_protection_is_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled write protection\n");
} else {
panic("KQueueScanContinuePatch: failed to re-enable write protection!\n");
}
}
if (interrupts_were_enabled && !ml_get_interrupts_enabled()) {
ml_set_interrupts_enabled(true);
if (ml_get_interrupts_enabled()) {
printf("KQueueScanContinuePatch: Re-enabled interrupts\n");
} else {
panic("KQueueScanContinuePatch: Failed to re-enable interrupts!\n");
}
}
printf("KQueueScanContinuePatch: Post-Patch: Bytes at kqueue_scan_continue panic location: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", kscpb[0], kscpb[1], kscpb[2], kscpb[3], kscpb[4], kscpb[5], kscpb[6], kscpb[7], kscpb[8], kscpb[9], kscpb[10], kscpb[11], kscpb[12], kscpb[13], kscpb[14], kscpb[15], kscpb[16], kscpb[17], kscpb[18], kscpb[19], kscpb[20], kscpb[21], kscpb[22], kscpb[23], kscpb[24], kscpb[25], kscpb[26], kscpb[27], kscpb[28], kscpb[29], kscpb[30], kscpb[31], kscpb[32], kscpb[33], kscpb[34], kscpb[35], kscpb[36], kscpb[37], kscpb[38], kscpb[39]);
return KERN_SUCCESS;
}
kern_return_t KQueueScanContinuePatch_stop(kmod_info_t *ki, void *d)
{
printf("KQueueScanContinuePatch STOP\n");
return KERN_SUCCESS;
}
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>wowfunhappy.KQueueScanContinuePatch-loader</string>
<key>ProgramArguments</key>
<array>
<string>/bin/sh</string>
<string>-c</string>
<string>/bin/cp -R /Library/Extensions/KQueueScanContinuePatch.kext /tmp/ &amp;&amp; /sbin/kextload /tmp/KQueueScanContinuePatch.kext ; /bin/rm -r /tmp/KQueueScanContinuePatch.kext</string>
</array>
<key>RunAtLoad</key>
<true/>
</dict>
</plist>
@Wowfunhappy
Copy link
Author

Wowfunhappy commented Feb 13, 2023

Have you ever thought of back-porting the XNU fix and giving users the possibility to install a new kernel?

Yes, it's super easy! In kern_event.c, in the kqueue_scan_continue function, add to the switch (wait_result) statement:

case THREAD_RESTART:
	error = EBADF;
	break;
Full code of fixed kqueue_scan_continue function:
static void
kqueue_scan_continue(void *data, wait_result_t wait_result)
{
	thread_t self = current_thread();
	uthread_t ut = (uthread_t)get_bsdthread_info(self);
	struct _kqueue_scan * cont_args = &ut->uu_kevent.ss_kqueue_scan;
	struct kqueue *kq = (struct kqueue *)data;
	int error;
	int count;

	/* convert the (previous) wait_result to a proper error */
	switch (wait_result) {
	case THREAD_AWAKENED:
		kqlock(kq);
		error = kqueue_process(kq, cont_args->call, cont_args, &count,
		    current_proc());
		if (error == 0 && count == 0) {
			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs,
			    KQ_EVENT, THREAD_ABORTSAFE, cont_args->deadline);
			kq->kq_state |= KQ_SLEEP;
			kqunlock(kq);
			thread_block_parameter(kqueue_scan_continue, kq);
			/* NOTREACHED */
		}
		kqunlock(kq);
		break;
	case THREAD_TIMED_OUT:
		error = EWOULDBLOCK;
		break;
	case THREAD_INTERRUPTED:
		error = EINTR;
		break;
	case THREAD_RESTART:
		error = EBADF;
		break;
	default:
		panic("%s: - invalid wait_result (%d)", __func__,
		    wait_result);
		error = 0;
	}

	/* call the continuation with the results */
	assert(cont_args->cont != NULL);
	(cont_args->cont)(kq, cont_args->data, error);
}

If you don't want to build the kernel yourself I have a binary here: blueboxd/chromium-legacy#44 (comment) (for OS X 10.9.5).

The downside is that the open source version of XNU is missing some features of the official kernels. Most notably for me, iMessage won't work. @krackers also says the open source kernels are missing some power saving functions on Haswell CPUs—I don't know if that affects you—and there plausibly may be other features missing too.

I didn't need more than that glance to see that even that older version of the underlying engine contained way too many changes in way too much code. So kudos for those who have delved into this!

That's all bluebox! I can't give him enough credit, it's an insane project he's maintaining by himself! I don't think there's currently anything like it for other browsers. MacPorts has webkit2-gtk but I've never been able to make that work well. Parrotgeek had a Firefox Legacy at one point for Lion/Mountain Lion, but not anymore and it's behind what works natively on Mavericks.

I would encourage you to report that audio issue on Github unless it happens in upstream Chromium as well. I have a feeling it's related to the error I posted yesterday but I don't have hardware which reproduces the actual problem.

@RJVB
Copy link

RJVB commented Feb 13, 2023 via email

@krackers
Copy link

The most transparent option would have been to add the kext to your Chromium appbundle and invoke a privileged helper through the AppExec that loads the kext before starting Chromium and unloads it again on exit.

Yeah that'd probably be cleaner, although last I checked writing native privileged helpers on osx is a pain. But maybe you could do a one-time manual install of a binary with setuid 0 permission or something, and then invoke that. It's not the first time that someone has mentioned not seeing the dialog by the prefpane though.

I could imagine they would leave out all system integrity features

Up until 10.8 I think the xnu source was fully featured modulo bits needed for iMessage, it's on 10.9 onward that they gimped things for people using haswell or newer.

You don't happen to know about anyone who does a similar job for Firefox

Some older XUL based forks of Firefox still work-ish on 10.9. SeaMonkey only recent stopped building for 10.9 and I just building with an older sdk would fix it. Palemoon apparently still supports 10.9. But these don't really supported all newer web features like css l4 selectors and webcomponents.

@RJVB
Copy link

RJVB commented Feb 16, 2023 via email

@Wowfunhappy
Copy link
Author

BTW, any chance that the OS X/Chromium panic is triggered by a codepath not taken in an X11 based build

I doubt it, the crash is in Chromium's IO thread, when Chromium tries to access an invalid file descriptor. I suspect it has something to do with this change based on a preponderance of things https://bugs.chromium.org/p/chromium/issues/detail?id=932175, but no one really understands what is going on.

That said, please feel free to take a look at it. At the moment I don't think anyone has built modern Chromium for Mac with X11, it would be neat if you could do that!

The GTk/X11 version of WebKit still seems to build and I'd be perfectly happy running a decent browser under X11

It builds but it's very buggy in my experience!


Keep in mind that even if you unload the kext, it doesn't ever really stop working until you reboot. The process is as follows:

  1. The kext loads.
  2. The kext looks for a precise sequence of bytes at a specific memory address. (These bytes represent the code for the bugged switch statement.) If it sees the right bytes (which means you're running a supported, unpatched kernel), it replaces them with new ones.
  3. The kext exits

You can unload the kext at this point, but it won't matter. Kernel memory has been rewritten until you reboot.

Now, I could theoretically add code to put back the original bytes when the kext is unloaded, but... why? IMO, rewriting kernel memory twice instead of once just opens more possibilities for things to go wrong. (Not that I think the kext is unsafe, but I don't like the fact that I'm working in the kernel at all, and I did it only because I felt I had no other choice.)

@RJVB
Copy link

RJVB commented Feb 16, 2023 via email

@krackers
Copy link

krackers commented Feb 16, 2023

Remember that I even gave up on fixing up a newer

I have hopes that Ladybird will be a greenfield browser that has a modern enough rendering engine and is simple enough to port cross platform.

Of course the nuclear option is to do something like https://fathy.fr/carbonyl and then run chrome on a platform where it's supported and then stream the framebuffer back to your device. But at that point you may as well just use a VM, or RDP or something.

@RJVB
Copy link

RJVB commented Feb 16, 2023 via email

@RJVB
Copy link

RJVB commented Feb 16, 2023 via email

@Wowfunhappy
Copy link
Author

Wowfunhappy commented Feb 16, 2023 via email

@krackers
Copy link

Right but the gui portion can hopefully be replaced with native cocoa, it's really the rendering & js engine that we need. And for libc++ we can always statically link that, or provide it in the bundle directly.

@RJVB
Copy link

RJVB commented Feb 16, 2023 via email

@RJVB
Copy link

RJVB commented Feb 17, 2023 via email

@Wowfunhappy
Copy link
Author

The issue is closed as fixed btw?!

Yeah, but that's an issue in mainline Chromium.

I suspect the way Google fixed it broke Chromium Legacy. IE it's broken on old OSs, or otherwise causes a race condition when used alongside Bluebox's other patches.

@krackers
Copy link

See the discussion in the github issue for things we tried to investigate. I think the first step if you wanted to go about fixing would be to create a minimal repro. It's still not clear to me under what conditions could lead to THREAD_RESTART while waiting on a kqueue. I'm assuming this is some sort of race condition that only appears when waiting on a gazillion descriptors or something, and only Chrome exhibits it because of how they use IPC for everything.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@krackers
Copy link

Chrome still uses Mojo IPC. Seems like kqueue on mac can watch mach ports

Introduce MessagePumpKqueue and make it the MessagePumpForIO on macOS.

MessagePumpKqueue is a WatchableIOMessagePumpPosix capable of watching
FDs, like the MessagePumpLibevent it is replacing (note that Libevent is
still used on other POSIX systems). However MessagePumpKqueue can also
watch Mach port receive rights for notification of new Mach messages,
which libevent cannot do.

The underlying wait primitive for both is a kqueue, so the performance
characteristics are expected to be similar. MessagePumpLibevent uses a
pipe as as the ScheduleWork signal source, while MessagePumpKqueue uses
a Mach port, so MessagePumpKqueue will consume two fewer FDs per
instance.

information to a log

Hard to do this when hotpatching kernel, but we can easily do this when compiling our own. I believe Wowfunhappy did try this, and we tried to insert some logs at possible places that could have signalled THREAD_RESTART. We didn't find anything.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@krackers
Copy link

would that be possible with a hotpatch?

Everything is possible to do via patching, with enough time and effort. But for something like this one-off debugging it's easier to just compile your own kernel.

To notify the culprit process of what happened, would that be possible with a hotpatch?

Doesn't returning EBADF already do that? I believe when this is returned the relevant chromium process crashes, and we have the trace log. I believe we do know where the kqueue call happens from in chromium, but the issue is trying to understand why the THREAD_RESTART was signaled in the first place. And like all race-condition debugging, there's no really easy way to get this info.

@RJVB
Copy link

RJVB commented Feb 18, 2023 via email

@Wowfunhappy
Copy link
Author

Heisenbug? ;)

I wouldn't say that, we aren't able to reproduce the crash consistently but it happens often enough. However, none of us understand enough about Chromium internals to really dig into the cause.

We know the crash happens when Chromium encounters a bad file descriptor, because it always hits this PCHECK, in MessagePumpKqueue::DoInternalWork, right before the crash:

Which prints out as:

[22670:14595:0403/121124.951621:FATAL:message_pump_kqueue.cc(442)] Check failed: . kevent64: Bad file descriptor (9)

And of course the backtrace also implicates this function, I've attached a few backtraces here on the off-chance you'd like to see them: Chromium Legacy IO Thread Crashes (change the file extension from .png to .zip, Github is being a pain.)

But we have no idea why the file descriptor is invalid.

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@krackers
Copy link

work for checking the FD before handing it off to kevent64()?

But this wouldn't help if the FD is closed while we're blocked on the kqueue call though, as seems to be the case? Basically from what i understand in our setup the userspace chromium process makes the blocking call into kqueue, then on the kernel side of things it does it spin loop or whatever it does, and somewhere while it's busy doing the thread receives a THREAD_RESTART signal from somewhere (which previously was not handled and caused panic, now returns EBADF).

It's not even clear to me what exactly THREAD_RESTART means or under what cases it get signalled, especially in the context of kqueue. Docs only say for the code operation should be restarted entirely (only if PCATCH). The most likely source is that it originates from wait_queue_assert_wait64

	/* If it is an invalid wait queue, you cant wait on it */
	if (!wait_queue_is_valid(wq))
		return (thread->wait_result = THREAD_RESTART);

which is called from kqueue_scan_continue, but note that this is part of the continue portion of the kqueue processing loop. I.e. checking that the FD was valid before handing it off to the kernel wouldn't do any good, because some other process meddles with it while the kernel is doing it's thing.

@krackers
Copy link

Under what circumstances is a wait_queue made invalid?

https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html
https://opensource.apple.com/source/xnu/xnu-344.21.74/osfmk/kern/wait_queue.c.auto.html

Based on the definitions an invalid wait queue still has a valid pointer but it's wq_type is reset.

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@krackers
Copy link

doesn't cause instability elsewhere.

Returning EBADF is what newer XNU does, and from then on it's up to userspace to handle the code properly. So it should not cause any instability. But interestingly on newer xnu it seems we never hit that THREAD_RESTART case anyway, so it's possible they reworked some other part of the kqueue code.

mechanism spawn dedicated threads, for whatever reason/operation

Just by skimming the code I don't think so, there's no separate thread spawned for the wait loop on the kernel side. From the comment in wait_queue_assert_wait64 I get the impression THREAD_RESTART here is being signaled to say "something changed from beneath our feet, just retry the entire thing again". The thing I can't figure out is why you'd ever have a situation where a previously valid kqueue suddenly becomes invalid. Closing the fd from another process while one process is waiting on the kqueue shouldn't do anything because close syscall should only free it after all references are gone.

@krackers
Copy link

Actually osx docs say kqueue fd isn't even inherited by child processes

  The kqueue() system call creates a new kernel event queue and returns a
     descriptor.  The queue is not inherited by a child created with fork(2).

@RJVB
Copy link

RJVB commented Feb 19, 2023 via email

@Wowfunhappy
Copy link
Author

...you know, there was that one report of odd stuff happening after the kqueue crash:

blueboxd/chromium-legacy#97 (comment)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment