Modified version of kern_event.c to stop XNU 2422.115.4 from panicking when running Chromium Legacy. See: https://github.com/blueboxd/chromium-legacy/issues/44
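Two files are included: a modified ipc/ipc_mqueue.c and the modified kern_event.c. Relative to the stock 2422 sources, the changes appear to be diagnostic only: printf logging around the wait-queue wakeups in ipc_mqueue_changed() and ipc_mqueue_destroy(), and, in kern_event.c, a small local mirror of the wait-queue header plus a validity check that logs before asserting a wait on a kqueue wait queue that no longer looks initialized.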
ipc_mqueue.c
/* | |
* Copyright (c) 2000-2007 Apple Inc. All rights reserved. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
* | |
* This file contains Original Code and/or Modifications of Original Code | |
* as defined in and that are subject to the Apple Public Source License | |
* Version 2.0 (the 'License'). You may not use this file except in | |
* compliance with the License. The rights granted to you under the License | |
* may not be used to create, or enable the creation or redistribution of, | |
* unlawful or unlicensed copies of an Apple operating system, or to | |
* circumvent, violate, or enable the circumvention or violation of, any | |
* terms of an Apple operating system software license agreement. | |
* | |
* Please obtain a copy of the License at | |
* http://www.opensource.apple.com/apsl/ and read it before using this file. | |
* | |
* The Original Code and all software distributed under the License are | |
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
* Please see the License for the specific language governing rights and | |
* limitations under the License. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
*/ | |
/* | |
* @OSF_FREE_COPYRIGHT@ | |
*/ | |
/* | |
* Mach Operating System | |
* Copyright (c) 1991,1990,1989 Carnegie Mellon University | |
* All Rights Reserved. | |
* | |
* Permission to use, copy, modify and distribute this software and its | |
* documentation is hereby granted, provided that both the copyright | |
* notice and this permission notice appear in all copies of the | |
* software, derivative works or modified versions, and any portions | |
* thereof, and that both notices appear in supporting documentation. | |
* | |
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
* | |
* Carnegie Mellon requests users of this software to return to | |
* | |
* Software Distribution Coordinator or [email protected] | |
* School of Computer Science | |
* Carnegie Mellon University | |
* Pittsburgh PA 15213-3890 | |
* | |
* any improvements or extensions that they make and grant Carnegie Mellon | |
* the rights to redistribute these changes. | |
*/ | |
/* | |
*/ | |
/* | |
* File: ipc/ipc_mqueue.c | |
* Author: Rich Draves | |
* Date: 1989 | |
* | |
* Functions to manipulate IPC message queues. | |
*/ | |
/* | |
* NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce | |
* support for mandatory and extensible security protections. This notice | |
* is included in support of clause 2.2 (b) of the Apple Public License, | |
* Version 2.0. | |
*/ | |
#include <mach/port.h> | |
#include <mach/message.h> | |
#include <mach/sync_policy.h> | |
#include <kern/assert.h> | |
#include <kern/counters.h> | |
#include <kern/sched_prim.h> | |
#include <kern/ipc_kobject.h> | |
#include <kern/ipc_mig.h> /* XXX - for mach_msg_receive_continue */ | |
#include <kern/misc_protos.h> | |
#include <kern/task.h> | |
#include <kern/thread.h> | |
#include <kern/wait_queue.h> | |
#include <ipc/ipc_mqueue.h> | |
#include <ipc/ipc_kmsg.h> | |
#include <ipc/ipc_port.h> | |
#include <ipc/ipc_pset.h> | |
#include <ipc/ipc_space.h> | |
#ifdef __LP64__ | |
#include <vm/vm_map.h> | |
#endif | |
#if CONFIG_MACF_MACH | |
#include <security/mac_mach_internal.h> | |
#endif | |
int ipc_mqueue_full; /* address is event for queue space */ | |
int ipc_mqueue_rcv; /* address is event for message arrival */ | |
/* forward declarations */ | |
void ipc_mqueue_receive_results(wait_result_t result); | |
/* | |
* Routine: ipc_mqueue_init | |
* Purpose: | |
* Initialize a newly-allocated message queue. | |
*/ | |
void | |
ipc_mqueue_init( | |
ipc_mqueue_t mqueue, | |
boolean_t is_set) | |
{ | |
if (is_set) { | |
wait_queue_set_init(&mqueue->imq_set_queue, SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST); | |
} else { | |
wait_queue_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO); | |
ipc_kmsg_queue_init(&mqueue->imq_messages); | |
mqueue->imq_seqno = 0; | |
mqueue->imq_msgcount = 0; | |
mqueue->imq_qlimit = MACH_PORT_QLIMIT_DEFAULT; | |
mqueue->imq_fullwaiters = FALSE; | |
} | |
} | |
/* | |
* Routine: ipc_mqueue_member | |
* Purpose: | |
* Indicate whether the (port) mqueue is a member of | |
* this portset's mqueue. We do this by checking | |
 *	whether the portset mqueue's waitq is a member of
* the port's mqueue waitq. | |
* Conditions: | |
* the portset's mqueue is not already a member | |
* this may block while allocating linkage structures. | |
*/ | |
boolean_t | |
ipc_mqueue_member( | |
ipc_mqueue_t port_mqueue, | |
ipc_mqueue_t set_mqueue) | |
{ | |
wait_queue_t port_waitq = &port_mqueue->imq_wait_queue; | |
wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue; | |
return (wait_queue_member(port_waitq, set_waitq)); | |
} | |
/* | |
* Routine: ipc_mqueue_remove | |
* Purpose: | |
* Remove the association between the queue and the specified | |
* set message queue. | |
*/ | |
kern_return_t | |
ipc_mqueue_remove( | |
ipc_mqueue_t mqueue, | |
ipc_mqueue_t set_mqueue, | |
wait_queue_link_t *wqlp) | |
{ | |
wait_queue_t mq_waitq = &mqueue->imq_wait_queue; | |
wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue; | |
return wait_queue_unlink_nofree(mq_waitq, set_waitq, wqlp); | |
} | |
/* | |
* Routine: ipc_mqueue_remove_from_all | |
* Purpose: | |
* Remove the mqueue from all the sets it is a member of | |
* Conditions: | |
* Nothing locked. | |
*/ | |
void | |
ipc_mqueue_remove_from_all( | |
ipc_mqueue_t mqueue, | |
queue_t links) | |
{ | |
wait_queue_t mq_waitq = &mqueue->imq_wait_queue; | |
wait_queue_unlink_all_nofree(mq_waitq, links); | |
return; | |
} | |
/* | |
* Routine: ipc_mqueue_remove_all | |
* Purpose: | |
* Remove all the member queues from the specified set. | |
* Conditions: | |
* Nothing locked. | |
*/ | |
void | |
ipc_mqueue_remove_all( | |
ipc_mqueue_t mqueue, | |
queue_t links) | |
{ | |
wait_queue_set_t mq_setq = &mqueue->imq_set_queue; | |
wait_queue_set_unlink_all_nofree(mq_setq, links); | |
return; | |
} | |
/* | |
* Routine: ipc_mqueue_add | |
* Purpose: | |
* Associate the portset's mqueue with the port's mqueue. | |
* This has to be done so that posting the port will wakeup | |
* a portset waiter. If there are waiters on the portset | |
* mqueue and messages on the port mqueue, try to match them | |
* up now. | |
* Conditions: | |
* May block. | |
*/ | |
kern_return_t | |
ipc_mqueue_add( | |
ipc_mqueue_t port_mqueue, | |
ipc_mqueue_t set_mqueue, | |
wait_queue_link_t wql) | |
{ | |
wait_queue_t port_waitq = &port_mqueue->imq_wait_queue; | |
wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue; | |
ipc_kmsg_queue_t kmsgq; | |
ipc_kmsg_t kmsg, next; | |
kern_return_t kr; | |
spl_t s; | |
kr = wait_queue_link_noalloc(port_waitq, set_waitq, wql); | |
if (kr != KERN_SUCCESS) | |
return kr; | |
/* | |
* Now that the set has been added to the port, there may be | |
* messages queued on the port and threads waiting on the set | |
	 * waitq. Let's get them together.
*/ | |
s = splsched(); | |
imq_lock(port_mqueue); | |
kmsgq = &port_mqueue->imq_messages; | |
for (kmsg = ipc_kmsg_queue_first(kmsgq); | |
kmsg != IKM_NULL; | |
kmsg = next) { | |
next = ipc_kmsg_queue_next(kmsgq, kmsg); | |
for (;;) { | |
thread_t th; | |
mach_msg_size_t msize; | |
th = wait_queue_wakeup64_identity_locked( | |
port_waitq, | |
IPC_MQUEUE_RECEIVE, | |
THREAD_AWAKENED, | |
FALSE); | |
/* waitq/mqueue still locked, thread locked */ | |
if (th == THREAD_NULL) | |
goto leave; | |
/* | |
* If the receiver waited with a facility not directly | |
* related to Mach messaging, then it isn't prepared to get | |
* handed the message directly. Just set it running, and | |
* go look for another thread that can. | |
*/ | |
if (th->ith_state != MACH_RCV_IN_PROGRESS) { | |
thread_unlock(th); | |
continue; | |
} | |
/* | |
* Found a receiver. see if they can handle the message | |
* correctly (the message is not too large for them, or | |
* they didn't care to be informed that the message was | |
* too large). If they can't handle it, take them off | |
* the list and let them go back and figure it out and | |
* just move onto the next. | |
*/ | |
msize = ipc_kmsg_copyout_size(kmsg, th->map); | |
if (th->ith_msize < | |
(msize + REQUESTED_TRAILER_SIZE(thread_is_64bit(th), th->ith_option))) { | |
th->ith_state = MACH_RCV_TOO_LARGE; | |
th->ith_msize = msize; | |
if (th->ith_option & MACH_RCV_LARGE) { | |
/* | |
* let him go without message | |
*/ | |
th->ith_receiver_name = port_mqueue->imq_receiver_name; | |
th->ith_kmsg = IKM_NULL; | |
th->ith_seqno = 0; | |
thread_unlock(th); | |
continue; /* find another thread */ | |
} | |
} else { | |
th->ith_state = MACH_MSG_SUCCESS; | |
} | |
/* | |
* This thread is going to take this message, | |
* so give it to him. | |
*/ | |
ipc_kmsg_rmqueue(kmsgq, kmsg); | |
ipc_mqueue_release_msgcount(port_mqueue); | |
th->ith_kmsg = kmsg; | |
th->ith_seqno = port_mqueue->imq_seqno++; | |
thread_unlock(th); | |
break; /* go to next message */ | |
} | |
} | |
leave: | |
imq_unlock(port_mqueue); | |
splx(s); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: ipc_mqueue_changed | |
* Purpose: | |
* Wake up receivers waiting in a message queue. | |
* Conditions: | |
* The message queue is locked. | |
*/ | |
void | |
ipc_mqueue_changed( | |
ipc_mqueue_t mqueue) | |
{ | |
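	/*
	 * Debug logging added in this modified build: trace the broadcast
	 * wakeup so the path involved in the panic under investigation
	 * shows up in the system log.
	 */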
printf("\nAbout to call wait_queue_wakeup64_all_locked from ipc_mqueue_changed\n"); | |
wait_queue_wakeup64_all_locked( | |
&mqueue->imq_wait_queue, | |
IPC_MQUEUE_RECEIVE, | |
THREAD_RESTART, | |
FALSE); /* unlock waitq? */ | |
} | |
/* | |
* Routine: ipc_mqueue_send | |
* Purpose: | |
* Send a message to a message queue. The message holds a reference | |
* for the destination port for this message queue in the | |
* msgh_remote_port field. | |
* | |
* If unsuccessful, the caller still has possession of | |
* the message and must do something with it. If successful, | |
* the message is queued, given to a receiver, or destroyed. | |
* Conditions: | |
* mqueue is locked. | |
* Returns: | |
* MACH_MSG_SUCCESS The message was accepted. | |
* MACH_SEND_TIMED_OUT Caller still has message. | |
* MACH_SEND_INTERRUPTED Caller still has message. | |
*/ | |
mach_msg_return_t | |
ipc_mqueue_send( | |
ipc_mqueue_t mqueue, | |
ipc_kmsg_t kmsg, | |
mach_msg_option_t option, | |
mach_msg_timeout_t send_timeout, | |
spl_t s) | |
{ | |
int wresult; | |
/* | |
* Don't block if: | |
* 1) We're under the queue limit. | |
* 2) Caller used the MACH_SEND_ALWAYS internal option. | |
* 3) Message is sent to a send-once right. | |
*/ | |
if (!imq_full(mqueue) || | |
(!imq_full_kernel(mqueue) && | |
((option & MACH_SEND_ALWAYS) || | |
(MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits) == | |
MACH_MSG_TYPE_PORT_SEND_ONCE)))) { | |
mqueue->imq_msgcount++; | |
assert(mqueue->imq_msgcount > 0); | |
imq_unlock(mqueue); | |
splx(s); | |
} else { | |
thread_t cur_thread = current_thread(); | |
uint64_t deadline; | |
/* | |
* We have to wait for space to be granted to us. | |
*/ | |
if ((option & MACH_SEND_TIMEOUT) && (send_timeout == 0)) { | |
imq_unlock(mqueue); | |
splx(s); | |
return MACH_SEND_TIMED_OUT; | |
} | |
if (imq_full_kernel(mqueue)) { | |
imq_unlock(mqueue); | |
splx(s); | |
return MACH_SEND_NO_BUFFER; | |
} | |
mqueue->imq_fullwaiters = TRUE; | |
thread_lock(cur_thread); | |
if (option & MACH_SEND_TIMEOUT) | |
clock_interval_to_deadline(send_timeout, 1000*NSEC_PER_USEC, &deadline); | |
else | |
deadline = 0; | |
wresult = wait_queue_assert_wait64_locked( | |
&mqueue->imq_wait_queue, | |
IPC_MQUEUE_FULL, | |
THREAD_ABORTSAFE, | |
TIMEOUT_URGENCY_USER_NORMAL, | |
deadline, 0, | |
cur_thread); | |
thread_unlock(cur_thread); | |
imq_unlock(mqueue); | |
splx(s); | |
if (wresult == THREAD_WAITING) { | |
wresult = thread_block(THREAD_CONTINUE_NULL); | |
counter(c_ipc_mqueue_send_block++); | |
} | |
switch (wresult) { | |
case THREAD_TIMED_OUT: | |
assert(option & MACH_SEND_TIMEOUT); | |
return MACH_SEND_TIMED_OUT; | |
case THREAD_AWAKENED: | |
/* we can proceed - inherited msgcount from waker */ | |
assert(mqueue->imq_msgcount > 0); | |
break; | |
case THREAD_INTERRUPTED: | |
return MACH_SEND_INTERRUPTED; | |
case THREAD_RESTART: | |
/* mqueue is being destroyed */ | |
return MACH_SEND_INVALID_DEST; | |
default: | |
panic("ipc_mqueue_send"); | |
} | |
} | |
ipc_mqueue_post(mqueue, kmsg); | |
return MACH_MSG_SUCCESS; | |
} | |
/* | |
* Routine: ipc_mqueue_release_msgcount | |
* Purpose: | |
* Release a message queue reference in the case where we | |
* found a waiter. | |
* | |
* Conditions: | |
* The message queue is locked. | |
* The message corresponding to this reference is off the queue. | |
*/ | |
void | |
ipc_mqueue_release_msgcount( | |
ipc_mqueue_t mqueue) | |
{ | |
assert(imq_held(mqueue)); | |
assert(mqueue->imq_msgcount > 1 || ipc_kmsg_queue_empty(&mqueue->imq_messages)); | |
mqueue->imq_msgcount--; | |
if (!imq_full(mqueue) && mqueue->imq_fullwaiters) { | |
if (wait_queue_wakeup64_one_locked( | |
&mqueue->imq_wait_queue, | |
IPC_MQUEUE_FULL, | |
THREAD_AWAKENED, | |
FALSE) != KERN_SUCCESS) { | |
mqueue->imq_fullwaiters = FALSE; | |
} else { | |
/* gave away our slot - add reference back */ | |
mqueue->imq_msgcount++; | |
} | |
} | |
} | |
/* | |
* Routine: ipc_mqueue_post | |
* Purpose: | |
* Post a message to a waiting receiver or enqueue it. If a | |
* receiver is waiting, we can release our reserved space in | |
* the message queue. | |
* | |
* Conditions: | |
* If we need to queue, our space in the message queue is reserved. | |
*/ | |
void | |
ipc_mqueue_post( | |
register ipc_mqueue_t mqueue, | |
register ipc_kmsg_t kmsg) | |
{ | |
spl_t s; | |
/* | |
* While the msg queue is locked, we have control of the | |
* kmsg, so the ref in it for the port is still good. | |
* | |
* Check for a receiver for the message. | |
*/ | |
s = splsched(); | |
imq_lock(mqueue); | |
for (;;) { | |
wait_queue_t waitq = &mqueue->imq_wait_queue; | |
thread_t receiver; | |
mach_msg_size_t msize; | |
receiver = wait_queue_wakeup64_identity_locked( | |
waitq, | |
IPC_MQUEUE_RECEIVE, | |
THREAD_AWAKENED, | |
FALSE); | |
/* waitq still locked, thread locked */ | |
if (receiver == THREAD_NULL) { | |
/* | |
* no receivers; queue kmsg | |
*/ | |
assert(mqueue->imq_msgcount > 0); | |
ipc_kmsg_enqueue_macro(&mqueue->imq_messages, kmsg); | |
break; | |
} | |
/* | |
* If the receiver waited with a facility not directly | |
* related to Mach messaging, then it isn't prepared to get | |
* handed the message directly. Just set it running, and | |
* go look for another thread that can. | |
*/ | |
if (receiver->ith_state != MACH_RCV_IN_PROGRESS) { | |
thread_unlock(receiver); | |
continue; | |
} | |
/* | |
* We found a waiting thread. | |
* If the message is too large or the scatter list is too small | |
* the thread we wake up will get that as its status. | |
*/ | |
msize = ipc_kmsg_copyout_size(kmsg, receiver->map); | |
if (receiver->ith_msize < | |
(msize + REQUESTED_TRAILER_SIZE(thread_is_64bit(receiver), receiver->ith_option))) { | |
receiver->ith_msize = msize; | |
receiver->ith_state = MACH_RCV_TOO_LARGE; | |
} else { | |
receiver->ith_state = MACH_MSG_SUCCESS; | |
} | |
/* | |
* If there is no problem with the upcoming receive, or the | |
* receiver thread didn't specifically ask for special too | |
* large error condition, go ahead and select it anyway. | |
*/ | |
if ((receiver->ith_state == MACH_MSG_SUCCESS) || | |
!(receiver->ith_option & MACH_RCV_LARGE)) { | |
receiver->ith_kmsg = kmsg; | |
receiver->ith_seqno = mqueue->imq_seqno++; | |
thread_unlock(receiver); | |
/* we didn't need our reserved spot in the queue */ | |
ipc_mqueue_release_msgcount(mqueue); | |
break; | |
} | |
/* | |
* Otherwise, this thread needs to be released to run | |
* and handle its error without getting the message. We | |
* need to go back and pick another one. | |
*/ | |
receiver->ith_receiver_name = mqueue->imq_receiver_name; | |
receiver->ith_kmsg = IKM_NULL; | |
receiver->ith_seqno = 0; | |
thread_unlock(receiver); | |
} | |
imq_unlock(mqueue); | |
splx(s); | |
current_task()->messages_sent++; | |
return; | |
} | |
/* static */ void | |
ipc_mqueue_receive_results(wait_result_t saved_wait_result) | |
{ | |
thread_t self = current_thread(); | |
mach_msg_option_t option = self->ith_option; | |
/* | |
* why did we wake up? | |
*/ | |
switch (saved_wait_result) { | |
case THREAD_TIMED_OUT: | |
self->ith_state = MACH_RCV_TIMED_OUT; | |
return; | |
case THREAD_INTERRUPTED: | |
self->ith_state = MACH_RCV_INTERRUPTED; | |
return; | |
case THREAD_RESTART: | |
/* something bad happened to the port/set */ | |
self->ith_state = MACH_RCV_PORT_CHANGED; | |
return; | |
case THREAD_AWAKENED: | |
/* | |
* We do not need to go select a message, somebody | |
* handed us one (or a too-large indication). | |
*/ | |
switch (self->ith_state) { | |
case MACH_RCV_SCATTER_SMALL: | |
case MACH_RCV_TOO_LARGE: | |
/* | |
* Somebody tried to give us a too large | |
* message. If we indicated that we cared, | |
* then they only gave us the indication, | |
* otherwise they gave us the indication | |
* AND the message anyway. | |
*/ | |
if (option & MACH_RCV_LARGE) { | |
return; | |
} | |
case MACH_MSG_SUCCESS: | |
return; | |
default: | |
panic("ipc_mqueue_receive_results: strange ith_state"); | |
} | |
default: | |
panic("ipc_mqueue_receive_results: strange wait_result"); | |
} | |
} | |
void | |
ipc_mqueue_receive_continue( | |
__unused void *param, | |
wait_result_t wresult) | |
{ | |
ipc_mqueue_receive_results(wresult); | |
mach_msg_receive_continue(); /* hard-coded for now */ | |
} | |
/* | |
* Routine: ipc_mqueue_receive | |
* Purpose: | |
* Receive a message from a message queue. | |
* | |
* If continuation is non-zero, then we might discard | |
* our kernel stack when we block. We will continue | |
* after unblocking by executing continuation. | |
* | |
* If resume is true, then we are resuming a receive | |
* operation after a blocked receive discarded our stack. | |
* Conditions: | |
* Our caller must hold a reference for the port or port set | |
* to which this queue belongs, to keep the queue | |
* from being deallocated. | |
* | |
* The kmsg is returned with clean header fields | |
* and with the circular bit turned off. | |
* Returns: | |
* MACH_MSG_SUCCESS Message returned in kmsgp. | |
* MACH_RCV_TOO_LARGE Message size returned in kmsgp. | |
* MACH_RCV_TIMED_OUT No message obtained. | |
* MACH_RCV_INTERRUPTED No message obtained. | |
* MACH_RCV_PORT_DIED Port/set died; no message. | |
* MACH_RCV_PORT_CHANGED Port moved into set; no msg. | |
* | |
*/ | |
void | |
ipc_mqueue_receive( | |
ipc_mqueue_t mqueue, | |
mach_msg_option_t option, | |
mach_msg_size_t max_size, | |
mach_msg_timeout_t rcv_timeout, | |
int interruptible) | |
{ | |
wait_result_t wresult; | |
thread_t self = current_thread(); | |
wresult = ipc_mqueue_receive_on_thread(mqueue, option, max_size, | |
rcv_timeout, interruptible, | |
self); | |
if (wresult == THREAD_NOT_WAITING) | |
return; | |
if (wresult == THREAD_WAITING) { | |
counter((interruptible == THREAD_ABORTSAFE) ? | |
c_ipc_mqueue_receive_block_user++ : | |
c_ipc_mqueue_receive_block_kernel++); | |
if (self->ith_continuation) | |
thread_block(ipc_mqueue_receive_continue); | |
/* NOTREACHED */ | |
wresult = thread_block(THREAD_CONTINUE_NULL); | |
} | |
ipc_mqueue_receive_results(wresult); | |
} | |
wait_result_t | |
ipc_mqueue_receive_on_thread( | |
ipc_mqueue_t mqueue, | |
mach_msg_option_t option, | |
mach_msg_size_t max_size, | |
mach_msg_timeout_t rcv_timeout, | |
int interruptible, | |
thread_t thread) | |
{ | |
ipc_kmsg_queue_t kmsgs; | |
wait_result_t wresult; | |
uint64_t deadline; | |
spl_t s; | |
#if CONFIG_MACF_MACH | |
ipc_labelh_t lh; | |
task_t task; | |
int rc; | |
#endif | |
s = splsched(); | |
imq_lock(mqueue); | |
if (imq_is_set(mqueue)) { | |
queue_t q; | |
q = &mqueue->imq_preposts; | |
/* | |
* If we are waiting on a portset mqueue, we need to see if | |
* any of the member ports have work for us. Ports that | |
* have (or recently had) messages will be linked in the | |
* prepost queue for the portset. By holding the portset's | |
* mqueue lock during the search, we tie up any attempts by | |
* mqueue_deliver or portset membership changes that may | |
* cross our path. | |
*/ | |
search_set: | |
while(!queue_empty(q)) { | |
wait_queue_link_t wql; | |
ipc_mqueue_t port_mq; | |
queue_remove_first(q, wql, wait_queue_link_t, wql_preposts); | |
assert(!wql_is_preposted(wql)); | |
/* | |
* This is a lock order violation, so we have to do it | |
* "softly," putting the link back on the prepost list | |
* if it fails (at the tail is fine since the order of | |
* handling messages from different sources in a set is | |
* not guaranteed and we'd like to skip to the next source | |
* if one is available). | |
*/ | |
port_mq = (ipc_mqueue_t)wql->wql_queue; | |
if (!imq_lock_try(port_mq)) { | |
queue_enter(q, wql, wait_queue_link_t, wql_preposts); | |
imq_unlock(mqueue); | |
splx(s); | |
mutex_pause(0); | |
s = splsched(); | |
imq_lock(mqueue); | |
goto search_set; /* start again at beginning - SMP */ | |
} | |
/* | |
* If there are no messages on this queue, just skip it | |
* (we already removed the link from the set's prepost queue). | |
*/ | |
kmsgs = &port_mq->imq_messages; | |
if (ipc_kmsg_queue_first(kmsgs) == IKM_NULL) { | |
imq_unlock(port_mq); | |
continue; | |
} | |
/* | |
* There are messages, so reinsert the link back | |
* at the tail of the preposted queue (for fairness) | |
* while we still have the portset mqueue locked. | |
*/ | |
queue_enter(q, wql, wait_queue_link_t, wql_preposts); | |
imq_unlock(mqueue); | |
/* | |
* Continue on to handling the message with just | |
* the port mqueue locked. | |
*/ | |
ipc_mqueue_select_on_thread(port_mq, option, max_size, thread); | |
imq_unlock(port_mq); | |
#if CONFIG_MACF_MACH | |
if (thread->task != TASK_NULL && | |
thread->ith_kmsg != NULL && | |
thread->ith_kmsg->ikm_sender != NULL) { | |
lh = thread->ith_kmsg->ikm_sender->label; | |
tasklabel_lock(thread->task); | |
ip_lock(lh->lh_port); | |
rc = mac_port_check_receive(&thread->task->maclabel, | |
&lh->lh_label); | |
ip_unlock(lh->lh_port); | |
tasklabel_unlock(thread->task); | |
if (rc) | |
thread->ith_state = MACH_RCV_INVALID_DATA; | |
} | |
#endif | |
splx(s); | |
return THREAD_NOT_WAITING; | |
} | |
} else { | |
/* | |
* Receive on a single port. Just try to get the messages. | |
*/ | |
kmsgs = &mqueue->imq_messages; | |
if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) { | |
ipc_mqueue_select_on_thread(mqueue, option, max_size, thread); | |
imq_unlock(mqueue); | |
#if CONFIG_MACF_MACH | |
if (thread->task != TASK_NULL && | |
thread->ith_kmsg != NULL && | |
thread->ith_kmsg->ikm_sender != NULL) { | |
lh = thread->ith_kmsg->ikm_sender->label; | |
tasklabel_lock(thread->task); | |
ip_lock(lh->lh_port); | |
rc = mac_port_check_receive(&thread->task->maclabel, | |
&lh->lh_label); | |
ip_unlock(lh->lh_port); | |
tasklabel_unlock(thread->task); | |
if (rc) | |
thread->ith_state = MACH_RCV_INVALID_DATA; | |
} | |
#endif | |
splx(s); | |
return THREAD_NOT_WAITING; | |
} | |
} | |
/* | |
* Looks like we'll have to block. The mqueue we will | |
* block on (whether the set's or the local port's) is | |
* still locked. | |
*/ | |
if (option & MACH_RCV_TIMEOUT) { | |
if (rcv_timeout == 0) { | |
imq_unlock(mqueue); | |
splx(s); | |
thread->ith_state = MACH_RCV_TIMED_OUT; | |
return THREAD_NOT_WAITING; | |
} | |
} | |
thread_lock(thread); | |
thread->ith_state = MACH_RCV_IN_PROGRESS; | |
thread->ith_option = option; | |
thread->ith_msize = max_size; | |
if (option & MACH_RCV_TIMEOUT) | |
clock_interval_to_deadline(rcv_timeout, 1000*NSEC_PER_USEC, &deadline); | |
else | |
deadline = 0; | |
wresult = wait_queue_assert_wait64_locked(&mqueue->imq_wait_queue, | |
IPC_MQUEUE_RECEIVE, | |
interruptible, | |
TIMEOUT_URGENCY_USER_NORMAL, | |
deadline, 0, | |
thread); | |
/* preposts should be detected above, not here */ | |
if (wresult == THREAD_AWAKENED) | |
panic("ipc_mqueue_receive_on_thread: sleep walking"); | |
thread_unlock(thread); | |
imq_unlock(mqueue); | |
splx(s); | |
return wresult; | |
} | |
/* | |
* Routine: ipc_mqueue_select_on_thread | |
* Purpose: | |
* A receiver discovered that there was a message on the queue | |
* before he had to block. Pick the message off the queue and | |
* "post" it to thread. | |
* Conditions: | |
* mqueue locked. | |
* thread not locked. | |
* There is a message. | |
* Returns: | |
* MACH_MSG_SUCCESS Actually selected a message for ourselves. | |
 *	MACH_RCV_TOO_LARGE	May or may not have pulled it, but it is large
*/ | |
void | |
ipc_mqueue_select_on_thread( | |
ipc_mqueue_t mqueue, | |
mach_msg_option_t option, | |
mach_msg_size_t max_size, | |
thread_t thread) | |
{ | |
ipc_kmsg_t kmsg; | |
mach_msg_return_t mr = MACH_MSG_SUCCESS; | |
mach_msg_size_t rcv_size; | |
/* | |
* Do some sanity checking of our ability to receive | |
* before pulling the message off the queue. | |
*/ | |
kmsg = ipc_kmsg_queue_first(&mqueue->imq_messages); | |
assert(kmsg != IKM_NULL); | |
/* | |
* If we really can't receive it, but we had the | |
* MACH_RCV_LARGE option set, then don't take it off | |
* the queue, instead return the appropriate error | |
* (and size needed). | |
*/ | |
rcv_size = ipc_kmsg_copyout_size(kmsg, thread->map); | |
if (rcv_size + REQUESTED_TRAILER_SIZE(thread_is_64bit(thread), option) > max_size) { | |
mr = MACH_RCV_TOO_LARGE; | |
if (option & MACH_RCV_LARGE) { | |
thread->ith_receiver_name = mqueue->imq_receiver_name; | |
thread->ith_kmsg = IKM_NULL; | |
thread->ith_msize = rcv_size; | |
thread->ith_seqno = 0; | |
thread->ith_state = mr; | |
return; | |
} | |
} | |
ipc_kmsg_rmqueue_first_macro(&mqueue->imq_messages, kmsg); | |
ipc_mqueue_release_msgcount(mqueue); | |
thread->ith_seqno = mqueue->imq_seqno++; | |
thread->ith_kmsg = kmsg; | |
thread->ith_state = mr; | |
current_task()->messages_received++; | |
return; | |
} | |
/* | |
* Routine: ipc_mqueue_peek | |
* Purpose: | |
* Peek at a (non-set) message queue to see if it has a message | |
* matching the sequence number provided (if zero, then the | |
* first message in the queue) and return vital info about the | |
* message. | |
* | |
* Conditions: | |
* Locks may be held by callers, so this routine cannot block. | |
* Caller holds reference on the message queue. | |
*/ | |
unsigned | |
ipc_mqueue_peek(ipc_mqueue_t mq, | |
mach_port_seqno_t *seqnop, | |
mach_msg_size_t *msg_sizep, | |
mach_msg_id_t *msg_idp, | |
mach_msg_max_trailer_t *msg_trailerp) | |
{ | |
ipc_kmsg_queue_t kmsgq; | |
ipc_kmsg_t kmsg; | |
mach_port_seqno_t seqno, msgoff; | |
int res = 0; | |
spl_t s; | |
assert(!imq_is_set(mq)); | |
s = splsched(); | |
imq_lock(mq); | |
	seqno = (seqnop != NULL) ? *seqnop : 0;
if (seqno == 0) { | |
seqno = mq->imq_seqno; | |
msgoff = 0; | |
} else if (seqno >= mq->imq_seqno && | |
seqno < mq->imq_seqno + mq->imq_msgcount) { | |
msgoff = seqno - mq->imq_seqno; | |
} else | |
goto out; | |
/* look for the message that would match that seqno */ | |
kmsgq = &mq->imq_messages; | |
kmsg = ipc_kmsg_queue_first(kmsgq); | |
while (msgoff-- && kmsg != IKM_NULL) { | |
kmsg = ipc_kmsg_queue_next(kmsgq, kmsg); | |
} | |
if (kmsg == IKM_NULL) | |
goto out; | |
/* found one - return the requested info */ | |
if (seqnop != NULL) | |
*seqnop = seqno; | |
if (msg_sizep != NULL) | |
*msg_sizep = kmsg->ikm_header->msgh_size; | |
if (msg_idp != NULL) | |
*msg_idp = kmsg->ikm_header->msgh_id; | |
if (msg_trailerp != NULL) | |
memcpy(msg_trailerp, | |
(mach_msg_max_trailer_t *)((vm_offset_t)kmsg->ikm_header + | |
round_msg(kmsg->ikm_header->msgh_size)), | |
sizeof(mach_msg_max_trailer_t)); | |
res = 1; | |
out: | |
imq_unlock(mq); | |
splx(s); | |
return res; | |
} | |
/* | |
* Routine: ipc_mqueue_set_peek | |
* Purpose: | |
* Peek at a message queue set to see if it has any ports | |
* with messages. | |
* | |
* Conditions: | |
* Locks may be held by callers, so this routine cannot block. | |
* Caller holds reference on the message queue. | |
*/ | |
unsigned | |
ipc_mqueue_set_peek(ipc_mqueue_t mq) | |
{ | |
wait_queue_link_t wql; | |
queue_t q; | |
spl_t s; | |
int res; | |
assert(imq_is_set(mq)); | |
s = splsched(); | |
imq_lock(mq); | |
/* | |
* peek at the contained port message queues, return as soon as | |
* we spot a message on one of the message queues linked on the | |
* prepost list. No need to lock each message queue, as only the | |
* head of each queue is checked. If a message wasn't there before | |
* we entered here, no need to find it (if we do, great). | |
*/ | |
res = 0; | |
q = &mq->imq_preposts; | |
queue_iterate(q, wql, wait_queue_link_t, wql_preposts) { | |
ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue; | |
ipc_kmsg_queue_t kmsgs = &port_mq->imq_messages; | |
if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) { | |
res = 1; | |
break; | |
} | |
} | |
imq_unlock(mq); | |
splx(s); | |
return res; | |
} | |
/* | |
* Routine: ipc_mqueue_set_gather_member_names | |
* Purpose: | |
* Iterate a message queue set to identify the member port | |
 *	names. The returned names are limited to maxnames entries,
* but we keep counting the actual number of members to let | |
* the caller decide to retry if necessary. | |
* | |
* Conditions: | |
* Locks may be held by callers, so this routine cannot block. | |
* Caller holds reference on the message queue. | |
*/ | |
void | |
ipc_mqueue_set_gather_member_names( | |
ipc_mqueue_t mq, | |
ipc_entry_num_t maxnames, | |
mach_port_name_t *names, | |
ipc_entry_num_t *actualp) | |
{ | |
wait_queue_link_t wql; | |
queue_t q; | |
spl_t s; | |
ipc_entry_num_t actual = 0; | |
assert(imq_is_set(mq)); | |
s = splsched(); | |
imq_lock(mq); | |
/* | |
* Iterate over the member ports through the mqueue set links | |
* capturing as many names as we can. | |
*/ | |
q = &mq->imq_setlinks; | |
queue_iterate(q, wql, wait_queue_link_t, wql_setlinks) { | |
ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue; | |
if (actual < maxnames) | |
names[actual] = port_mq->imq_receiver_name; | |
actual++; | |
} | |
imq_unlock(mq); | |
splx(s); | |
*actualp = actual; | |
} | |
/* | |
* Routine: ipc_mqueue_destroy | |
* Purpose: | |
* Destroy a (non-set) message queue. | |
* Set any blocked senders running. | |
* Destroy the kmsgs in the queue. | |
* Conditions: | |
* Nothing locked. | |
* Receivers were removed when the receive right was "changed" | |
*/ | |
void | |
ipc_mqueue_destroy( | |
ipc_mqueue_t mqueue) | |
{ | |
ipc_kmsg_queue_t kmqueue; | |
ipc_kmsg_t kmsg; | |
boolean_t reap = FALSE; | |
spl_t s; | |
s = splsched(); | |
imq_lock(mqueue); | |
/* | |
* rouse all blocked senders | |
*/ | |
mqueue->imq_fullwaiters = FALSE; | |
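	/*
	 * Debug logging added in this modified build: record that the
	 * destroy path is about to wake all blocked senders on this
	 * mqueue's wait queue.
	 */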
printf("\nAbout to call wait_queue_wakeup64_all_locked from ipc_mqueue_destroy\n"); | |
wait_queue_wakeup64_all_locked( | |
&mqueue->imq_wait_queue, | |
IPC_MQUEUE_FULL, | |
THREAD_RESTART, | |
FALSE); | |
/* | |
* Move messages from the specified queue to the per-thread | |
* clean/drain queue while we have the mqueue lock. | |
*/ | |
kmqueue = &mqueue->imq_messages; | |
while ((kmsg = ipc_kmsg_dequeue(kmqueue)) != IKM_NULL) { | |
boolean_t first; | |
first = ipc_kmsg_delayed_destroy(kmsg); | |
if (first) | |
reap = first; | |
} | |
imq_unlock(mqueue); | |
splx(s); | |
/* | |
* Destroy the messages we enqueued if we aren't nested | |
* inside some other attempt to drain the same queue. | |
*/ | |
if (reap) | |
ipc_kmsg_reap_delayed(); | |
} | |
/* | |
* Routine: ipc_mqueue_set_qlimit | |
* Purpose: | |
* Changes a message queue limit; the maximum number | |
* of messages which may be queued. | |
* Conditions: | |
* Nothing locked. | |
*/ | |
void | |
ipc_mqueue_set_qlimit( | |
ipc_mqueue_t mqueue, | |
mach_port_msgcount_t qlimit) | |
{ | |
spl_t s; | |
assert(qlimit <= MACH_PORT_QLIMIT_MAX); | |
/* wake up senders allowed by the new qlimit */ | |
s = splsched(); | |
imq_lock(mqueue); | |
if (qlimit > mqueue->imq_qlimit) { | |
mach_port_msgcount_t i, wakeup; | |
/* caution: wakeup, qlimit are unsigned */ | |
wakeup = qlimit - mqueue->imq_qlimit; | |
for (i = 0; i < wakeup; i++) { | |
if (wait_queue_wakeup64_one_locked( | |
&mqueue->imq_wait_queue, | |
IPC_MQUEUE_FULL, | |
THREAD_AWAKENED, | |
FALSE) == KERN_NOT_WAITING) { | |
mqueue->imq_fullwaiters = FALSE; | |
break; | |
} | |
mqueue->imq_msgcount++; /* give it to the awakened thread */ | |
} | |
} | |
mqueue->imq_qlimit = qlimit; | |
imq_unlock(mqueue); | |
splx(s); | |
} | |
/* | |
* Routine: ipc_mqueue_set_seqno | |
* Purpose: | |
* Changes an mqueue's sequence number. | |
* Conditions: | |
* Caller holds a reference to the queue's containing object. | |
*/ | |
void | |
ipc_mqueue_set_seqno( | |
ipc_mqueue_t mqueue, | |
mach_port_seqno_t seqno) | |
{ | |
spl_t s; | |
s = splsched(); | |
imq_lock(mqueue); | |
mqueue->imq_seqno = seqno; | |
imq_unlock(mqueue); | |
splx(s); | |
} | |
/* | |
* Routine: ipc_mqueue_copyin | |
* Purpose: | |
* Convert a name in a space to a message queue. | |
* Conditions: | |
 *	Nothing locked. If successful, the caller gets a ref
* for the object. This ref ensures the continued existence of | |
* the queue. | |
* Returns: | |
* MACH_MSG_SUCCESS Found a message queue. | |
* MACH_RCV_INVALID_NAME The space is dead. | |
* MACH_RCV_INVALID_NAME The name doesn't denote a right. | |
* MACH_RCV_INVALID_NAME | |
* The denoted right is not receive or port set. | |
* MACH_RCV_IN_SET Receive right is a member of a set. | |
*/ | |
mach_msg_return_t | |
ipc_mqueue_copyin( | |
ipc_space_t space, | |
mach_port_name_t name, | |
ipc_mqueue_t *mqueuep, | |
ipc_object_t *objectp) | |
{ | |
ipc_entry_t entry; | |
ipc_object_t object; | |
ipc_mqueue_t mqueue; | |
is_read_lock(space); | |
if (!is_active(space)) { | |
is_read_unlock(space); | |
return MACH_RCV_INVALID_NAME; | |
} | |
entry = ipc_entry_lookup(space, name); | |
if (entry == IE_NULL) { | |
is_read_unlock(space); | |
return MACH_RCV_INVALID_NAME; | |
} | |
object = entry->ie_object; | |
if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { | |
ipc_port_t port; | |
port = (ipc_port_t) object; | |
assert(port != IP_NULL); | |
ip_lock(port); | |
assert(ip_active(port)); | |
assert(port->ip_receiver_name == name); | |
assert(port->ip_receiver == space); | |
is_read_unlock(space); | |
mqueue = &port->ip_messages; | |
} else if (entry->ie_bits & MACH_PORT_TYPE_PORT_SET) { | |
ipc_pset_t pset; | |
pset = (ipc_pset_t) object; | |
assert(pset != IPS_NULL); | |
ips_lock(pset); | |
assert(ips_active(pset)); | |
assert(pset->ips_local_name == name); | |
is_read_unlock(space); | |
mqueue = &pset->ips_messages; | |
} else { | |
is_read_unlock(space); | |
return MACH_RCV_INVALID_NAME; | |
} | |
/* | |
* At this point, the object is locked and active, | |
* the space is unlocked, and mqueue is initialized. | |
*/ | |
io_reference(object); | |
io_unlock(object); | |
*objectp = object; | |
*mqueuep = mqueue; | |
return MACH_MSG_SUCCESS; | |
} |
kern_event.c
/* | |
* Copyright (c) 2000-2013 Apple Inc. All rights reserved. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
* | |
* This file contains Original Code and/or Modifications of Original Code | |
* as defined in and that are subject to the Apple Public Source License | |
* Version 2.0 (the 'License'). You may not use this file except in | |
* compliance with the License. The rights granted to you under the License | |
* may not be used to create, or enable the creation or redistribution of, | |
* unlawful or unlicensed copies of an Apple operating system, or to | |
* circumvent, violate, or enable the circumvention or violation of, any | |
* terms of an Apple operating system software license agreement. | |
* | |
* Please obtain a copy of the License at | |
* http://www.opensource.apple.com/apsl/ and read it before using this file. | |
* | |
* The Original Code and all software distributed under the License are | |
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
* Please see the License for the specific language governing rights and | |
* limitations under the License. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
* | |
*/ | |
/*- | |
* Copyright (c) 1999,2000,2001 Jonathan Lemon <[email protected]> | |
* All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
* SUCH DAMAGE. | |
*/ | |
/* | |
* @(#)kern_event.c 1.0 (3/31/2000) | |
*/ | |
#include <stdint.h> | |
#include <sys/param.h> | |
#include <sys/systm.h> | |
#include <sys/filedesc.h> | |
#include <sys/kernel.h> | |
#include <sys/proc_internal.h> | |
#include <sys/kauth.h> | |
#include <sys/malloc.h> | |
#include <sys/unistd.h> | |
#include <sys/file_internal.h> | |
#include <sys/fcntl.h> | |
#include <sys/select.h> | |
#include <sys/queue.h> | |
#include <sys/event.h> | |
#include <sys/eventvar.h> | |
#include <sys/protosw.h> | |
#include <sys/socket.h> | |
#include <sys/socketvar.h> | |
#include <sys/stat.h> | |
#include <sys/sysctl.h> | |
#include <sys/uio.h> | |
#include <sys/sysproto.h> | |
#include <sys/user.h> | |
#include <sys/vnode_internal.h> | |
#include <string.h> | |
#include <sys/proc_info.h> | |
#include <sys/codesign.h> | |
#include <kern/lock.h> | |
#include <kern/clock.h> | |
#include <kern/thread_call.h> | |
#include <kern/sched_prim.h> | |
#include <kern/zalloc.h> | |
#include <kern/assert.h> | |
#include <libkern/libkern.h> | |
#include "net/net_str_id.h" | |
#include <mach/task.h> | |
#if VM_PRESSURE_EVENTS | |
#include <kern/vm_pressure.h> | |
#endif | |
#if CONFIG_MEMORYSTATUS | |
#include <sys/kern_memorystatus.h> | |
#endif | |
MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); | |
#define KQ_EVENT NULL | |
/* Plumbing needed for wait queue logging. */
#define EVENT_MASK_BITS | |
typedef struct my_wait_queue { // happy little wait queue | |
unsigned long int | |
/* boolean_t */ wq_type:2, /* only public field */ | |
wq_fifo:1, /* fifo wakeup policy? */ | |
wq_prepost:1, /* waitq supports prepost? set only */ | |
wq_eventmask:((sizeof(long) * 8) - 4); | |
} myWaitQueue; | |
#define _WAIT_QUEUE_inited 0x2 | |
#define wait_queue_is_valid(wq) \ | |
(((wq)->wq_type & ~1) == _WAIT_QUEUE_inited) | |
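/*
 * Note on the plumbing above (an assumption of this patch, not taken from
 * the stock sources): struct my_wait_queue is assumed to mirror the first
 * word of the Mach struct wait_queue for this kernel version, and
 * _WAIT_QUEUE_inited to match its initialized-type magic, so BSD code can
 * sanity-check a kqueue's wait-queue set without pulling in the
 * Mach-private header. The intended guard pattern, as used below, is
 * roughly:
 *
 *	if (!wait_queue_is_valid((myWaitQueue *)kq->kq_wqs))
 *		printf("kqueue wait queue looks torn down or corrupt\n");
 */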
static inline void kqlock(struct kqueue *kq); | |
static inline void kqunlock(struct kqueue *kq); | |
static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn); | |
static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn); | |
static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn); | |
static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn); | |
static void kqueue_wakeup(struct kqueue *kq, int closed); | |
static int kqueue_read(struct fileproc *fp, struct uio *uio, | |
int flags, vfs_context_t ctx); | |
static int kqueue_write(struct fileproc *fp, struct uio *uio, | |
int flags, vfs_context_t ctx); | |
static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, | |
vfs_context_t ctx); | |
static int kqueue_select(struct fileproc *fp, int which, void *wql, | |
vfs_context_t ctx); | |
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); | |
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, | |
vfs_context_t ctx); | |
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); | |
extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, | |
vfs_context_t ctx); | |
static const struct fileops kqueueops = { | |
.fo_type = DTYPE_KQUEUE, | |
.fo_read = kqueue_read, | |
.fo_write = kqueue_write, | |
.fo_ioctl = kqueue_ioctl, | |
.fo_select = kqueue_select, | |
.fo_close = kqueue_close, | |
.fo_kqfilter = kqueue_kqfilter, | |
.fo_drain = kqueue_drain, | |
}; | |
static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, | |
int nchanges, user_addr_t eventlist, int nevents, int fd, | |
user_addr_t utimeout, unsigned int flags, int32_t *retval); | |
static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, | |
struct proc *p, int iskev64); | |
static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, | |
struct proc *p, int iskev64); | |
char * kevent_description(struct kevent64_s *kevp, char *s, size_t n); | |
static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, | |
void *data); | |
static void kevent_continue(struct kqueue *kq, void *data, int error); | |
static void kqueue_scan_continue(void *contp, wait_result_t wait_result); | |
static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, | |
void *data, int *countp, struct proc *p); | |
static int kqueue_begin_processing(struct kqueue *kq); | |
static void kqueue_end_processing(struct kqueue *kq); | |
static int knote_process(struct knote *kn, kevent_callback_t callback, | |
void *data, struct kqtailq *inprocessp, struct proc *p); | |
static void knote_put(struct knote *kn); | |
static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, | |
struct proc *p); | |
static void knote_drop(struct knote *kn, struct proc *p); | |
static void knote_activate(struct knote *kn, int); | |
static void knote_deactivate(struct knote *kn); | |
static void knote_enqueue(struct knote *kn); | |
static void knote_dequeue(struct knote *kn); | |
static struct knote *knote_alloc(void); | |
static void knote_free(struct knote *kn); | |
static int filt_fileattach(struct knote *kn); | |
static struct filterops file_filtops = { | |
.f_isfd = 1, | |
.f_attach = filt_fileattach, | |
}; | |
static void filt_kqdetach(struct knote *kn); | |
static int filt_kqueue(struct knote *kn, long hint); | |
static struct filterops kqread_filtops = { | |
.f_isfd = 1, | |
.f_detach = filt_kqdetach, | |
.f_event = filt_kqueue, | |
}; | |
/* placeholder for not-yet-implemented filters */ | |
static int filt_badattach(struct knote *kn); | |
static struct filterops bad_filtops = { | |
.f_attach = filt_badattach, | |
}; | |
static int filt_procattach(struct knote *kn); | |
static void filt_procdetach(struct knote *kn); | |
static int filt_proc(struct knote *kn, long hint); | |
static struct filterops proc_filtops = { | |
.f_attach = filt_procattach, | |
.f_detach = filt_procdetach, | |
.f_event = filt_proc, | |
}; | |
#if VM_PRESSURE_EVENTS | |
static int filt_vmattach(struct knote *kn); | |
static void filt_vmdetach(struct knote *kn); | |
static int filt_vm(struct knote *kn, long hint); | |
static struct filterops vm_filtops = { | |
.f_attach = filt_vmattach, | |
.f_detach = filt_vmdetach, | |
.f_event = filt_vm, | |
}; | |
#endif /* VM_PRESSURE_EVENTS */ | |
#if CONFIG_MEMORYSTATUS | |
extern struct filterops memorystatus_filtops; | |
#endif /* CONFIG_MEMORYSTATUS */ | |
extern struct filterops fs_filtops; | |
extern struct filterops sig_filtops; | |
/* Timer filter */ | |
static int filt_timerattach(struct knote *kn); | |
static void filt_timerdetach(struct knote *kn); | |
static int filt_timer(struct knote *kn, long hint); | |
static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, | |
long type); | |
static struct filterops timer_filtops = { | |
.f_attach = filt_timerattach, | |
.f_detach = filt_timerdetach, | |
.f_event = filt_timer, | |
.f_touch = filt_timertouch, | |
}; | |
/* Helpers */ | |
static void filt_timerexpire(void *knx, void *param1); | |
static int filt_timervalidate(struct knote *kn); | |
static void filt_timerupdate(struct knote *kn); | |
static void filt_timercancel(struct knote *kn); | |
#define TIMER_RUNNING 0x1 | |
#define TIMER_CANCELWAIT 0x2 | |
static lck_mtx_t _filt_timerlock; | |
static void filt_timerlock(void); | |
static void filt_timerunlock(void); | |
static zone_t knote_zone; | |
#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) | |
#if 0 | |
extern struct filterops aio_filtops; | |
#endif | |
/* Mach portset filter */ | |
extern struct filterops machport_filtops; | |
/* User filter */ | |
static int filt_userattach(struct knote *kn); | |
static void filt_userdetach(struct knote *kn); | |
static int filt_user(struct knote *kn, long hint); | |
static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, | |
long type); | |
static struct filterops user_filtops = { | |
.f_attach = filt_userattach, | |
.f_detach = filt_userdetach, | |
.f_event = filt_user, | |
.f_touch = filt_usertouch, | |
}; | |
/* | |
* Table for all system-defined filters. | |
*/ | |
static struct filterops *sysfilt_ops[] = { | |
&file_filtops, /* EVFILT_READ */ | |
&file_filtops, /* EVFILT_WRITE */ | |
#if 0 | |
&aio_filtops, /* EVFILT_AIO */ | |
#else | |
&bad_filtops, /* EVFILT_AIO */ | |
#endif | |
&file_filtops, /* EVFILT_VNODE */ | |
&proc_filtops, /* EVFILT_PROC */ | |
&sig_filtops, /* EVFILT_SIGNAL */ | |
&timer_filtops, /* EVFILT_TIMER */ | |
&machport_filtops, /* EVFILT_MACHPORT */ | |
&fs_filtops, /* EVFILT_FS */ | |
&user_filtops, /* EVFILT_USER */ | |
&bad_filtops, /* unused */ | |
#if VM_PRESSURE_EVENTS | |
&vm_filtops, /* EVFILT_VM */ | |
#else | |
&bad_filtops, /* EVFILT_VM */ | |
#endif | |
&file_filtops, /* EVFILT_SOCK */ | |
#if CONFIG_MEMORYSTATUS | |
&memorystatus_filtops, /* EVFILT_MEMORYSTATUS */ | |
#else | |
&bad_filtops, /* EVFILT_MEMORYSTATUS */ | |
#endif | |
}; | |
/* | |
* kqueue/note lock attributes and implementations | |
* | |
* kqueues have locks, while knotes have use counts | |
* Most of the knote state is guarded by the object lock. | |
* the knote "inuse" count and status use the kqueue lock. | |
*/ | |
lck_grp_attr_t * kq_lck_grp_attr; | |
lck_grp_t * kq_lck_grp; | |
lck_attr_t * kq_lck_attr; | |
static inline void | |
kqlock(struct kqueue *kq) | |
{ | |
lck_spin_lock(&kq->kq_lock); | |
} | |
static inline void | |
kqunlock(struct kqueue *kq) | |
{ | |
lck_spin_unlock(&kq->kq_lock); | |
} | |
/* | |
 * Convert a kq lock to a knote use reference.
* | |
* If the knote is being dropped, we can't get | |
* a use reference, so just return with it | |
* still locked. | |
* - kq locked at entry | |
* - unlock on exit if we get the use reference | |
*/ | |
static int | |
kqlock2knoteuse(struct kqueue *kq, struct knote *kn) | |
{ | |
if (kn->kn_status & KN_DROPPING) | |
return (0); | |
kn->kn_inuse++; | |
kqunlock(kq); | |
return (1); | |
} | |
/* | |
 * Convert a kq lock to a knote use reference,
* but wait for attach and drop events to complete. | |
* | |
* If the knote is being dropped, we can't get | |
* a use reference, so just return with it | |
* still locked. | |
* - kq locked at entry | |
* - kq always unlocked on exit | |
*/ | |
static int | |
kqlock2knoteusewait(struct kqueue *kq, struct knote *kn) | |
{ | |
if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) { | |
kn->kn_status |= KN_USEWAIT; | |
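		/*
		 * Added in this modified build: sanity-check the kqueue's
		 * wait-queue set before asserting a wait on it, and log if it
		 * no longer looks initialized (the suspected panic path).
		 */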
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait with invalid wait queue from kqlock2knoteusewait.\n"); | |
} | |
wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, | |
&kn->kn_status, THREAD_UNINT, 0); | |
kqunlock(kq); | |
thread_block(THREAD_CONTINUE_NULL); | |
return (0); | |
} | |
kn->kn_inuse++; | |
kqunlock(kq); | |
return (1); | |
} | |
/* | |
* Convert from a knote use reference back to kq lock. | |
* | |
* Drop a use reference and wake any waiters if | |
* this is the last one. | |
* | |
* The exit return indicates if the knote is | |
* still alive - but the kqueue lock is taken | |
* unconditionally. | |
*/ | |
static int | |
knoteuse2kqlock(struct kqueue *kq, struct knote *kn) | |
{ | |
kqlock(kq); | |
if (--kn->kn_inuse == 0) { | |
if ((kn->kn_status & KN_ATTACHING) != 0) { | |
kn->kn_status &= ~KN_ATTACHING; | |
} | |
if ((kn->kn_status & KN_USEWAIT) != 0) { | |
kn->kn_status &= ~KN_USEWAIT; | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, | |
&kn->kn_status, THREAD_AWAKENED); | |
} | |
} | |
return ((kn->kn_status & KN_DROPPING) == 0); | |
} | |
/* | |
* Convert a kq lock to a knote drop reference. | |
* | |
* If the knote is in use, wait for the use count | |
* to subside. We first mark our intention to drop | |
* it - keeping other users from "piling on." | |
* If we are too late, we have to wait for the | |
* other drop to complete. | |
* | |
* - kq locked at entry | |
* - always unlocked on exit. | |
* - caller can't hold any locks that would prevent | |
* the other dropper from completing. | |
*/ | |
static int | |
kqlock2knotedrop(struct kqueue *kq, struct knote *kn) | |
{ | |
int oktodrop; | |
oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0); | |
kn->kn_status |= KN_DROPPING; | |
if (oktodrop) { | |
if (kn->kn_inuse == 0) { | |
kqunlock(kq); | |
return (oktodrop); | |
} | |
} | |
kn->kn_status |= KN_USEWAIT; | |
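	/*
	 * Added in this modified build: same sanity check as in
	 * kqlock2knoteusewait() before blocking on the kqueue's wait-queue
	 * set.
	 */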
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait with invalid wait queue from kqlock2knotedrop.\n"); | |
} | |
wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, | |
THREAD_UNINT, 0); | |
kqunlock(kq); | |
thread_block(THREAD_CONTINUE_NULL); | |
return (oktodrop); | |
} | |
/* | |
* Release a knote use count reference. | |
*/ | |
static void | |
knote_put(struct knote *kn) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
kqlock(kq); | |
if (--kn->kn_inuse == 0) { | |
if ((kn->kn_status & KN_USEWAIT) != 0) { | |
kn->kn_status &= ~KN_USEWAIT; | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, | |
&kn->kn_status, THREAD_AWAKENED); | |
} | |
} | |
kqunlock(kq); | |
} | |
static int | |
filt_fileattach(struct knote *kn) | |
{ | |
return (fo_kqfilter(kn->kn_fp, kn, vfs_context_current())); | |
} | |
#define f_flag f_fglob->fg_flag | |
#define f_msgcount f_fglob->fg_msgcount | |
#define f_cred f_fglob->fg_cred | |
#define f_ops f_fglob->fg_ops | |
#define f_offset f_fglob->fg_offset | |
#define f_data f_fglob->fg_data | |
static void | |
filt_kqdetach(struct knote *kn) | |
{ | |
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; | |
kqlock(kq); | |
KNOTE_DETACH(&kq->kq_sel.si_note, kn); | |
kqunlock(kq); | |
} | |
/*ARGSUSED*/ | |
static int | |
filt_kqueue(struct knote *kn, __unused long hint) | |
{ | |
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; | |
kn->kn_data = kq->kq_count; | |
return (kn->kn_data > 0); | |
} | |
static int | |
filt_procattach(struct knote *kn) | |
{ | |
struct proc *p; | |
assert(PID_MAX < NOTE_PDATAMASK); | |
if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) | |
return (ENOTSUP); | |
p = proc_find(kn->kn_id); | |
if (p == NULL) { | |
return (ESRCH); | |
} | |
const int NoteExitStatusBits = NOTE_EXIT | NOTE_EXITSTATUS; | |
if ((kn->kn_sfflags & NoteExitStatusBits) == NoteExitStatusBits) | |
do { | |
pid_t selfpid = proc_selfpid(); | |
if (p->p_ppid == selfpid) | |
break; /* parent => ok */ | |
if ((p->p_lflag & P_LTRACED) != 0 && | |
(p->p_oppid == selfpid)) | |
break; /* parent-in-waiting => ok */ | |
proc_rele(p); | |
return (EACCES); | |
} while (0); | |
proc_klist_lock(); | |
kn->kn_flags |= EV_CLEAR; /* automatically set */ | |
kn->kn_ptr.p_proc = p; /* store the proc handle */ | |
KNOTE_ATTACH(&p->p_klist, kn); | |
proc_klist_unlock(); | |
proc_rele(p); | |
return (0); | |
} | |
/* | |
* The knote may be attached to a different process, which may exit, | |
* leaving nothing for the knote to be attached to. In that case, | |
* the pointer to the process will have already been nulled out. | |
*/ | |
static void | |
filt_procdetach(struct knote *kn) | |
{ | |
struct proc *p; | |
proc_klist_lock(); | |
p = kn->kn_ptr.p_proc; | |
if (p != PROC_NULL) { | |
kn->kn_ptr.p_proc = PROC_NULL; | |
KNOTE_DETACH(&p->p_klist, kn); | |
} | |
proc_klist_unlock(); | |
} | |
static int | |
filt_proc(struct knote *kn, long hint) | |
{ | |
/* | |
* Note: a lot of bits in hint may be obtained from the knote | |
* To free some of those bits, see <rdar://problem/12592988> Freeing up | |
* bits in hint for filt_proc | |
*/ | |
/* hint is 0 when called from above */ | |
if (hint != 0) { | |
u_int event; | |
/* ALWAYS CALLED WITH proc_klist_lock when (hint != 0) */ | |
/* | |
* mask off extra data | |
*/ | |
event = (u_int)hint & NOTE_PCTRLMASK; | |
/* | |
* termination lifecycle events can happen while a debugger | |
* has reparented a process, in which case notifications | |
* should be quashed except to the tracing parent. When | |
* the debugger reaps the child (either via wait4(2) or | |
* process exit), the child will be reparented to the original | |
* parent and these knotes re-fired. | |
*/ | |
if (event & NOTE_EXIT) { | |
if ((kn->kn_ptr.p_proc->p_oppid != 0) | |
&& (kn->kn_kq->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) { | |
/* | |
* This knote is not for the current ptrace(2) parent, ignore. | |
*/ | |
return 0; | |
} | |
} | |
/* | |
* if the user is interested in this event, record it. | |
*/ | |
if (kn->kn_sfflags & event) | |
kn->kn_fflags |= event; | |
#pragma clang diagnostic push | |
#pragma clang diagnostic ignored "-Wdeprecated-declarations" | |
if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) { | |
kn->kn_flags |= (EV_EOF | EV_ONESHOT); | |
} | |
#pragma clang diagnostic pop | |
if (event == NOTE_EXIT) { | |
kn->kn_data = 0; | |
if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) { | |
kn->kn_fflags |= NOTE_EXITSTATUS; | |
kn->kn_data |= (hint & NOTE_PDATAMASK); | |
} | |
if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) { | |
kn->kn_fflags |= NOTE_EXIT_DETAIL; | |
if ((kn->kn_ptr.p_proc->p_lflag & | |
P_LTERM_DECRYPTFAIL) != 0) { | |
kn->kn_data |= NOTE_EXIT_DECRYPTFAIL; | |
} | |
if ((kn->kn_ptr.p_proc->p_lflag & | |
P_LTERM_JETSAM) != 0) { | |
kn->kn_data |= NOTE_EXIT_MEMORY; | |
switch (kn->kn_ptr.p_proc->p_lflag & | |
P_JETSAM_MASK) { | |
case P_JETSAM_VMPAGESHORTAGE: | |
kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; | |
break; | |
case P_JETSAM_VMTHRASHING: | |
kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING; | |
break; | |
case P_JETSAM_VNODE: | |
kn->kn_data |= NOTE_EXIT_MEMORY_VNODE; | |
break; | |
case P_JETSAM_HIWAT: | |
kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT; | |
break; | |
case P_JETSAM_PID: | |
kn->kn_data |= NOTE_EXIT_MEMORY_PID; | |
break; | |
case P_JETSAM_IDLEEXIT: | |
kn->kn_data |= NOTE_EXIT_MEMORY_IDLE; | |
break; | |
} | |
} | |
if ((kn->kn_ptr.p_proc->p_csflags & | |
CS_KILLED) != 0) { | |
kn->kn_data |= NOTE_EXIT_CSERROR; | |
} | |
} | |
} | |
} | |
/* atomic check, no locking need when called from above */ | |
return (kn->kn_fflags != 0); | |
} | |
#if VM_PRESSURE_EVENTS | |
/* | |
* Virtual memory kevents | |
* | |
* author: Matt Jacobson [[email protected]] | |
*/ | |
static int | |
filt_vmattach(struct knote *kn) | |
{ | |
/* | |
* The note will be cleared once the information has been flushed to | |
* the client. If there is still pressure, we will be re-alerted. | |
*/ | |
kn->kn_flags |= EV_CLEAR; | |
return (vm_knote_register(kn)); | |
} | |
static void | |
filt_vmdetach(struct knote *kn) | |
{ | |
vm_knote_unregister(kn); | |
} | |
static int | |
filt_vm(struct knote *kn, long hint) | |
{ | |
/* hint == 0 means this is just an alive? check (always true) */ | |
if (hint != 0) { | |
const pid_t pid = (pid_t)hint; | |
if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && | |
(kn->kn_kq->kq_p->p_pid == pid)) { | |
kn->kn_fflags |= NOTE_VM_PRESSURE; | |
} | |
} | |
return (kn->kn_fflags != 0); | |
} | |
#endif /* VM_PRESSURE_EVENTS */ | |
/* | |
* filt_timervalidate - process data from user | |
* | |
* Converts to either interval or deadline format. | |
* | |
* The saved-data field in the knote contains the | |
* time value. The saved filter-flags indicates | |
* the unit of measurement. | |
* | |
* After validation, either the saved-data field | |
* contains the interval in absolute time, or ext[0] | |
* contains the expected deadline. If that deadline | |
* is in the past, ext[0] is 0. | |
* | |
* Returns EINVAL for unrecognized units of time. | |
* | |
* Timer filter lock is held. | |
* | |
*/ | |
static int | |
filt_timervalidate(struct knote *kn) | |
{ | |
uint64_t multiplier; | |
uint64_t raw = 0; | |
switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) { | |
case NOTE_SECONDS: | |
multiplier = NSEC_PER_SEC; | |
break; | |
case NOTE_USECONDS: | |
multiplier = NSEC_PER_USEC; | |
break; | |
case NOTE_NSECONDS: | |
multiplier = 1; | |
break; | |
case 0: /* milliseconds (default) */ | |
multiplier = NSEC_PER_SEC / 1000; | |
break; | |
default: | |
return (EINVAL); | |
} | |
/* if a leeway was passed, convert the slop delta in kn_ext[1] to absolute time as well */ | |
if (kn->kn_sfflags & NOTE_LEEWAY) { | |
nanoseconds_to_absolutetime((uint64_t)kn->kn_ext[1] * multiplier, &raw); | |
kn->kn_ext[1] = raw; | |
} | |
nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw); | |
kn->kn_ext[0] = 0; | |
kn->kn_sdata = 0; | |
if (kn->kn_sfflags & NOTE_ABSOLUTE) { | |
clock_sec_t seconds; | |
clock_nsec_t nanoseconds; | |
uint64_t now; | |
clock_get_calendar_nanotime(&seconds, &nanoseconds); | |
nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + | |
nanoseconds, &now); | |
if (raw < now) { | |
/* time has already passed */ | |
kn->kn_ext[0] = 0; | |
} else { | |
raw -= now; | |
clock_absolutetime_interval_to_deadline(raw, | |
&kn->kn_ext[0]); | |
} | |
} else { | |
kn->kn_sdata = raw; | |
} | |
return (0); | |
} | |
/* | |
* filt_timerupdate - compute the next deadline | |
* | |
* Repeating timers store their interval in kn_sdata. Absolute | |
* timers have already calculated the deadline, stored in ext[0]. | |
* | |
* On return, the next deadline (or zero if no deadline is needed) | |
* is stored in kn_ext[0]. | |
* | |
* Timer filter lock is held. | |
*/ | |
static void | |
filt_timerupdate(struct knote *kn) | |
{ | |
/* if there's no interval, deadline is just in kn_ext[0] */ | |
if (kn->kn_sdata == 0) | |
return; | |
/* if timer hasn't fired before, fire in interval nsecs */ | |
if (kn->kn_ext[0] == 0) { | |
clock_absolutetime_interval_to_deadline(kn->kn_sdata, | |
&kn->kn_ext[0]); | |
} else { | |
/* | |
* If timer has fired before, schedule the next pop | |
* relative to the last intended deadline. | |
* | |
* We could check for whether the deadline has expired, | |
* but the thread call layer can handle that. | |
*/ | |
kn->kn_ext[0] += kn->kn_sdata; | |
} | |
} | |
/* | |
* filt_timerexpire - the timer callout routine | |
* | |
* Just propagate the timer event into the knote | |
* filter routine (by going through the knote | |
* synchronization point). Pass a hint to | |
* indicate this is a real event, not just a | |
* query from above. | |
*/ | |
static void | |
filt_timerexpire(void *knx, __unused void *spare) | |
{ | |
struct klist timer_list; | |
struct knote *kn = knx; | |
filt_timerlock(); | |
kn->kn_hookid &= ~TIMER_RUNNING; | |
/* no "object" for timers, so fake a list */ | |
SLIST_INIT(&timer_list); | |
SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext); | |
KNOTE(&timer_list, 1); | |
/* if someone is waiting for timer to pop */ | |
if (kn->kn_hookid & TIMER_CANCELWAIT) { | |
struct kqueue *kq = kn->kn_kq; | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook, | |
THREAD_AWAKENED); | |
} | |
filt_timerunlock(); | |
} | |
/* | |
* Cancel a running timer (or wait for the pop). | |
* Timer filter lock is held. | |
*/ | |
static void | |
filt_timercancel(struct knote *kn) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
thread_call_t callout = kn->kn_hook; | |
boolean_t cancelled; | |
if (kn->kn_hookid & TIMER_RUNNING) { | |
/* cancel the callout if we can */ | |
cancelled = thread_call_cancel(callout); | |
if (cancelled) { | |
kn->kn_hookid &= ~TIMER_RUNNING; | |
} else { | |
/* we have to wait for the expire routine. */ | |
kn->kn_hookid |= TIMER_CANCELWAIT; | |
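/* | |
* wait_queue_is_valid()/myWaitQueue are the checking helpers used | |
* throughout this file to spot a kqueue wait queue set that no | |
* longer looks valid before a wait is asserted on it -- presumably | |
* the condition behind the panics this modified kern_event.c is | |
* meant to diagnose (see the linked Chromium Legacy issue). | |
*/ | |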
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait with invalid wait queue from filt_timercancel.\n"); | |
} | |
wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, | |
&kn->kn_hook, THREAD_UNINT, 0); | |
filt_timerunlock(); | |
thread_block(THREAD_CONTINUE_NULL); | |
filt_timerlock(); | |
assert((kn->kn_hookid & TIMER_RUNNING) == 0); | |
} | |
} | |
} | |
/* | |
* Allocate a thread call for the knote's lifetime, and kick off the timer. | |
*/ | |
static int | |
filt_timerattach(struct knote *kn) | |
{ | |
thread_call_t callout; | |
int error; | |
callout = thread_call_allocate(filt_timerexpire, kn); | |
if (NULL == callout) | |
return (ENOMEM); | |
filt_timerlock(); | |
error = filt_timervalidate(kn); | |
if (error != 0) { | |
filt_timerunlock(); | |
return (error); | |
} | |
kn->kn_hook = (void*)callout; | |
kn->kn_hookid = 0; | |
/* absolute=EV_ONESHOT */ | |
if (kn->kn_sfflags & NOTE_ABSOLUTE) | |
kn->kn_flags |= EV_ONESHOT; | |
filt_timerupdate(kn); | |
if (kn->kn_ext[0]) { | |
kn->kn_flags |= EV_CLEAR; | |
unsigned int timer_flags = 0; | |
if (kn->kn_sfflags & NOTE_CRITICAL) | |
timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; | |
else if (kn->kn_sfflags & NOTE_BACKGROUND) | |
timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; | |
else | |
timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; | |
if (kn->kn_sfflags & NOTE_LEEWAY) | |
timer_flags |= THREAD_CALL_DELAY_LEEWAY; | |
thread_call_enter_delayed_with_leeway(callout, NULL, | |
kn->kn_ext[0], kn->kn_ext[1], timer_flags); | |
kn->kn_hookid |= TIMER_RUNNING; | |
} else { | |
/* fake immediate */ | |
kn->kn_data = 1; | |
} | |
filt_timerunlock(); | |
return (0); | |
} | |
/* | |
* Shut down the timer if it's running, and free the callout. | |
*/ | |
static void | |
filt_timerdetach(struct knote *kn) | |
{ | |
thread_call_t callout; | |
filt_timerlock(); | |
callout = (thread_call_t)kn->kn_hook; | |
filt_timercancel(kn); | |
filt_timerunlock(); | |
thread_call_free(callout); | |
} | |
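/* | |
* filt_timer - the EVFILT_TIMER filter routine. | |
* | |
* A non-zero hint is a real timer pop, called from filt_timerexpire | |
* with the timer filter lock already held: count the pop and re-arm | |
* the callout for repeating timers. A zero hint is a query from | |
* above: take the lock and report whether any pops are pending. | |
*/ | |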
static int | |
filt_timer(struct knote *kn, long hint) | |
{ | |
int result; | |
if (hint) { | |
/* real timer pop -- timer lock held by filt_timerexpire */ | |
kn->kn_data++; | |
if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) && | |
((kn->kn_flags & EV_ONESHOT) == 0)) { | |
/* evaluate next time to fire */ | |
filt_timerupdate(kn); | |
if (kn->kn_ext[0]) { | |
unsigned int timer_flags = 0; | |
/* keep the callout and re-arm */ | |
if (kn->kn_sfflags & NOTE_CRITICAL) | |
timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; | |
else if (kn->kn_sfflags & NOTE_BACKGROUND) | |
timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; | |
else | |
timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; | |
if (kn->kn_sfflags & NOTE_LEEWAY) | |
timer_flags |= THREAD_CALL_DELAY_LEEWAY; | |
thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL, | |
kn->kn_ext[0], kn->kn_ext[1], timer_flags); | |
kn->kn_hookid |= TIMER_RUNNING; | |
} | |
} | |
return (1); | |
} | |
/* user-query */ | |
filt_timerlock(); | |
result = (kn->kn_data != 0); | |
filt_timerunlock(); | |
return (result); | |
} | |
/* | |
* filt_timertouch - update knote with new user input | |
* | |
* Cancel and restart the timer based on new user data. When | |
* the user picks up a knote, clear the count of how many timer | |
* pops have gone off (in kn_data). | |
*/ | |
static void | |
filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) | |
{ | |
int error; | |
filt_timerlock(); | |
switch (type) { | |
case EVENT_REGISTER: | |
/* cancel current call */ | |
filt_timercancel(kn); | |
/* recalculate deadline */ | |
kn->kn_sdata = kev->data; | |
kn->kn_sfflags = kev->fflags; | |
kn->kn_ext[0] = kev->ext[0]; | |
kn->kn_ext[1] = kev->ext[1]; | |
error = filt_timervalidate(kn); | |
if (error) { | |
/* no way to report error, so mark it in the knote */ | |
kn->kn_flags |= EV_ERROR; | |
kn->kn_data = error; | |
break; | |
} | |
/* start timer if necessary */ | |
filt_timerupdate(kn); | |
if (kn->kn_ext[0]) { | |
unsigned int timer_flags = 0; | |
if (kn->kn_sfflags & NOTE_CRITICAL) | |
timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; | |
else if (kn->kn_sfflags & NOTE_BACKGROUND) | |
timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; | |
else | |
timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; | |
if (kn->kn_sfflags & NOTE_LEEWAY) | |
timer_flags |= THREAD_CALL_DELAY_LEEWAY; | |
thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL, | |
kn->kn_ext[0], kn->kn_ext[1], timer_flags); | |
kn->kn_hookid |= TIMER_RUNNING; | |
} else { | |
/* pretend the timer has fired */ | |
kn->kn_data = 1; | |
} | |
break; | |
case EVENT_PROCESS: | |
/* reset the timer pop count in kn_data */ | |
*kev = kn->kn_kevent; | |
kev->ext[0] = 0; | |
kn->kn_data = 0; | |
if (kn->kn_flags & EV_CLEAR) | |
kn->kn_fflags = 0; | |
break; | |
default: | |
panic("%s: - invalid type (%ld)", __func__, type); | |
break; | |
} | |
filt_timerunlock(); | |
} | |
static void | |
filt_timerlock(void) | |
{ | |
lck_mtx_lock(&_filt_timerlock); | |
} | |
static void | |
filt_timerunlock(void) | |
{ | |
lck_mtx_unlock(&_filt_timerlock); | |
} | |
static int | |
filt_userattach(struct knote *kn) | |
{ | |
/* EVFILT_USER knotes are not attached to anything in the kernel */ | |
kn->kn_hook = NULL; | |
if (kn->kn_fflags & NOTE_TRIGGER) { | |
kn->kn_hookid = 1; | |
} else { | |
kn->kn_hookid = 0; | |
} | |
return (0); | |
} | |
static void | |
filt_userdetach(__unused struct knote *kn) | |
{ | |
/* EVFILT_USER knotes are not attached to anything in the kernel */ | |
} | |
static int | |
filt_user(struct knote *kn, __unused long hint) | |
{ | |
return (kn->kn_hookid); | |
} | |
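/* | |
* filt_usertouch - apply user updates to an EVFILT_USER knote. | |
* | |
* On EVENT_REGISTER, NOTE_TRIGGER marks the knote fired and the | |
* NOTE_FFNOP/FFAND/FFOR/FFCOPY control bits determine how the new | |
* fflags combine with the saved ones. On EVENT_PROCESS, the saved | |
* state is copied out and, if EV_CLEAR is set, reset. | |
*/ | |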
static void | |
filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type) | |
{ | |
uint32_t ffctrl; | |
switch (type) { | |
case EVENT_REGISTER: | |
if (kev->fflags & NOTE_TRIGGER) { | |
kn->kn_hookid = 1; | |
} | |
ffctrl = kev->fflags & NOTE_FFCTRLMASK; | |
kev->fflags &= NOTE_FFLAGSMASK; | |
switch (ffctrl) { | |
case NOTE_FFNOP: | |
break; | |
case NOTE_FFAND: | |
OSBitAndAtomic(kev->fflags, &kn->kn_sfflags); | |
break; | |
case NOTE_FFOR: | |
OSBitOrAtomic(kev->fflags, &kn->kn_sfflags); | |
break; | |
case NOTE_FFCOPY: | |
kn->kn_sfflags = kev->fflags; | |
break; | |
} | |
kn->kn_sdata = kev->data; | |
break; | |
case EVENT_PROCESS: | |
*kev = kn->kn_kevent; | |
kev->fflags = (volatile UInt32)kn->kn_sfflags; | |
kev->data = kn->kn_sdata; | |
if (kn->kn_flags & EV_CLEAR) { | |
kn->kn_hookid = 0; | |
kn->kn_data = 0; | |
kn->kn_fflags = 0; | |
} | |
break; | |
default: | |
panic("%s: - invalid type (%ld)", __func__, type); | |
break; | |
} | |
} | |
/* | |
* JMM - placeholder for not-yet-implemented filters | |
*/ | |
static int | |
filt_badattach(__unused struct knote *kn) | |
{ | |
return (ENOTSUP); | |
} | |
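/* | |
* kqueue_alloc - allocate and initialize a kqueue for process p. | |
* | |
* Returns NULL if either the kqueue itself or its wait queue set | |
* cannot be allocated. Also marks the process as having used a | |
* kqueue, so fd_knlistsize is valid from here on. | |
*/ | |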
struct kqueue * | |
kqueue_alloc(struct proc *p) | |
{ | |
struct filedesc *fdp = p->p_fd; | |
struct kqueue *kq; | |
MALLOC_ZONE(kq, struct kqueue *, sizeof (struct kqueue), M_KQUEUE, | |
M_WAITOK); | |
if (kq != NULL) { | |
wait_queue_set_t wqs; | |
wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | | |
SYNC_POLICY_PREPOST); | |
if (wqs != NULL) { | |
bzero(kq, sizeof (struct kqueue)); | |
lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); | |
TAILQ_INIT(&kq->kq_head); | |
kq->kq_wqs = wqs; | |
kq->kq_p = p; | |
} else { | |
FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE); | |
kq = NULL; /* don't return a pointer to the freed kqueue */ | |
} | |
} | |
if (fdp->fd_knlistsize < 0) { | |
proc_fdlock(p); | |
if (fdp->fd_knlistsize < 0) | |
fdp->fd_knlistsize = 0; /* this process has had a kq */ | |
proc_fdunlock(p); | |
} | |
return (kq); | |
} | |
/* | |
* kqueue_dealloc - detach all knotes from a kqueue and free it | |
* | |
* We walk each list looking for knotes referencing this | |
* kqueue. If we find one, we try to drop it. But | |
* if we fail to get a drop reference, that will wait | |
* until it is dropped. So, we can just restart again | |
* safe in the assumption that the list will eventually | |
* not contain any more references to this kqueue (either | |
* we dropped them all, or someone else did). | |
* | |
* Assumes no new events are being added to the kqueue. | |
* Nothing locked on entry or exit. | |
*/ | |
void | |
kqueue_dealloc(struct kqueue *kq) | |
{ | |
struct proc *p = kq->kq_p; | |
struct filedesc *fdp = p->p_fd; | |
struct knote *kn; | |
int i; | |
proc_fdlock(p); | |
for (i = 0; i < fdp->fd_knlistsize; i++) { | |
kn = SLIST_FIRST(&fdp->fd_knlist[i]); | |
while (kn != NULL) { | |
if (kq == kn->kn_kq) { | |
kqlock(kq); | |
proc_fdunlock(p); | |
/* drop it ourselves or wait */ | |
if (kqlock2knotedrop(kq, kn)) { | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
} | |
proc_fdlock(p); | |
/* start over at beginning of list */ | |
kn = SLIST_FIRST(&fdp->fd_knlist[i]); | |
continue; | |
} | |
kn = SLIST_NEXT(kn, kn_link); | |
} | |
} | |
if (fdp->fd_knhashmask != 0) { | |
for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { | |
kn = SLIST_FIRST(&fdp->fd_knhash[i]); | |
while (kn != NULL) { | |
if (kq == kn->kn_kq) { | |
kqlock(kq); | |
proc_fdunlock(p); | |
/* drop it ourselves or wait */ | |
if (kqlock2knotedrop(kq, kn)) { | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
} | |
proc_fdlock(p); | |
/* start over at beginning of list */ | |
kn = SLIST_FIRST(&fdp->fd_knhash[i]); | |
continue; | |
} | |
kn = SLIST_NEXT(kn, kn_link); | |
} | |
} | |
} | |
proc_fdunlock(p); | |
/* | |
* before freeing the wait queue set for this kqueue, | |
* make sure it is unlinked from all its containing (select) sets. | |
*/ | |
wait_queue_unlink_all((wait_queue_t)kq->kq_wqs); | |
wait_queue_set_free(kq->kq_wqs); | |
lck_spin_destroy(&kq->kq_lock, kq_lck_grp); | |
FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE); | |
} | |
int | |
kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval) | |
{ | |
struct kqueue *kq; | |
struct fileproc *fp; | |
int fd, error; | |
error = falloc_withalloc(p, | |
&fp, &fd, vfs_context_current(), fp_zalloc, cra); | |
if (error) { | |
return (error); | |
} | |
kq = kqueue_alloc(p); | |
if (kq == NULL) { | |
fp_free(p, fd, fp); | |
return (ENOMEM); | |
} | |
fp->f_flag = FREAD | FWRITE; | |
fp->f_ops = &kqueueops; | |
fp->f_data = kq; | |
proc_fdlock(p); | |
*fdflags(p, fd) |= UF_EXCLOSE; | |
procfdtbl_releasefd(p, fd, NULL); | |
fp_drop(p, fd, fp, 1); | |
proc_fdunlock(p); | |
*retval = fd; | |
return (error); | |
} | |
int | |
kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) | |
{ | |
return (kqueue_body(p, fileproc_alloc_init, NULL, retval)); | |
} | |
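/* | |
* kevent_copyin - copy in the next change from the user's changelist. | |
* | |
* Widens the 32-bit and 64-bit user layouts into the in-kernel | |
* kevent64_s and advances *addrp past the record consumed. | |
*/ | |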
static int | |
kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, | |
int iskev64) | |
{ | |
int advance; | |
int error; | |
if (iskev64) { | |
advance = sizeof (struct kevent64_s); | |
error = copyin(*addrp, (caddr_t)kevp, advance); | |
} else if (IS_64BIT_PROCESS(p)) { | |
struct user64_kevent kev64; | |
bzero(kevp, sizeof (struct kevent64_s)); | |
advance = sizeof (kev64); | |
error = copyin(*addrp, (caddr_t)&kev64, advance); | |
if (error) | |
return (error); | |
kevp->ident = kev64.ident; | |
kevp->filter = kev64.filter; | |
kevp->flags = kev64.flags; | |
kevp->fflags = kev64.fflags; | |
kevp->data = kev64.data; | |
kevp->udata = kev64.udata; | |
} else { | |
struct user32_kevent kev32; | |
bzero(kevp, sizeof (struct kevent64_s)); | |
advance = sizeof (kev32); | |
error = copyin(*addrp, (caddr_t)&kev32, advance); | |
if (error) | |
return (error); | |
kevp->ident = (uintptr_t)kev32.ident; | |
kevp->filter = kev32.filter; | |
kevp->flags = kev32.flags; | |
kevp->fflags = kev32.fflags; | |
kevp->data = (intptr_t)kev32.data; | |
kevp->udata = CAST_USER_ADDR_T(kev32.udata); | |
} | |
if (!error) | |
*addrp += advance; | |
return (error); | |
} | |
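/* | |
* kevent_copyout - copy one result back to the user's event list. | |
* | |
* Narrows the in-kernel kevent64_s to the layout the caller expects | |
* and advances *addrp past the record written. | |
*/ | |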
static int | |
kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, | |
int iskev64) | |
{ | |
int advance; | |
int error; | |
if (iskev64) { | |
advance = sizeof (struct kevent64_s); | |
error = copyout((caddr_t)kevp, *addrp, advance); | |
} else if (IS_64BIT_PROCESS(p)) { | |
struct user64_kevent kev64; | |
/* | |
* deal with the special case of a user-supplied | |
* value of (uintptr_t)-1. | |
*/ | |
kev64.ident = (kevp->ident == (uintptr_t)-1) ? | |
(uint64_t)-1LL : (uint64_t)kevp->ident; | |
kev64.filter = kevp->filter; | |
kev64.flags = kevp->flags; | |
kev64.fflags = kevp->fflags; | |
kev64.data = (int64_t) kevp->data; | |
kev64.udata = kevp->udata; | |
advance = sizeof (kev64); | |
error = copyout((caddr_t)&kev64, *addrp, advance); | |
} else { | |
struct user32_kevent kev32; | |
kev32.ident = (uint32_t)kevp->ident; | |
kev32.filter = kevp->filter; | |
kev32.flags = kevp->flags; | |
kev32.fflags = kevp->fflags; | |
kev32.data = (int32_t)kevp->data; | |
kev32.udata = kevp->udata; | |
advance = sizeof (kev32); | |
error = copyout((caddr_t)&kev32, *addrp, advance); | |
} | |
if (!error) | |
*addrp += advance; | |
return (error); | |
} | |
/* | |
* kevent_continue - continue a kevent syscall after blocking | |
* | |
* assume we inherit a use count on the kq fileglob. | |
*/ | |
static void | |
kevent_continue(__unused struct kqueue *kq, void *data, int error) | |
{ | |
struct _kevent *cont_args; | |
struct fileproc *fp; | |
int32_t *retval; | |
int noutputs; | |
int fd; | |
struct proc *p = current_proc(); | |
cont_args = (struct _kevent *)data; | |
noutputs = cont_args->eventout; | |
retval = cont_args->retval; | |
fd = cont_args->fd; | |
fp = cont_args->fp; | |
fp_drop(p, fd, fp, 0); | |
/* don't restart after signals... */ | |
if (error == ERESTART) | |
error = EINTR; | |
else if (error == EWOULDBLOCK) | |
error = 0; | |
if (error == 0) | |
*retval = noutputs; | |
unix_syscall_return(error); | |
} | |
/* | |
* kevent - [syscall] register and wait for kernel events | |
* | |
*/ | |
int | |
kevent(struct proc *p, struct kevent_args *uap, int32_t *retval) | |
{ | |
return (kevent_internal(p, | |
0, | |
uap->changelist, | |
uap->nchanges, | |
uap->eventlist, | |
uap->nevents, | |
uap->fd, | |
uap->timeout, | |
0, /* no flags from old kevent() call */ | |
retval)); | |
} | |
int | |
kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval) | |
{ | |
return (kevent_internal(p, | |
1, | |
uap->changelist, | |
uap->nchanges, | |
uap->eventlist, | |
uap->nevents, | |
uap->fd, | |
uap->timeout, | |
uap->flags, | |
retval)); | |
} | |
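/* | |
* kevent_internal - common body of kevent() and kevent64(). | |
* | |
* Converts the timeout, checks that the kqueue is only used with one | |
* kevent size, registers each change from the changelist (copying | |
* per-change errors out when requested via EV_RECEIPT or when | |
* registration fails), and then scans for triggered events, blocking | |
* with a continuation if nothing is ready yet. | |
*/ | |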
static int | |
kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, | |
int nchanges, user_addr_t ueventlist, int nevents, int fd, | |
user_addr_t utimeout, __unused unsigned int flags, | |
int32_t *retval) | |
{ | |
struct _kevent *cont_args; | |
uthread_t ut; | |
struct kqueue *kq; | |
struct fileproc *fp; | |
struct kevent64_s kev; | |
int error, noutputs; | |
struct timeval atv; | |
/* convert timeout to absolute - if we have one */ | |
if (utimeout != USER_ADDR_NULL) { | |
struct timeval rtv; | |
if (IS_64BIT_PROCESS(p)) { | |
struct user64_timespec ts; | |
error = copyin(utimeout, &ts, sizeof(ts)); | |
if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0) | |
error = EINVAL; | |
else | |
TIMESPEC_TO_TIMEVAL(&rtv, &ts); | |
} else { | |
struct user32_timespec ts; | |
error = copyin(utimeout, &ts, sizeof(ts)); | |
TIMESPEC_TO_TIMEVAL(&rtv, &ts); | |
} | |
if (error) | |
return (error); | |
if (itimerfix(&rtv)) | |
return (EINVAL); | |
getmicrouptime(&atv); | |
timevaladd(&atv, &rtv); | |
} else { | |
atv.tv_sec = 0; | |
atv.tv_usec = 0; | |
} | |
/* get a usecount for the kq itself */ | |
if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) | |
return (error); | |
/* each kq should only be used for events of one type */ | |
kqlock(kq); | |
if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) { | |
if (((iskev64 && (kq->kq_state & KQ_KEV32)) || | |
(!iskev64 && (kq->kq_state & KQ_KEV64)))) { | |
error = EINVAL; | |
kqunlock(kq); | |
goto errorout; | |
} | |
} else { | |
kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32); | |
} | |
kqunlock(kq); | |
/* register all the change requests the user provided... */ | |
noutputs = 0; | |
while (nchanges > 0 && error == 0) { | |
error = kevent_copyin(&changelist, &kev, p, iskev64); | |
if (error) | |
break; | |
kev.flags &= ~EV_SYSFLAGS; | |
error = kevent_register(kq, &kev, p); | |
if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) { | |
kev.flags = EV_ERROR; | |
kev.data = error; | |
error = kevent_copyout(&kev, &ueventlist, p, iskev64); | |
if (error == 0) { | |
nevents--; | |
noutputs++; | |
} | |
} | |
nchanges--; | |
} | |
/* store the continuation/completion data in the uthread */ | |
ut = (uthread_t)get_bsdthread_info(current_thread()); | |
cont_args = &ut->uu_kevent.ss_kevent; | |
cont_args->fp = fp; | |
cont_args->fd = fd; | |
cont_args->retval = retval; | |
cont_args->eventlist = ueventlist; | |
cont_args->eventcount = nevents; | |
cont_args->eventout = noutputs; | |
cont_args->eventsize = iskev64; | |
if (nevents > 0 && noutputs == 0 && error == 0) | |
error = kqueue_scan(kq, kevent_callback, | |
kevent_continue, cont_args, | |
&atv, p); | |
kevent_continue(kq, cont_args, error); | |
errorout: | |
fp_drop(p, fd, fp, 0); | |
return (error); | |
} | |
/* | |
* kevent_callback - callback for each individual event | |
* | |
* called with nothing locked | |
* caller holds a reference on the kqueue | |
*/ | |
static int | |
kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, | |
void *data) | |
{ | |
struct _kevent *cont_args; | |
int error; | |
int iskev64; | |
cont_args = (struct _kevent *)data; | |
assert(cont_args->eventout < cont_args->eventcount); | |
iskev64 = cont_args->eventsize; | |
/* | |
* Copy out the appropriate amount of event data for this user. | |
*/ | |
error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), | |
iskev64); | |
/* | |
* If there isn't space for additional events, return | |
* a harmless error to stop the processing here | |
*/ | |
if (error == 0 && ++cont_args->eventout == cont_args->eventcount) | |
error = EWOULDBLOCK; | |
return (error); | |
} | |
/* | |
* kevent_description - format a description of a kevent for diagnostic output | |
* | |
* called with a 128-byte string buffer | |
*/ | |
char * | |
kevent_description(struct kevent64_s *kevp, char *s, size_t n) | |
{ | |
snprintf(s, n, | |
"kevent=" | |
"{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", | |
kevp->ident, | |
kevp->filter, | |
kevp->flags, | |
kevp->fflags, | |
kevp->data, | |
kevp->udata, | |
kevp->ext[0], | |
kevp->ext[1]); | |
return (s); | |
} | |
/* | |
* kevent_register - add a new event to a kqueue | |
* | |
* Creates a mapping between the event source and | |
* the kqueue via a knote data structure. | |
* | |
* Because many/most of the event sources are file | |
* descriptor related, the knote is linked off | |
* the filedescriptor table for quick access. | |
* | |
* called with nothing locked | |
* caller holds a reference on the kqueue | |
*/ | |
int | |
kevent_register(struct kqueue *kq, struct kevent64_s *kev, | |
__unused struct proc *ctxp) | |
{ | |
struct proc *p = kq->kq_p; | |
struct filedesc *fdp = p->p_fd; | |
struct filterops *fops; | |
struct fileproc *fp = NULL; | |
struct knote *kn = NULL; | |
int error = 0; | |
if (kev->filter < 0) { | |
if (kev->filter + EVFILT_SYSCOUNT < 0) | |
return (EINVAL); | |
fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ | |
} else { | |
/* | |
* XXX | |
* filter attach routine is responsible for ensuring that | |
* the identifier can be attached to it. | |
*/ | |
printf("unknown filter: %d\n", kev->filter); | |
return (EINVAL); | |
} | |
restart: | |
/* this iocount needs to be dropped if it is not registered */ | |
proc_fdlock(p); | |
if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) { | |
proc_fdunlock(p); | |
return (error); | |
} | |
if (fops->f_isfd) { | |
/* fd-based knotes are linked off the fd table */ | |
if (kev->ident < (u_int)fdp->fd_knlistsize) { | |
SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) | |
if (kq == kn->kn_kq && | |
kev->filter == kn->kn_filter) | |
break; | |
} | |
} else { | |
/* hash non-fd knotes here too */ | |
if (fdp->fd_knhashmask != 0) { | |
struct klist *list; | |
list = &fdp->fd_knhash[ | |
KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; | |
SLIST_FOREACH(kn, list, kn_link) | |
if (kev->ident == kn->kn_id && | |
kq == kn->kn_kq && | |
kev->filter == kn->kn_filter) | |
break; | |
} | |
} | |
/* | |
* kn now contains the matching knote, or NULL if no match | |
*/ | |
if (kn == NULL) { | |
if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) { | |
kn = knote_alloc(); | |
if (kn == NULL) { | |
proc_fdunlock(p); | |
error = ENOMEM; | |
goto done; | |
} | |
kn->kn_fp = fp; | |
kn->kn_kq = kq; | |
kn->kn_tq = &kq->kq_head; | |
kn->kn_fop = fops; | |
kn->kn_sfflags = kev->fflags; | |
kn->kn_sdata = kev->data; | |
kev->fflags = 0; | |
kev->data = 0; | |
kn->kn_kevent = *kev; | |
kn->kn_inuse = 1; /* for f_attach() */ | |
kn->kn_status = KN_ATTACHING; | |
/* before anyone can find it */ | |
if (kev->flags & EV_DISABLE) | |
kn->kn_status |= KN_DISABLED; | |
error = knote_fdpattach(kn, fdp, p); | |
proc_fdunlock(p); | |
if (error) { | |
knote_free(kn); | |
goto done; | |
} | |
/* | |
* apply reference count to knote structure, and | |
* do not release it at the end of this routine. | |
*/ | |
fp = NULL; | |
error = fops->f_attach(kn); | |
kqlock(kq); | |
if (error != 0) { | |
/* | |
* Failed to attach correctly, so drop. | |
* All other possible users/droppers | |
* have deferred to us. | |
*/ | |
kn->kn_status |= KN_DROPPING; | |
kqunlock(kq); | |
knote_drop(kn, p); | |
goto done; | |
} else if (kn->kn_status & KN_DROPPING) { | |
/* | |
* Attach succeeded, but someone else | |
* deferred their drop - now we have | |
* to do it for them (after detaching). | |
*/ | |
kqunlock(kq); | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
goto done; | |
} | |
kn->kn_status &= ~KN_ATTACHING; | |
kqunlock(kq); | |
} else { | |
proc_fdunlock(p); | |
error = ENOENT; | |
goto done; | |
} | |
} else { | |
/* existing knote - get kqueue lock */ | |
kqlock(kq); | |
proc_fdunlock(p); | |
if (kev->flags & EV_DELETE) { | |
knote_dequeue(kn); | |
kn->kn_status |= KN_DISABLED; | |
if (kqlock2knotedrop(kq, kn)) { | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
} | |
goto done; | |
} | |
/* update status flags for existing knote */ | |
if (kev->flags & EV_DISABLE) { | |
knote_dequeue(kn); | |
kn->kn_status |= KN_DISABLED; | |
} else if (kev->flags & EV_ENABLE) { | |
kn->kn_status &= ~KN_DISABLED; | |
if (kn->kn_status & KN_ACTIVE) | |
knote_enqueue(kn); | |
} | |
/* | |
* The user may change some filter values after the | |
* initial EV_ADD, but doing so will not reset any | |
* filters which have already been triggered. | |
*/ | |
kn->kn_kevent.udata = kev->udata; | |
if (fops->f_isfd || fops->f_touch == NULL) { | |
kn->kn_sfflags = kev->fflags; | |
kn->kn_sdata = kev->data; | |
} | |
/* | |
* If somebody is in the middle of dropping this | |
* knote - go find/insert a new one. But we have | |
* to wait for this one to go away first. Attaches | |
* running in parallel may also drop/modify the | |
* knote. Wait for those to complete as well and | |
* then start over if we encounter one. | |
*/ | |
if (!kqlock2knoteusewait(kq, kn)) { | |
/* kqueue, proc_fdlock both unlocked */ | |
goto restart; | |
} | |
/* | |
* Call touch routine to notify filter of changes | |
* in filter values. | |
*/ | |
if (!fops->f_isfd && fops->f_touch != NULL) | |
fops->f_touch(kn, kev, EVENT_REGISTER); | |
} | |
/* still have use ref on knote */ | |
/* | |
* If the knote is not marked to always stay enqueued, | |
* invoke the filter routine to see if it should be | |
* enqueued now. | |
*/ | |
if ((kn->kn_status & KN_STAYQUEUED) == 0 && kn->kn_fop->f_event(kn, 0)) { | |
if (knoteuse2kqlock(kq, kn)) | |
knote_activate(kn, 1); | |
kqunlock(kq); | |
} else { | |
knote_put(kn); | |
} | |
done: | |
if (fp != NULL) | |
fp_drop(p, kev->ident, fp, 0); | |
return (error); | |
} | |
/* | |
* knote_process - process a triggered event | |
* | |
* Validate that it is really still a triggered event | |
* by calling the filter routines (if necessary). Hold | |
* a use reference on the knote to avoid it being detached. | |
* If it is still considered triggered, invoke the callback | |
* routine provided and move it to the provided inprocess | |
* queue. | |
* | |
* caller holds a reference on the kqueue. | |
* kqueue locked on entry and exit - but may be dropped | |
*/ | |
static int | |
knote_process(struct knote *kn, | |
kevent_callback_t callback, | |
void *data, | |
struct kqtailq *inprocessp, | |
struct proc *p) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
struct kevent64_s kev; | |
int touch; | |
int result; | |
int error; | |
/* | |
* Determine the kevent state we want to return. | |
* | |
* Some event states need to be revalidated before returning | |
* them; for others we take the snapshot from the time the event | |
* was enqueued. | |
* | |
* Events with non-NULL f_touch operations must be touched. | |
* Triggered events must fill in kev for the callback. | |
* | |
* Convert our lock to a use-count and call the event's | |
* filter routine(s) to update. | |
*/ | |
if ((kn->kn_status & KN_DISABLED) != 0) { | |
result = 0; | |
touch = 0; | |
} else { | |
int revalidate; | |
result = 1; | |
revalidate = ((kn->kn_status & KN_STAYQUEUED) != 0 || | |
(kn->kn_flags & EV_ONESHOT) == 0); | |
touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); | |
if (revalidate || touch) { | |
if (revalidate) | |
knote_deactivate(kn); | |
/* call the filter/touch routines with just a ref */ | |
if (kqlock2knoteuse(kq, kn)) { | |
/* if we have to revalidate, call the filter */ | |
if (revalidate) { | |
result = kn->kn_fop->f_event(kn, 0); | |
} | |
/* | |
* capture the kevent data - using touch if | |
* specified | |
*/ | |
if (result && touch) { | |
kn->kn_fop->f_touch(kn, &kev, | |
EVENT_PROCESS); | |
} | |
/* | |
* convert back to a kqlock - bail if the knote | |
* went away | |
*/ | |
if (!knoteuse2kqlock(kq, kn)) { | |
return (EJUSTRETURN); | |
} else if (result) { | |
/* | |
* if revalidated as alive, make sure | |
* it's active | |
*/ | |
if (!(kn->kn_status & KN_ACTIVE)) { | |
knote_activate(kn, 0); | |
} | |
/* | |
* capture all events that occurred | |
* during filter | |
*/ | |
if (!touch) { | |
kev = kn->kn_kevent; | |
} | |
} else if ((kn->kn_status & KN_STAYQUEUED) == 0) { | |
/* | |
* was already dequeued, so just bail on | |
* this one | |
*/ | |
return (EJUSTRETURN); | |
} | |
} else { | |
return (EJUSTRETURN); | |
} | |
} else { | |
kev = kn->kn_kevent; | |
} | |
} | |
/* move knote onto inprocess queue */ | |
assert(kn->kn_tq == &kq->kq_head); | |
TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); | |
kn->kn_tq = inprocessp; | |
TAILQ_INSERT_TAIL(inprocessp, kn, kn_tqe); | |
/* | |
* Determine how to dispatch the knote for future event handling. | |
* not-fired: just return (do not invoke the callback). | |
* One-shot: deactivate it. | |
* Clear: deactivate and clear the state. | |
* Dispatch: don't clear state, just deactivate it and mark it disabled. | |
* All others: just leave where they are. | |
*/ | |
if (result == 0) { | |
return (EJUSTRETURN); | |
} else if ((kn->kn_flags & EV_ONESHOT) != 0) { | |
knote_deactivate(kn); | |
if (kqlock2knotedrop(kq, kn)) { | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
} | |
} else if ((kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) != 0) { | |
if ((kn->kn_flags & EV_DISPATCH) != 0) { | |
/* deactivate and disable all dispatch knotes */ | |
knote_deactivate(kn); | |
kn->kn_status |= KN_DISABLED; | |
} else if (!touch || kn->kn_fflags == 0) { | |
/* only deactivate if nothing since the touch */ | |
knote_deactivate(kn); | |
} | |
if (!touch && (kn->kn_flags & EV_CLEAR) != 0) { | |
/* manually clear non-touch knotes */ | |
kn->kn_data = 0; | |
kn->kn_fflags = 0; | |
} | |
kqunlock(kq); | |
} else { | |
/* | |
* leave on inprocess queue. We'll | |
* move all the remaining ones back | |
* to the kq queue and wake up any | |
* waiters when we are done. | |
*/ | |
kqunlock(kq); | |
} | |
/* callback to handle each event as we find it */ | |
error = (callback)(kq, &kev, data); | |
kqlock(kq); | |
return (error); | |
} | |
/* | |
* Return 0 to indicate that processing should proceed, | |
* -1 if there is nothing to process. | |
* | |
* Called with kqueue locked and returns the same way, | |
* but may drop lock temporarily. | |
*/ | |
static int | |
kqueue_begin_processing(struct kqueue *kq) | |
{ | |
for (;;) { | |
if (kq->kq_count == 0) { | |
return (-1); | |
} | |
/* if someone else is processing the queue, wait */ | |
if (kq->kq_nprocess != 0) { | |
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait with invalid wait queue from kqueue_begin_processing.\n"); | |
} | |
wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, | |
&kq->kq_nprocess, THREAD_UNINT, 0); | |
kq->kq_state |= KQ_PROCWAIT; | |
kqunlock(kq); | |
thread_block(THREAD_CONTINUE_NULL); | |
kqlock(kq); | |
} else { | |
kq->kq_nprocess = 1; | |
return (0); | |
} | |
} | |
} | |
/* | |
* Called with kqueue lock held. | |
*/ | |
static void | |
kqueue_end_processing(struct kqueue *kq) | |
{ | |
kq->kq_nprocess = 0; | |
if (kq->kq_state & KQ_PROCWAIT) { | |
kq->kq_state &= ~KQ_PROCWAIT; | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, | |
&kq->kq_nprocess, THREAD_AWAKENED); | |
} | |
} | |
/* | |
* kqueue_process - process the triggered events in a kqueue | |
* | |
* Walk the queued knotes and validate that they are | |
* really still triggered events by calling the filter | |
* routines (if necessary). Hold a use reference on | |
* the knote to avoid it being detached. For each event | |
* that is still considered triggered, invoke the | |
* callback routine provided. | |
* | |
* caller holds a reference on the kqueue. | |
* kqueue locked on entry and exit - but may be dropped | |
* kqueue list locked (held for duration of call) | |
*/ | |
static int | |
kqueue_process(struct kqueue *kq, | |
kevent_callback_t callback, | |
void *data, | |
int *countp, | |
struct proc *p) | |
{ | |
struct kqtailq inprocess; | |
struct knote *kn; | |
int nevents; | |
int error; | |
TAILQ_INIT(&inprocess); | |
if (kqueue_begin_processing(kq) == -1) { | |
*countp = 0; | |
/* Nothing to process */ | |
return (0); | |
} | |
/* | |
* Clear any pre-posted status from previous runs, so we | |
* only detect events that occur during this run. | |
*/ | |
wait_queue_sub_clearrefs(kq->kq_wqs); | |
/* | |
* loop through the enqueued knotes, processing each one and | |
* revalidating those that need it. As they are processed, | |
* they get moved to the inprocess queue (so the loop can end). | |
*/ | |
error = 0; | |
nevents = 0; | |
while (error == 0 && | |
(kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { | |
error = knote_process(kn, callback, data, &inprocess, p); | |
if (error == EJUSTRETURN) | |
error = 0; | |
else | |
nevents++; | |
} | |
/* | |
* With the kqueue still locked, move any knotes | |
* remaining on the inprocess queue back to the | |
* kq's queue and wake up any waiters. | |
*/ | |
while ((kn = TAILQ_FIRST(&inprocess)) != NULL) { | |
assert(kn->kn_tq == &inprocess); | |
TAILQ_REMOVE(&inprocess, kn, kn_tqe); | |
kn->kn_tq = &kq->kq_head; | |
TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); | |
} | |
kqueue_end_processing(kq); | |
*countp = nevents; | |
return (error); | |
} | |
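/* | |
* kqueue_scan_continue - continuation for a thread that blocked in | |
* kqueue_scan. | |
* | |
* Translates the wait result into an error, re-scans (and possibly | |
* re-blocks) on a normal wakeup, and finally hands the results to | |
* the kevent continuation. | |
*/ | |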
static void | |
kqueue_scan_continue(void *data, wait_result_t wait_result) | |
{ | |
thread_t self = current_thread(); | |
uthread_t ut = (uthread_t)get_bsdthread_info(self); | |
struct _kqueue_scan * cont_args = &ut->uu_kevent.ss_kqueue_scan; | |
struct kqueue *kq = (struct kqueue *)data; | |
int error; | |
int count; | |
/* convert the (previous) wait_result to a proper error */ | |
switch (wait_result) { | |
case THREAD_AWAKENED: | |
kqlock(kq); | |
error = kqueue_process(kq, cont_args->call, cont_args, &count, | |
current_proc()); | |
if (error == 0 && count == 0) { | |
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait with invalid wait queue from kqueue_scan_continue.\n"); | |
} | |
wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, | |
KQ_EVENT, THREAD_ABORTSAFE, cont_args->deadline); | |
kq->kq_state |= KQ_SLEEP; | |
kqunlock(kq); | |
thread_block_parameter(kqueue_scan_continue, kq); | |
/* NOTREACHED */ | |
} | |
kqunlock(kq); | |
break; | |
case THREAD_TIMED_OUT: | |
error = EWOULDBLOCK; | |
break; | |
case THREAD_INTERRUPTED: | |
error = EINTR; | |
break; | |
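/* | |
* THREAD_RESTART presumably means the wait queue went away | |
* underneath us (e.g. the kqueue was torn down while this thread | |
* was blocked). Returning EBADF here, instead of falling through | |
* to the panic as a vanilla 2422 kernel does, appears to match how | |
* later XNU releases handle this case. | |
*/ | |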
case THREAD_RESTART: | |
printf("\nkqueue_scan_continue was called with a wait_result of THREAD_RESTART. A vanilla 2422 XNU kernel would have panicked!\n"); | |
error = EBADF; | |
break; | |
default: | |
panic("%s: - invalid wait_result (%d)", __func__, | |
wait_result); | |
error = 0; | |
} | |
/* call the continuation with the results */ | |
assert(cont_args->cont != NULL); | |
(cont_args->cont)(kq, cont_args->data, error); | |
} | |
/* | |
* kqueue_scan - scan and wait for events in a kqueue | |
* | |
* Process the triggered events in a kqueue. | |
* | |
* If no events are triggered, arrange to wait for | |
* them. If the caller provided a continuation | |
* routine, the thread blocks through that continuation | |
* instead of returning here. | |
* | |
* The callback routine must be valid. | |
* The caller must hold a use-count reference on the kq. | |
*/ | |
int | |
kqueue_scan(struct kqueue *kq, | |
kevent_callback_t callback, | |
kqueue_continue_t continuation, | |
void *data, | |
struct timeval *atvp, | |
struct proc *p) | |
{ | |
thread_continue_t cont = THREAD_CONTINUE_NULL; | |
uint64_t deadline; | |
int error; | |
int first; | |
assert(callback != NULL); | |
first = 1; | |
for (;;) { | |
wait_result_t wait_result; | |
int count; | |
/* | |
* Make a pass through the kq to find events already | |
* triggered. | |
*/ | |
kqlock(kq); | |
error = kqueue_process(kq, callback, data, &count, p); | |
if (error || count) | |
break; /* lock still held */ | |
/* looks like we have to consider blocking */ | |
if (first) { | |
first = 0; | |
/* convert the timeout to a deadline once */ | |
if (atvp->tv_sec || atvp->tv_usec) { | |
uint64_t now; | |
clock_get_uptime(&now); | |
nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC + | |
atvp->tv_usec * (long)NSEC_PER_USEC, | |
&deadline); | |
if (now >= deadline) { | |
/* non-blocking call */ | |
error = EWOULDBLOCK; | |
break; /* lock still held */ | |
} | |
deadline -= now; | |
clock_absolutetime_interval_to_deadline(deadline, &deadline); | |
} else { | |
deadline = 0; /* block forever */ | |
} | |
if (continuation) { | |
uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); | |
struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan; | |
cont_args->call = callback; | |
cont_args->cont = continuation; | |
cont_args->deadline = deadline; | |
cont_args->data = data; | |
cont = kqueue_scan_continue; | |
} | |
} | |
/* go ahead and wait */ | |
if (!wait_queue_is_valid((myWaitQueue*)kq->kq_wqs)) { | |
printf("\nAbout to call wait_queue_assert_wait_with_leeway with invalid wait queue from kqueue_scan.\n"); | |
} | |
wait_queue_assert_wait_with_leeway((wait_queue_t)kq->kq_wqs, | |
KQ_EVENT, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, | |
deadline, 0); | |
kq->kq_state |= KQ_SLEEP; | |
kqunlock(kq); | |
wait_result = thread_block_parameter(cont, kq); | |
/* NOTREACHED if (continuation != NULL) */ | |
switch (wait_result) { | |
case THREAD_AWAKENED: | |
continue; | |
case THREAD_TIMED_OUT: | |
return (EWOULDBLOCK); | |
case THREAD_INTERRUPTED: | |
return (EINTR); | |
default: | |
panic("%s: - bad wait_result (%d)", __func__, | |
wait_result); | |
error = 0; | |
} | |
} | |
kqunlock(kq); | |
return (error); | |
} | |
/* | |
* XXX | |
* This could be expanded to call kqueue_scan, if desired. | |
*/ | |
/*ARGSUSED*/ | |
static int | |
kqueue_read(__unused struct fileproc *fp, | |
__unused struct uio *uio, | |
__unused int flags, | |
__unused vfs_context_t ctx) | |
{ | |
return (ENXIO); | |
} | |
/*ARGSUSED*/ | |
static int | |
kqueue_write(__unused struct fileproc *fp, | |
__unused struct uio *uio, | |
__unused int flags, | |
__unused vfs_context_t ctx) | |
{ | |
return (ENXIO); | |
} | |
/*ARGSUSED*/ | |
static int | |
kqueue_ioctl(__unused struct fileproc *fp, | |
__unused u_long com, | |
__unused caddr_t data, | |
__unused vfs_context_t ctx) | |
{ | |
return (ENOTTY); | |
} | |
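/* | |
* kqueue_select - select()/poll() support for a kqueue descriptor. | |
* | |
* Reports the kqueue readable when at least one queued knote has a | |
* pending event; stay-queued knotes are peeked at individually, | |
* since their presence on the queue does not by itself mean an | |
* event is pending. | |
*/ | |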
/*ARGSUSED*/ | |
static int | |
kqueue_select(struct fileproc *fp, int which, void *wql, | |
__unused vfs_context_t ctx) | |
{ | |
struct kqueue *kq = (struct kqueue *)fp->f_data; | |
struct knote *kn; | |
struct kqtailq inprocessq; | |
int retnum = 0; | |
if (which != FREAD) | |
return (0); | |
TAILQ_INIT(&inprocessq); | |
kqlock(kq); | |
/* | |
* If this is the first pass, link the wait queue associated with the | |
* kqueue onto the wait queue set for the select(). Normally we | |
* use selrecord() for this, but it uses the wait queue within the | |
* selinfo structure and we need to use the main one for the kqueue to | |
* catch events from KN_STAYQUEUED sources. So we do the linkage manually. | |
* (The select() call will unlink them when it ends). | |
*/ | |
if (wql != NULL) { | |
thread_t cur_act = current_thread(); | |
struct uthread * ut = get_bsdthread_info(cur_act); | |
kq->kq_state |= KQ_SEL; | |
wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset, | |
(wait_queue_link_t)wql); | |
} | |
if (kqueue_begin_processing(kq) == -1) { | |
kqunlock(kq); | |
return (0); | |
} | |
if (kq->kq_count != 0) { | |
/* | |
* there is something queued - but it might be a | |
* KN_STAYQUEUED knote, which may or may not have | |
* any events pending. So, we have to walk the | |
* list of knotes to see, and peek at the stay- | |
* queued ones to be really sure. | |
*/ | |
while ((kn = (struct knote *)TAILQ_FIRST(&kq->kq_head)) != NULL) { | |
if ((kn->kn_status & KN_STAYQUEUED) == 0) { | |
retnum = 1; | |
goto out; | |
} | |
TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); | |
TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe); | |
if (kqlock2knoteuse(kq, kn)) { | |
unsigned peek; | |
peek = kn->kn_fop->f_peek(kn); | |
if (knoteuse2kqlock(kq, kn)) { | |
if (peek > 0) { | |
retnum = 1; | |
goto out; | |
} | |
} else { | |
retnum = 0; | |
} | |
} | |
} | |
} | |
out: | |
/* Return knotes to active queue */ | |
while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) { | |
TAILQ_REMOVE(&inprocessq, kn, kn_tqe); | |
kn->kn_tq = &kq->kq_head; | |
TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); | |
} | |
kqueue_end_processing(kq); | |
kqunlock(kq); | |
return (retnum); | |
} | |
/* | |
* kqueue_close - | |
*/ | |
/*ARGSUSED*/ | |
static int | |
kqueue_close(struct fileglob *fg, __unused vfs_context_t ctx) | |
{ | |
struct kqueue *kq = (struct kqueue *)fg->fg_data; | |
kqueue_dealloc(kq); | |
fg->fg_data = NULL; | |
return (0); | |
} | |
/*ARGSUSED*/ | |
/* | |
* The caller has taken a use-count reference on this kqueue and will donate it | |
* to the kqueue we are being added to. This keeps the kqueue from closing until | |
* that relationship is torn down. | |
*/ | |
static int | |
kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx) | |
{ | |
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; | |
struct kqueue *parentkq = kn->kn_kq; | |
if (parentkq == kq || | |
kn->kn_filter != EVFILT_READ) | |
return (1); | |
/* | |
* We have to avoid creating a cycle when nesting kqueues | |
* inside another. Rather than trying to walk the whole | |
* potential DAG of nested kqueues, we just use a simple | |
* ceiling protocol. When a kqueue is inserted into another, | |
* we check that the (future) parent is not already nested | |
* into another kqueue at a lower level than the potential | |
* child (because it could indicate a cycle). If that test | |
* passes, we just mark the nesting levels accordingly. | |
*/ | |
kqlock(parentkq); | |
if (parentkq->kq_level > 0 && | |
parentkq->kq_level < kq->kq_level) | |
{ | |
kqunlock(parentkq); | |
return (1); | |
} else { | |
/* set parent level appropriately */ | |
if (parentkq->kq_level == 0) | |
parentkq->kq_level = 2; | |
if (parentkq->kq_level < kq->kq_level + 1) | |
parentkq->kq_level = kq->kq_level + 1; | |
kqunlock(parentkq); | |
kn->kn_fop = &kqread_filtops; | |
kqlock(kq); | |
KNOTE_ATTACH(&kq->kq_sel.si_note, kn); | |
/* indicate nesting in child, if needed */ | |
if (kq->kq_level == 0) | |
kq->kq_level = 1; | |
kqunlock(kq); | |
return (0); | |
} | |
} | |
/* | |
* kqueue_drain - called when kq is closed | |
*/ | |
/*ARGSUSED*/ | |
static int | |
kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx) | |
{ | |
struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data; | |
kqlock(kq); | |
kqueue_wakeup(kq, 1); | |
kqunlock(kq); | |
return (0); | |
} | |
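/* | |
* kqueue_stat - fill in stat information for a kqueue descriptor. | |
* | |
* st_size reports the number of queued events and st_blksize the | |
* size of the kevent structure the queue is being used with. | |
*/ | |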
/*ARGSUSED*/ | |
int | |
kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t ctx) | |
{ | |
struct kqueue *kq = (struct kqueue *)fp->f_data; | |
if (isstat64 != 0) { | |
struct stat64 *sb64 = (struct stat64 *)ub; | |
bzero((void *)sb64, sizeof(*sb64)); | |
sb64->st_size = kq->kq_count; | |
if (kq->kq_state & KQ_KEV64) | |
sb64->st_blksize = sizeof(struct kevent64_s); | |
else | |
sb64->st_blksize = sizeof(struct kevent); | |
sb64->st_mode = S_IFIFO; | |
} else { | |
struct stat *sb = (struct stat *)ub; | |
bzero((void *)sb, sizeof(*sb)); | |
sb->st_size = kq->kq_count; | |
if (kq->kq_state & KQ_KEV64) | |
sb->st_blksize = sizeof(struct kevent64_s); | |
else | |
sb->st_blksize = sizeof(struct kevent); | |
sb->st_mode = S_IFIFO; | |
} | |
return (0); | |
} | |
/* | |
* Called with the kqueue locked | |
*/ | |
static void | |
kqueue_wakeup(struct kqueue *kq, int closed) | |
{ | |
if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0 || kq->kq_nprocess > 0) { | |
kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT, | |
(closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED); | |
} | |
} | |
void | |
klist_init(struct klist *list) | |
{ | |
SLIST_INIT(list); | |
} | |
/* | |
* Query/Post each knote in the object's list | |
* | |
* The object lock protects the list. It is assumed | |
* that the filter/event routine for the object can | |
* determine that the object is already locked (via | |
* the hint) and not deadlock itself. | |
* | |
* The object lock should also hold off pending | |
* detach/drop operations. But we'll prevent it here | |
* too - just in case. | |
*/ | |
void | |
knote(struct klist *list, long hint) | |
{ | |
struct knote *kn; | |
SLIST_FOREACH(kn, list, kn_selnext) { | |
struct kqueue *kq = kn->kn_kq; | |
kqlock(kq); | |
if (kqlock2knoteuse(kq, kn)) { | |
int result; | |
/* call the event with only a use count */ | |
result = kn->kn_fop->f_event(kn, hint); | |
/* if it's not going away and is triggered */ | |
if (knoteuse2kqlock(kq, kn) && result) | |
knote_activate(kn, 1); | |
/* lock held again */ | |
} | |
kqunlock(kq); | |
} | |
} | |
/* | |
* attach a knote to the specified list. Return true if this is the first entry. | |
* The list is protected by whatever lock the object it is associated with uses. | |
*/ | |
int | |
knote_attach(struct klist *list, struct knote *kn) | |
{ | |
int ret = SLIST_EMPTY(list); | |
SLIST_INSERT_HEAD(list, kn, kn_selnext); | |
return (ret); | |
} | |
/* | |
* detach a knote from the specified list. Return true if that was the last entry. | |
* The list is protected by whatever lock the object it is associated with uses. | |
*/ | |
int | |
knote_detach(struct klist *list, struct knote *kn) | |
{ | |
SLIST_REMOVE(list, kn, knote, kn_selnext); | |
return (SLIST_EMPTY(list)); | |
} | |
/* | |
* For a given knote, link a provided wait queue directly with the kqueue. | |
* Wakeups will happen via recursive wait queue support. But nothing will move | |
* the knote to the active list at wakeup (nothing calls knote()). Instead, | |
* we permanently enqueue them here. | |
* | |
* kqueue and knote references are held by caller. | |
* | |
* caller provides the wait queue link structure. | |
*/ | |
int | |
knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
kern_return_t kr; | |
kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql); | |
if (kr == KERN_SUCCESS) { | |
knote_markstayqueued(kn); | |
return (0); | |
} else { | |
return (EINVAL); | |
} | |
} | |
/* | |
* Unlink the provided wait queue from the kqueue associated with a knote. | |
* Also remove it from the magic list of directly attached knotes. | |
* | |
* Note that the unlink may have already happened from the other side, so | |
* ignore any failures to unlink and just remove it from the kqueue list. | |
* | |
* On success, caller is responsible for the link structure | |
*/ | |
int | |
knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
kern_return_t kr; | |
kr = wait_queue_unlink_nofree(wq, kq->kq_wqs, wqlp); | |
kqlock(kq); | |
kn->kn_status &= ~KN_STAYQUEUED; | |
knote_dequeue(kn); | |
kqunlock(kq); | |
return ((kr != KERN_SUCCESS) ? EINVAL : 0); | |
} | |
/* | |
* remove all knotes referencing a specified fd | |
* | |
* Essentially an inlined knote_remove & knote_drop | |
* when we know for sure that the thing is a file | |
* | |
* Entered with the proc_fd lock already held. | |
* It returns the same way, but may drop it temporarily. | |
*/ | |
void | |
knote_fdclose(struct proc *p, int fd) | |
{ | |
struct filedesc *fdp = p->p_fd; | |
struct klist *list; | |
struct knote *kn; | |
list = &fdp->fd_knlist[fd]; | |
while ((kn = SLIST_FIRST(list)) != NULL) { | |
struct kqueue *kq = kn->kn_kq; | |
if (kq->kq_p != p) | |
panic("%s: proc mismatch (kq->kq_p=%p != p=%p)", | |
__func__, kq->kq_p, p); | |
kqlock(kq); | |
proc_fdunlock(p); | |
/* | |
* Convert the lock to a drop ref. | |
* If we get it, go ahead and drop it. | |
* Otherwise, we waited for it to | |
* be dropped by the other guy, so | |
* it is safe to move on in the list. | |
*/ | |
if (kqlock2knotedrop(kq, kn)) { | |
kn->kn_fop->f_detach(kn); | |
knote_drop(kn, p); | |
} | |
proc_fdlock(p); | |
/* the fd tables may have changed - start over */ | |
list = &fdp->fd_knlist[fd]; | |
} | |
} | |
/* proc_fdlock held on entry (and exit) */ | |
static int | |
knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p) | |
{ | |
struct klist *list = NULL; | |
if (! kn->kn_fop->f_isfd) { | |
if (fdp->fd_knhashmask == 0) | |
fdp->fd_knhash = hashinit(CONFIG_KN_HASHSIZE, M_KQUEUE, | |
&fdp->fd_knhashmask); | |
list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; | |
} else { | |
if ((u_int)fdp->fd_knlistsize <= kn->kn_id) { | |
u_int size = 0; | |
if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur | |
|| kn->kn_id >= (uint64_t)maxfiles) | |
return (EINVAL); | |
/* have to grow the fd_knlist */ | |
size = fdp->fd_knlistsize; | |
while (size <= kn->kn_id) | |
size += KQEXTENT; | |
if (size >= (UINT_MAX/sizeof(struct klist *))) | |
return (EINVAL); | |
MALLOC(list, struct klist *, | |
size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); | |
if (list == NULL) | |
return (ENOMEM); | |
bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, | |
fdp->fd_knlistsize * sizeof(struct klist *)); | |
bzero((caddr_t)list + | |
fdp->fd_knlistsize * sizeof(struct klist *), | |
(size - fdp->fd_knlistsize) * sizeof(struct klist *)); | |
FREE(fdp->fd_knlist, M_KQUEUE); | |
fdp->fd_knlist = list; | |
fdp->fd_knlistsize = size; | |
} | |
list = &fdp->fd_knlist[kn->kn_id]; | |
} | |
SLIST_INSERT_HEAD(list, kn, kn_link); | |
return (0); | |
} | |
/* | |
* should be called at spl == 0, since we don't want to hold spl | |
* while calling fdrop and free. | |
*/ | |
static void | |
knote_drop(struct knote *kn, __unused struct proc *ctxp) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
struct proc *p = kq->kq_p; | |
struct filedesc *fdp = p->p_fd; | |
struct klist *list; | |
int needswakeup; | |
proc_fdlock(p); | |
if (kn->kn_fop->f_isfd) | |
list = &fdp->fd_knlist[kn->kn_id]; | |
else | |
list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; | |
SLIST_REMOVE(list, kn, knote, kn_link); | |
kqlock(kq); | |
knote_dequeue(kn); | |
needswakeup = (kn->kn_status & KN_USEWAIT); | |
kqunlock(kq); | |
proc_fdunlock(p); | |
if (needswakeup) | |
wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, | |
THREAD_AWAKENED); | |
if (kn->kn_fop->f_isfd) | |
fp_drop(p, kn->kn_id, kn->kn_fp, 0); | |
knote_free(kn); | |
} | |
/* called with kqueue lock held */ | |
static void | |
knote_activate(struct knote *kn, int propagate) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
kn->kn_status |= KN_ACTIVE; | |
knote_enqueue(kn); | |
kqueue_wakeup(kq, 0); | |
/* this is a real event: wake up the parent kq, too */ | |
if (propagate) | |
KNOTE(&kq->kq_sel.si_note, 0); | |
} | |
/* called with kqueue lock held */ | |
static void | |
knote_deactivate(struct knote *kn) | |
{ | |
kn->kn_status &= ~KN_ACTIVE; | |
knote_dequeue(kn); | |
} | |
/* called with kqueue lock held */ | |
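/* | |
* A knote goes onto the queue if it is stay-queued and not already | |
* there, or if it is an ordinary knote that is neither queued nor | |
* disabled. | |
*/ | |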
static void | |
knote_enqueue(struct knote *kn) | |
{ | |
if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_STAYQUEUED || | |
(kn->kn_status & (KN_QUEUED | KN_STAYQUEUED | KN_DISABLED)) == 0) { | |
struct kqtailq *tq = kn->kn_tq; | |
struct kqueue *kq = kn->kn_kq; | |
TAILQ_INSERT_TAIL(tq, kn, kn_tqe); | |
kn->kn_status |= KN_QUEUED; | |
kq->kq_count++; | |
} | |
} | |
/* called with kqueue lock held */ | |
static void | |
knote_dequeue(struct knote *kn) | |
{ | |
struct kqueue *kq = kn->kn_kq; | |
if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_QUEUED) { | |
struct kqtailq *tq = kn->kn_tq; | |
TAILQ_REMOVE(tq, kn, kn_tqe); | |
kn->kn_tq = &kq->kq_head; | |
kn->kn_status &= ~KN_QUEUED; | |
kq->kq_count--; | |
} | |
} | |
void | |
knote_init(void) | |
{ | |
knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), | |
8192, "knote zone"); | |
/* allocate kq lock group attribute and group */ | |
kq_lck_grp_attr = lck_grp_attr_alloc_init(); | |
kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr); | |
/* Allocate kq lock attribute */ | |
kq_lck_attr = lck_attr_alloc_init(); | |
/* Initialize the timer filter lock */ | |
lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr); | |
#if VM_PRESSURE_EVENTS | |
/* Initialize the vm pressure list lock */ | |
vm_pressure_init(kq_lck_grp, kq_lck_attr); | |
#endif | |
#if CONFIG_MEMORYSTATUS | |
/* Initialize the memorystatus list lock */ | |
memorystatus_kevent_init(kq_lck_grp, kq_lck_attr); | |
#endif | |
} | |
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL) | |
static struct knote * | |
knote_alloc(void) | |
{ | |
return ((struct knote *)zalloc(knote_zone)); | |
} | |
static void | |
knote_free(struct knote *kn) | |
{ | |
zfree(knote_zone, kn); | |
} | |
#if SOCKETS | |
#include <sys/param.h> | |
#include <sys/socket.h> | |
#include <sys/protosw.h> | |
#include <sys/domain.h> | |
#include <sys/mbuf.h> | |
#include <sys/kern_event.h> | |
#include <sys/malloc.h> | |
#include <sys/sys_domain.h> | |
#include <sys/syslog.h> | |
static lck_grp_attr_t *kev_lck_grp_attr; | |
static lck_attr_t *kev_lck_attr; | |
static lck_grp_t *kev_lck_grp; | |
static decl_lck_rw_data(,kev_lck_data); | |
static lck_rw_t *kev_rwlock = &kev_lck_data; | |
static int kev_attach(struct socket *so, int proto, struct proc *p); | |
static int kev_detach(struct socket *so); | |
static int kev_control(struct socket *so, u_long cmd, caddr_t data, | |
struct ifnet *ifp, struct proc *p); | |
static lck_mtx_t * event_getlock(struct socket *, int); | |
static int event_lock(struct socket *, int, void *); | |
static int event_unlock(struct socket *, int, void *); | |
static int event_sofreelastref(struct socket *); | |
static void kev_delete(struct kern_event_pcb *); | |
static struct pr_usrreqs event_usrreqs = { | |
.pru_attach = kev_attach, | |
.pru_control = kev_control, | |
.pru_detach = kev_detach, | |
.pru_soreceive = soreceive, | |
}; | |
static struct protosw eventsw[] = { | |
{ | |
.pr_type = SOCK_RAW, | |
.pr_protocol = SYSPROTO_EVENT, | |
.pr_flags = PR_ATOMIC, | |
.pr_usrreqs = &event_usrreqs, | |
.pr_lock = event_lock, | |
.pr_unlock = event_unlock, | |
.pr_getlock = event_getlock, | |
} | |
}; | |
static lck_mtx_t * | |
event_getlock(struct socket *so, int locktype) | |
{ | |
#pragma unused(locktype) | |
struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; | |
if (so->so_pcb != NULL) { | |
if (so->so_usecount < 0) | |
panic("%s: so=%p usecount=%d lrh= %s\n", __func__, | |
so, so->so_usecount, solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} else { | |
panic("%s: so=%p NULL NO so_pcb %s\n", __func__, | |
so, solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} | |
return (&ev_pcb->evp_mtx); | |
} | |
static int | |
event_lock(struct socket *so, int refcount, void *lr) | |
{ | |
void *lr_saved; | |
if (lr == NULL) | |
lr_saved = __builtin_return_address(0); | |
else | |
lr_saved = lr; | |
if (so->so_pcb != NULL) { | |
lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); | |
} else { | |
panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__, | |
so, lr_saved, solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} | |
if (so->so_usecount < 0) { | |
panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__, | |
so, so->so_pcb, lr_saved, so->so_usecount, | |
solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} | |
if (refcount) | |
so->so_usecount++; | |
so->lock_lr[so->next_lock_lr] = lr_saved; | |
so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; | |
return (0); | |
} | |
static int | |
event_unlock(struct socket *so, int refcount, void *lr) | |
{ | |
void *lr_saved; | |
lck_mtx_t *mutex_held; | |
if (lr == NULL) | |
lr_saved = __builtin_return_address(0); | |
else | |
lr_saved = lr; | |
if (refcount) | |
so->so_usecount--; | |
if (so->so_usecount < 0) { | |
panic("%s: so=%p usecount=%d lrh= %s\n", __func__, | |
so, so->so_usecount, solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} | |
if (so->so_pcb == NULL) { | |
panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__, | |
so, so->so_usecount, (void *)lr_saved, | |
solockhistory_nr(so)); | |
/* NOTREACHED */ | |
} | |
mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); | |
lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
so->unlock_lr[so->next_unlock_lr] = lr_saved; | |
so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; | |
if (so->so_usecount == 0) { | |
VERIFY(so->so_flags & SOF_PCBCLEARING); | |
event_sofreelastref(so); | |
} else { | |
lck_mtx_unlock(mutex_held); | |
} | |
return (0); | |
} | |
static int | |
event_sofreelastref(struct socket *so) | |
{ | |
struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; | |
lck_mtx_assert(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED); | |
so->so_pcb = NULL; | |
/* | |
* Disable upcall in the event another thread is in kev_post_msg() | |
* appending record to the receive socket buffer, since sbwakeup() | |
* may release the socket lock otherwise. | |
*/ | |
so->so_rcv.sb_flags &= ~SB_UPCALL; | |
so->so_snd.sb_flags &= ~SB_UPCALL; | |
so->so_event = NULL; | |
lck_mtx_unlock(&(ev_pcb->evp_mtx)); | |
lck_mtx_assert(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED); | |
lck_rw_lock_exclusive(kev_rwlock); | |
LIST_REMOVE(ev_pcb, evp_link); | |
lck_rw_done(kev_rwlock); | |
kev_delete(ev_pcb); | |
sofreelastref(so, 1); | |
return (0); | |
} | |
static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw)); | |
static | |
struct kern_event_head kern_event_head; | |
static u_int32_t static_event_id = 0; | |
#define EVPCB_ZONE_MAX 65536 | |
#define EVPCB_ZONE_NAME "kerneventpcb" | |
static struct zone *ev_pcb_zone; | |
/* | |
* Install the protosw's for the NKE manager. Invoked at extension load time | |
*/ | |
void | |
kern_event_init(struct domain *dp) | |
{ | |
struct protosw *pr; | |
int i; | |
VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); | |
VERIFY(dp == systemdomain); | |
kev_lck_grp_attr = lck_grp_attr_alloc_init(); | |
if (kev_lck_grp_attr == NULL) { | |
panic("%s: lck_grp_attr_alloc_init failed\n", __func__); | |
/* NOTREACHED */ | |
} | |
kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol", | |
kev_lck_grp_attr); | |
if (kev_lck_grp == NULL) { | |
panic("%s: lck_grp_alloc_init failed\n", __func__); | |
/* NOTREACHED */ | |
} | |
kev_lck_attr = lck_attr_alloc_init(); | |
if (kev_lck_attr == NULL) { | |
panic("%s: lck_attr_alloc_init failed\n", __func__); | |
/* NOTREACHED */ | |
} | |
lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr); | |
if (kev_rwlock == NULL) { | |
panic("%s: lck_mtx_alloc_init failed\n", __func__); | |
/* NOTREACHED */ | |
} | |
for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++) | |
net_add_proto(pr, dp, 1); | |
ev_pcb_zone = zinit(sizeof(struct kern_event_pcb), | |
EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME); | |
if (ev_pcb_zone == NULL) { | |
panic("%s: failed allocating ev_pcb_zone", __func__); | |
/* NOTREACHED */ | |
} | |
zone_change(ev_pcb_zone, Z_EXPAND, TRUE); | |
zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE); | |
} | |
static int | |
kev_attach(struct socket *so, __unused int proto, __unused struct proc *p) | |
{ | |
int error = 0; | |
struct kern_event_pcb *ev_pcb; | |
error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE); | |
if (error != 0) | |
return (error); | |
if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) { | |
return (ENOBUFS); | |
} | |
bzero(ev_pcb, sizeof(struct kern_event_pcb)); | |
lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr); | |
ev_pcb->evp_socket = so; | |
ev_pcb->evp_vendor_code_filter = 0xffffffff; | |
so->so_pcb = (caddr_t) ev_pcb; | |
lck_rw_lock_exclusive(kev_rwlock); | |
LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link); | |
lck_rw_done(kev_rwlock); | |
return (error); | |
} | |
static void | |
kev_delete(struct kern_event_pcb *ev_pcb) | |
{ | |
VERIFY(ev_pcb != NULL); | |
lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp); | |
zfree(ev_pcb_zone, ev_pcb); | |
} | |
static int | |
kev_detach(struct socket *so) | |
{ | |
struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb; | |
if (ev_pcb != NULL) { | |
soisdisconnected(so); | |
so->so_flags |= SOF_PCBCLEARING; | |
} | |
return (0); | |
} | |
/* | |
* For now, kev_vendor_code and mbuf_tags use the same | |
* mechanism. | |
*/ | |
errno_t kev_vendor_code_find( | |
const char *string, | |
u_int32_t *out_vendor_code) | |
{ | |
if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) { | |
return (EINVAL); | |
} | |
return (net_str_id_find_internal(string, out_vendor_code, | |
NSI_VENDOR_CODE, 1)); | |
} | |
errno_t | |
kev_msg_post(struct kev_msg *event_msg) | |
{ | |
mbuf_tag_id_t min_vendor, max_vendor; | |
net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE); | |
if (event_msg == NULL) | |
return (EINVAL); | |
/* | |
* Limit third parties to posting events for registered vendor codes | |
* only | |
*/ | |
if (event_msg->vendor_code < min_vendor || | |
event_msg->vendor_code > max_vendor) | |
return (EINVAL); | |
return (kev_post_msg(event_msg)); | |
} | |
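/*
 * Illustrative sketch (added comment, not part of the original source):
 * roughly how a kext would use the two routines above to post a custom
 * event. Field names follow struct kev_msg as used in this file; the
 * vendor string, class/subclass/event values and payload are made-up
 * placeholders.
 *
 *	u_int32_t vendor_code;
 *	u_int32_t payload = 42;
 *	struct kev_msg msg;
 *
 *	if (kev_vendor_code_find("com.example.driver", &vendor_code) != 0)
 *		return;
 *	bzero(&msg, sizeof (msg));
 *	msg.vendor_code  = vendor_code;
 *	msg.kev_class    = 1;			// driver-defined class
 *	msg.kev_subclass = 1;			// driver-defined subclass
 *	msg.event_code   = 1;			// driver-defined event
 *	msg.dv[0].data_length = sizeof (payload);
 *	msg.dv[0].data_ptr    = &payload;
 *	(void) kev_msg_post(&msg);		// EINVAL unless vendor_code is registered
 */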
int | |
kev_post_msg(struct kev_msg *event_msg) | |
{ | |
struct mbuf *m, *m2; | |
struct kern_event_pcb *ev_pcb; | |
struct kern_event_msg *ev; | |
char *tmp; | |
u_int32_t total_size; | |
int i; | |
/* Verify the message is small enough to fit in one mbuf w/o cluster */ | |
total_size = KEV_MSG_HEADER_SIZE; | |
for (i = 0; i < 5; i++) { | |
if (event_msg->dv[i].data_length == 0) | |
break; | |
total_size += event_msg->dv[i].data_length; | |
} | |
if (total_size > MLEN) { | |
return (EMSGSIZE); | |
} | |
m = m_get(M_DONTWAIT, MT_DATA); | |
if (m == 0) | |
return (ENOBUFS); | |
ev = mtod(m, struct kern_event_msg *); | |
total_size = KEV_MSG_HEADER_SIZE; | |
tmp = (char *) &ev->event_data[0]; | |
for (i = 0; i < 5; i++) { | |
if (event_msg->dv[i].data_length == 0) | |
break; | |
total_size += event_msg->dv[i].data_length; | |
bcopy(event_msg->dv[i].data_ptr, tmp, | |
event_msg->dv[i].data_length); | |
tmp += event_msg->dv[i].data_length; | |
} | |
ev->id = ++static_event_id; | |
ev->total_size = total_size; | |
ev->vendor_code = event_msg->vendor_code; | |
ev->kev_class = event_msg->kev_class; | |
ev->kev_subclass = event_msg->kev_subclass; | |
ev->event_code = event_msg->event_code; | |
m->m_len = total_size; | |
lck_rw_lock_shared(kev_rwlock); | |
for (ev_pcb = LIST_FIRST(&kern_event_head); | |
ev_pcb; | |
ev_pcb = LIST_NEXT(ev_pcb, evp_link)) { | |
lck_mtx_lock(&ev_pcb->evp_mtx); | |
if (ev_pcb->evp_socket->so_pcb == NULL) { | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
continue; | |
} | |
if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) { | |
if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) { | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
continue; | |
} | |
if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) { | |
if (ev_pcb->evp_class_filter != ev->kev_class) { | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
continue; | |
} | |
if ((ev_pcb->evp_subclass_filter != KEV_ANY_SUBCLASS) && | |
(ev_pcb->evp_subclass_filter != ev->kev_subclass)) { | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
continue; | |
} | |
} | |
} | |
m2 = m_copym(m, 0, m->m_len, M_NOWAIT); | |
if (m2 == 0) { | |
m_free(m); | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
lck_rw_done(kev_rwlock); | |
return (ENOBUFS); | |
} | |
if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) | |
sorwakeup(ev_pcb->evp_socket); | |
lck_mtx_unlock(&ev_pcb->evp_mtx); | |
} | |
m_free(m); | |
lck_rw_done(kev_rwlock); | |
return (0); | |
} | |
static int | |
kev_control(struct socket *so, | |
u_long cmd, | |
caddr_t data, | |
__unused struct ifnet *ifp, | |
__unused struct proc *p) | |
{ | |
struct kev_request *kev_req = (struct kev_request *) data; | |
struct kern_event_pcb *ev_pcb; | |
struct kev_vendor_code *kev_vendor; | |
u_int32_t *id_value = (u_int32_t *) data; | |
switch (cmd) { | |
case SIOCGKEVID: | |
*id_value = static_event_id; | |
break; | |
case SIOCSKEVFILT: | |
ev_pcb = (struct kern_event_pcb *) so->so_pcb; | |
ev_pcb->evp_vendor_code_filter = kev_req->vendor_code; | |
ev_pcb->evp_class_filter = kev_req->kev_class; | |
ev_pcb->evp_subclass_filter = kev_req->kev_subclass; | |
break; | |
case SIOCGKEVFILT: | |
ev_pcb = (struct kern_event_pcb *) so->so_pcb; | |
kev_req->vendor_code = ev_pcb->evp_vendor_code_filter; | |
kev_req->kev_class = ev_pcb->evp_class_filter; | |
kev_req->kev_subclass = ev_pcb->evp_subclass_filter; | |
break; | |
case SIOCGKEVVENDOR: | |
kev_vendor = (struct kev_vendor_code *)data; | |
/* Make sure string is NULL terminated */ | |
kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0; | |
return (net_str_id_find_internal(kev_vendor->vendor_string, | |
&kev_vendor->vendor_code, NSI_VENDOR_CODE, 0)); | |
default: | |
return (ENOTSUP); | |
} | |
return (0); | |
} | |
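/*
 * Illustrative sketch (added comment, not part of the original source):
 * the userland side of this control interface. A process opens a
 * PF_SYSTEM/SYSPROTO_EVENT socket, installs a filter with SIOCSKEVFILT,
 * and then reads struct kern_event_msg records delivered by kev_post_msg().
 * Error handling is omitted.
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_ANY_VENDOR,
 *		.kev_class    = KEV_ANY_CLASS,
 *		.kev_subclass = KEV_ANY_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *
 *	char buf[1024];
 *	ssize_t n = recv(fd, buf, sizeof (buf), 0);	// one kern_event_msg per record
 */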
#endif /* SOCKETS */ | |
int | |
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) | |
{ | |
struct vinfo_stat * st; | |
/* No need for the funnel as fd is kept alive */ | |
st = &kinfo->kq_stat; | |
st->vst_size = kq->kq_count; | |
if (kq->kq_state & KQ_KEV64) | |
st->vst_blksize = sizeof(struct kevent64_s); | |
else | |
st->vst_blksize = sizeof(struct kevent); | |
st->vst_mode = S_IFIFO; | |
if (kq->kq_state & KQ_SEL) | |
kinfo->kq_state |= PROC_KQUEUE_SELECT; | |
if (kq->kq_state & KQ_SLEEP) | |
kinfo->kq_state |= PROC_KQUEUE_SLEEP; | |
return (0); | |
} | |
void | |
knote_markstayqueued(struct knote *kn) | |
{ | |
kqlock(kn->kn_kq); | |
kn->kn_status |= KN_STAYQUEUED; | |
knote_enqueue(kn); | |
kqunlock(kn->kn_kq); | |
} |
/* | |
* Copyright (c) 2000-2009 Apple Inc. All rights reserved. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
* | |
* This file contains Original Code and/or Modifications of Original Code | |
* as defined in and that are subject to the Apple Public Source License | |
* Version 2.0 (the 'License'). You may not use this file except in | |
* compliance with the License. The rights granted to you under the License | |
* may not be used to create, or enable the creation or redistribution of, | |
* unlawful or unlicensed copies of an Apple operating system, or to | |
* circumvent, violate, or enable the circumvention or violation of, any | |
* terms of an Apple operating system software license agreement. | |
* | |
* Please obtain a copy of the License at | |
* http://www.opensource.apple.com/apsl/ and read it before using this file. | |
* | |
* The Original Code and all software distributed under the License are | |
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
* Please see the License for the specific language governing rights and | |
* limitations under the License. | |
* | |
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
*/ | |
/* | |
* @OSF_FREE_COPYRIGHT@ | |
*/ | |
/* | |
* Mach Operating System | |
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University | |
* All Rights Reserved. | |
* | |
* Permission to use, copy, modify and distribute this software and its | |
* documentation is hereby granted, provided that both the copyright | |
* notice and this permission notice appear in all copies of the | |
* software, derivative works or modified versions, and any portions | |
* thereof, and that both notices appear in supporting documentation. | |
* | |
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
* | |
* Carnegie Mellon requests users of this software to return to | |
* | |
* Software Distribution Coordinator or [email protected] | |
* School of Computer Science | |
* Carnegie Mellon University | |
* Pittsburgh PA 15213-3890 | |
* | |
* any improvements or extensions that they make and grant Carnegie Mellon | |
* the rights to redistribute these changes. | |
*/ | |
/* | |
*/ | |
/* | |
* File: wait_queue.c (adapted from sched_prim.c) | |
* Author: Avadis Tevanian, Jr. | |
* Date: 1986 | |
* | |
* Primitives for manipulating wait queues: either global | |
* ones from sched_prim.c, or private ones associated with | |
* particular structures (ports, semaphores, etc.). | |
*/ | |
#include <kern/kern_types.h> | |
#include <kern/simple_lock.h> | |
#include <kern/zalloc.h> | |
#include <kern/queue.h> | |
#include <kern/spl.h> | |
#include <mach/sync_policy.h> | |
#include <kern/mach_param.h> | |
#include <kern/sched_prim.h> | |
#include <kern/wait_queue.h> | |
#include <vm/vm_kern.h> | |
/* forward declarations */ | |
static boolean_t wait_queue_member_locked( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set); | |
static void wait_queues_init(void); | |
#define WAIT_QUEUE_MAX thread_max | |
#define WAIT_QUEUE_SET_MAX task_max * 3 | |
#define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64 | |
static zone_t _wait_queue_link_zone; | |
static zone_t _wait_queue_set_zone; | |
static zone_t _wait_queue_zone; | |
/* see rdar://6737748&5561610; we need an unshadowed | |
* definition of a WaitQueueLink for debugging, | |
* but it needs to be used somewhere to wind up in | |
* the dSYM file. */ | |
volatile WaitQueueLink *unused_except_for_debugging; | |
/* | |
* Waiting protocols and implementation: | |
* | |
* Each thread may be waiting for exactly one event; this event | |
* is set using assert_wait(). That thread may be awakened either | |
* by performing a thread_wakeup_prim() on its event, | |
* or by directly waking that thread up with clear_wait(). | |
* | |
* The implementation of wait events uses a hash table. Each | |
* bucket is a queue of threads having the same hash function | |
* value; the chain for the queue (linked list) is the run queue | |
* field. [It is not possible to be waiting and runnable at the | |
* same time.] | |
* | |
* Locks on both the thread and on the hash buckets govern the | |
* wait event field and the queue chain field. Because wakeup | |
* operations only have the event as an argument, the event hash | |
* bucket must be locked before any thread. | |
* | |
* Scheduling operations may also occur at interrupt level; therefore, | |
* interrupts below splsched() must be prevented when holding | |
* thread or hash bucket locks. | |
* | |
* The wait event hash table declarations are as follows: | |
*/ | |
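/*
 * Illustrative sketch (added comment): the classic waiter/waker pattern the
 * hash table below serves. "event" is any kernel address used as a
 * rendezvous token; both sides hash it to one of the global wait queues.
 *
 *	// waiter
 *	assert_wait((event_t)&some_object, THREAD_UNINT);
 *	thread_block(THREAD_CONTINUE_NULL);
 *
 *	// waker
 *	thread_wakeup((event_t)&some_object);
 */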
struct wait_queue boot_wait_queue[1]; | |
__private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0]; | |
__private_extern__ uint32_t num_wait_queues = 1; | |
#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align))) | |
#define ROUNDDOWN(x,y) (((x)/(y))*(y)) | |
static uint32_t | |
compute_wait_hash_size(void) | |
{ | |
uint32_t hsize, queues; | |
if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize))) | |
return (hsize); | |
queues = thread_max / 11; | |
hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE); | |
return hsize; | |
} | |
static void | |
wait_queues_init(void) | |
{ | |
uint32_t i, whsize, qsz; | |
kern_return_t kret; | |
/* | |
* Determine the amount of memory we're willing to reserve for | |
* the waitqueue hash table | |
*/ | |
whsize = compute_wait_hash_size(); | |
/* Determine the number of waitqueues we can fit. */ | |
qsz = sizeof (struct wait_queue); | |
whsize = ROUNDDOWN(whsize, qsz); | |
num_wait_queues = whsize / qsz; | |
/* | |
* The hash algorithm requires that this be a power of 2, so we | |
* just mask off all the low-order bits. | |
*/ | |
for (i = 0; i < 31; i++) { | |
uint32_t bit = (1 << i); | |
if ((num_wait_queues & bit) == num_wait_queues) | |
break; | |
num_wait_queues &= ~bit; | |
} | |
assert(num_wait_queues > 0); | |
/* Now determine how much memory we really need. */ | |
whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE); | |
kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, | |
whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT); | |
if (kret != KERN_SUCCESS || wait_queues == NULL) | |
panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize); | |
for (i = 0; i < num_wait_queues; i++) { | |
wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO); | |
} | |
} | |
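/*
 * Added note: the mask loop above keeps only the highest set bit of
 * num_wait_queues, i.e. it rounds down to a power of two as the hash
 * requires. For example, if whsize/qsz works out to 1000 queues, the loop
 * leaves num_wait_queues == 512 (0x3E8 -> 0x200), and the final P2ROUNDUP
 * re-pads the allocation to whole pages.
 */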
void | |
wait_queue_bootstrap(void) | |
{ | |
wait_queues_init(); | |
_wait_queue_zone = zinit(sizeof(struct wait_queue), | |
WAIT_QUEUE_MAX * sizeof(struct wait_queue), | |
sizeof(struct wait_queue), | |
"wait queues"); | |
zone_change(_wait_queue_zone, Z_NOENCRYPT, TRUE); | |
_wait_queue_set_zone = zinit(sizeof(struct wait_queue_set), | |
WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set), | |
sizeof(struct wait_queue_set), | |
"wait queue sets"); | |
zone_change(_wait_queue_set_zone, Z_NOENCRYPT, TRUE); | |
_wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link), | |
WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link), | |
sizeof(struct _wait_queue_link), | |
"wait queue links"); | |
zone_change(_wait_queue_link_zone, Z_NOENCRYPT, TRUE); | |
} | |
/* | |
* Routine: wait_queue_init | |
* Purpose: | |
* Initialize a previously allocated wait queue. | |
* Returns: | |
* KERN_SUCCESS - The wait_queue_t was initialized | |
* KERN_INVALID_ARGUMENT - The policy parameter was invalid | |
*/ | |
kern_return_t | |
wait_queue_init( | |
wait_queue_t wq, | |
int policy) | |
{ | |
/* only FIFO and LIFO for now */ | |
if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0) | |
return KERN_INVALID_ARGUMENT; | |
wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); | |
wq->wq_type = _WAIT_QUEUE_inited; | |
wq->wq_eventmask = 0; | |
queue_init(&wq->wq_queue); | |
hw_lock_init(&wq->wq_interlock); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_alloc | |
* Purpose: | |
* Allocate and initialize a wait queue for use outside of | |
* the mach part of the kernel. | |
* Conditions: | |
* Nothing locked - can block. | |
* Returns: | |
* The allocated and initialized wait queue | |
* WAIT_QUEUE_NULL if there is a resource shortage | |
*/ | |
wait_queue_t | |
wait_queue_alloc( | |
int policy) | |
{ | |
wait_queue_t wq; | |
kern_return_t ret; | |
wq = (wait_queue_t) zalloc(_wait_queue_zone); | |
if (wq != WAIT_QUEUE_NULL) { | |
ret = wait_queue_init(wq, policy); | |
if (ret != KERN_SUCCESS) { | |
zfree(_wait_queue_zone, wq); | |
wq = WAIT_QUEUE_NULL; | |
} | |
} | |
return wq; | |
} | |
/* | |
* Routine: wait_queue_free | |
* Purpose: | |
* Free an allocated wait queue. | |
* Conditions: | |
* May block. | |
*/ | |
kern_return_t | |
wait_queue_free( | |
wait_queue_t wq) | |
{ | |
if (!wait_queue_is_queue(wq)) | |
return KERN_INVALID_ARGUMENT; | |
if (!queue_empty(&wq->wq_queue)) | |
return KERN_FAILURE; | |
zfree(_wait_queue_zone, wq); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_set_init | |
* Purpose: | |
* Initialize a previously allocated wait queue set. | |
* Returns: | |
* KERN_SUCCESS - The wait_queue_set_t was initialized | |
* KERN_INVALID_ARGUMENT - The policy parameter was invalid | |
*/ | |
kern_return_t | |
wait_queue_set_init( | |
wait_queue_set_t wqset, | |
int policy) | |
{ | |
kern_return_t ret; | |
ret = wait_queue_init(&wqset->wqs_wait_queue, policy); | |
if (ret != KERN_SUCCESS) | |
return ret; | |
wqset->wqs_wait_queue.wq_type = _WAIT_QUEUE_SET_inited; | |
if (policy & SYNC_POLICY_PREPOST) | |
wqset->wqs_wait_queue.wq_prepost = TRUE; | |
else | |
wqset->wqs_wait_queue.wq_prepost = FALSE; | |
queue_init(&wqset->wqs_setlinks); | |
queue_init(&wqset->wqs_preposts); | |
return KERN_SUCCESS; | |
} | |
kern_return_t | |
wait_queue_sub_init( | |
wait_queue_set_t wqset, | |
int policy) | |
{ | |
return wait_queue_set_init(wqset, policy); | |
} | |
kern_return_t | |
wait_queue_sub_clearrefs( | |
wait_queue_set_t wq_set) | |
{ | |
wait_queue_link_t wql; | |
queue_t q; | |
spl_t s; | |
if (!wait_queue_is_set(wq_set)) | |
return KERN_INVALID_ARGUMENT; | |
s = splsched(); | |
wqs_lock(wq_set); | |
q = &wq_set->wqs_preposts; | |
while (!queue_empty(q)) { | |
queue_remove_first(q, wql, wait_queue_link_t, wql_preposts); | |
assert(!wql_is_preposted(wql)); | |
} | |
wqs_unlock(wq_set); | |
splx(s); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_set_alloc | |
* Purpose: | |
* Allocate and initialize a wait queue set for | |
* use outside of the mach part of the kernel. | |
* Conditions: | |
* May block. | |
* Returns: | |
* The allocated and initialized wait queue set | |
* WAIT_QUEUE_SET_NULL if there is a resource shortage | |
*/ | |
wait_queue_set_t | |
wait_queue_set_alloc( | |
int policy) | |
{ | |
wait_queue_set_t wq_set; | |
wq_set = (wait_queue_set_t) zalloc(_wait_queue_set_zone); | |
if (wq_set != WAIT_QUEUE_SET_NULL) { | |
kern_return_t ret; | |
ret = wait_queue_set_init(wq_set, policy); | |
if (ret != KERN_SUCCESS) { | |
zfree(_wait_queue_set_zone, wq_set); | |
wq_set = WAIT_QUEUE_SET_NULL; | |
} | |
} | |
return wq_set; | |
} | |
/* | |
* Routine: wait_queue_set_free | |
* Purpose: | |
* Free an allocated wait queue set | |
* Conditions: | |
* May block. | |
*/ | |
kern_return_t | |
wait_queue_set_free( | |
wait_queue_set_t wq_set) | |
{ | |
if (!wait_queue_is_set(wq_set)) | |
return KERN_INVALID_ARGUMENT; | |
if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue)) | |
return KERN_FAILURE; | |
zfree(_wait_queue_set_zone, wq_set); | |
return KERN_SUCCESS; | |
} | |
/* | |
* | |
* Routine: wait_queue_set_size | |
* Routine: wait_queue_link_size | |
* Purpose: | |
* Return the size of opaque wait queue structures | |
*/ | |
unsigned int wait_queue_set_size(void) { return sizeof(WaitQueueSet); } | |
unsigned int wait_queue_link_size(void) { return sizeof(WaitQueueLink); } | |
/* declare a unique type for wait queue link structures */ | |
static unsigned int _wait_queue_link; | |
static unsigned int _wait_queue_link_noalloc; | |
static unsigned int _wait_queue_unlinked; | |
#define WAIT_QUEUE_LINK ((void *)&_wait_queue_link) | |
#define WAIT_QUEUE_LINK_NOALLOC ((void *)&_wait_queue_link_noalloc) | |
#define WAIT_QUEUE_UNLINKED ((void *)&_wait_queue_unlinked) | |
#define WAIT_QUEUE_ELEMENT_CHECK(wq, wqe) \ | |
WQASSERT(((wqe)->wqe_queue == (wq) && \ | |
queue_next(queue_prev((queue_t) (wqe))) == (queue_t)(wqe)), \ | |
"wait queue element list corruption: wq=%#x, wqe=%#x", \ | |
(wq), (wqe)) | |
#define WQSPREV(wqs, wql) ((wait_queue_link_t)queue_prev( \ | |
((&(wqs)->wqs_setlinks == (queue_t)(wql)) ? \ | |
(queue_t)(wql) : &(wql)->wql_setlinks))) | |
#define WQSNEXT(wqs, wql) ((wait_queue_link_t)queue_next( \ | |
((&(wqs)->wqs_setlinks == (queue_t)(wql)) ? \ | |
(queue_t)(wql) : &(wql)->wql_setlinks))) | |
#define WAIT_QUEUE_SET_LINK_CHECK(wqs, wql) \ | |
WQASSERT(((((wql)->wql_type == WAIT_QUEUE_LINK) || \ | |
((wql)->wql_type == WAIT_QUEUE_LINK_NOALLOC)) && \ | |
((wql)->wql_setqueue == (wqs)) && \ | |
(((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) || \ | |
((wql)->wql_queue->wq_type == _WAIT_QUEUE_SET_inited)) && \ | |
(WQSNEXT((wqs), WQSPREV((wqs),(wql))) == (wql))), \ | |
"wait queue set links corruption: wqs=%#x, wql=%#x", \ | |
(wqs), (wql)) | |
#if defined(_WAIT_QUEUE_DEBUG_) | |
#define WQASSERT(e, s, p0, p1) ((e) ? 0 : panic(s, p0, p1)) | |
#define WAIT_QUEUE_CHECK(wq) \ | |
MACRO_BEGIN \ | |
queue_t q2 = &(wq)->wq_queue; \ | |
wait_queue_element_t wqe2 = (wait_queue_element_t) queue_first(q2); \ | |
while (!queue_end(q2, (queue_entry_t)wqe2)) { \ | |
WAIT_QUEUE_ELEMENT_CHECK((wq), wqe2); \ | |
wqe2 = (wait_queue_element_t) queue_next((queue_t) wqe2); \ | |
} \ | |
MACRO_END | |
#define WAIT_QUEUE_SET_CHECK(wqs) \ | |
MACRO_BEGIN \ | |
queue_t q2 = &(wqs)->wqs_setlinks; \ | |
wait_queue_link_t wql2 = (wait_queue_link_t) queue_first(q2); \ | |
while (!queue_end(q2, (queue_entry_t)wql2)) { \ | |
WAIT_QUEUE_SET_LINK_CHECK((wqs), wql2); \ | |
wql2 = (wait_queue_link_t) wql2->wql_setlinks.next; \ | |
} \ | |
MACRO_END | |
#else /* !_WAIT_QUEUE_DEBUG_ */ | |
#define WQASSERT(e, s, p0, p1) assert(e) | |
#define WAIT_QUEUE_CHECK(wq) | |
#define WAIT_QUEUE_SET_CHECK(wqs) | |
#endif /* !_WAIT_QUEUE_DEBUG_ */ | |
/* | |
* Routine: wait_queue_global | |
* Purpose: | |
* Indicate if this wait queue is a global wait queue or not. | |
*/ | |
static boolean_t | |
wait_queue_global( | |
wait_queue_t wq) | |
{ | |
if ((wq >= wait_queues) && (wq <= (wait_queues + num_wait_queues))) { | |
return TRUE; | |
} | |
return FALSE; | |
} | |
/* | |
* Routine: wait_queue_member_locked | |
* Purpose: | |
* Indicate if this set queue is a member of the queue | |
* Conditions: | |
* The wait queue is locked | |
* The set queue is just that, a set queue | |
*/ | |
static boolean_t | |
wait_queue_member_locked( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set) | |
{ | |
wait_queue_element_t wq_element; | |
queue_t q; | |
assert(wait_queue_held(wq)); | |
assert(wait_queue_is_set(wq_set)); | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
if ((wq_element->wqe_type == WAIT_QUEUE_LINK) || | |
(wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC)) { | |
wait_queue_link_t wql = (wait_queue_link_t)wq_element; | |
if (wql->wql_setqueue == wq_set) | |
return TRUE; | |
} | |
wq_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
} | |
return FALSE; | |
} | |
/* | |
* Routine: wait_queue_member | |
* Purpose: | |
* Indicate if this set queue is a member of the queue | |
* Conditions: | |
* The set queue is just that, a set queue | |
*/ | |
boolean_t | |
wait_queue_member( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set) | |
{ | |
boolean_t ret; | |
spl_t s; | |
if (!wait_queue_is_set(wq_set)) | |
return FALSE; | |
s = splsched(); | |
wait_queue_lock(wq); | |
ret = wait_queue_member_locked(wq, wq_set); | |
wait_queue_unlock(wq); | |
splx(s); | |
return ret; | |
} | |
/* | |
* Routine: wait_queue_link_internal | |
* Purpose: | |
* Insert a set wait queue into a wait queue. This | |
* requires us to link the two together using a wait_queue_link | |
* structure that was provided. | |
* Conditions: | |
* The wait queue being inserted must be inited as a set queue | |
* The wait_queue_link structure must already be properly typed | |
*/ | |
static | |
kern_return_t | |
wait_queue_link_internal( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set, | |
wait_queue_link_t wql) | |
{ | |
wait_queue_element_t wq_element; | |
queue_t q; | |
spl_t s; | |
if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) | |
return KERN_INVALID_ARGUMENT; | |
/* | |
* There are probably fewer threads and sets associated with | |
* the wait queue than there are wait queues associated with | |
* the set. So let's validate it that way. | |
*/ | |
s = splsched(); | |
wait_queue_lock(wq); | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
if ((wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) && | |
((wait_queue_link_t)wq_element)->wql_setqueue == wq_set) { | |
wait_queue_unlock(wq); | |
splx(s); | |
return KERN_ALREADY_IN_SET; | |
} | |
wq_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
} | |
/* | |
* Not already a member, so we can add it. | |
*/ | |
wqs_lock(wq_set); | |
WAIT_QUEUE_SET_CHECK(wq_set); | |
assert(wql->wql_type == WAIT_QUEUE_LINK || | |
wql->wql_type == WAIT_QUEUE_LINK_NOALLOC); | |
wql->wql_queue = wq; | |
wql_clear_prepost(wql); | |
queue_enter(&wq->wq_queue, wql, wait_queue_link_t, wql_links); | |
wql->wql_setqueue = wq_set; | |
queue_enter(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); | |
wqs_unlock(wq_set); | |
wait_queue_unlock(wq); | |
splx(s); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_link_noalloc | |
* Purpose: | |
* Insert a set wait queue into a wait queue. This | |
* requires us to link the two together using a wait_queue_link | |
* structure provided by the caller. | |
* Conditions: | |
* The wait queue being inserted must be inited as a set queue | |
*/ | |
kern_return_t | |
wait_queue_link_noalloc( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set, | |
wait_queue_link_t wql) | |
{ | |
wql->wql_type = WAIT_QUEUE_LINK_NOALLOC; | |
return wait_queue_link_internal(wq, wq_set, wql); | |
} | |
/* | |
* Routine: wait_queue_link | |
* Purpose: | |
* Insert a set wait queue into a wait queue. This | |
* requires us to link the two together using a wait_queue_link | |
* structure that we allocate. | |
* Conditions: | |
* The wait queue being inserted must be inited as a set queue | |
*/ | |
kern_return_t | |
wait_queue_link( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set) | |
{ | |
wait_queue_link_t wql; | |
kern_return_t ret; | |
wql = (wait_queue_link_t) zalloc(_wait_queue_link_zone); | |
if (wql == WAIT_QUEUE_LINK_NULL) | |
return KERN_RESOURCE_SHORTAGE; | |
wql->wql_type = WAIT_QUEUE_LINK; | |
ret = wait_queue_link_internal(wq, wq_set, wql); | |
if (ret != KERN_SUCCESS) | |
zfree(_wait_queue_link_zone, wql); | |
return ret; | |
} | |
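/*
 * Illustrative sketch (added comment): typical lifetime of a dynamically
 * allocated queue/set pairing using the routines in this file. Error
 * handling is omitted for brevity.
 *
 *	wait_queue_t     wq  = wait_queue_alloc(SYNC_POLICY_FIFO);
 *	wait_queue_set_t set = wait_queue_set_alloc(SYNC_POLICY_FIFO |
 *	                                            SYNC_POLICY_PREPOST);
 *
 *	(void) wait_queue_link(wq, set);	// allocates the link internally
 *	...
 *	(void) wait_queue_unlink(wq, set);	// frees the internal link
 *	(void) wait_queue_set_free(set);
 *	(void) wait_queue_free(wq);
 */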
wait_queue_link_t | |
wait_queue_link_allocate(void) | |
{ | |
wait_queue_link_t wql; | |
wql = zalloc(_wait_queue_link_zone); /* Can't fail */ | |
bzero(wql, sizeof(*wql)); | |
wql->wql_type = WAIT_QUEUE_UNLINKED; | |
return wql; | |
} | |
kern_return_t | |
wait_queue_link_free(wait_queue_link_t wql) | |
{ | |
zfree(_wait_queue_link_zone, wql); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_unlink_locked | |
* Purpose: | |
* Undo the linkage between a wait queue and a set. | |
*/ | |
static void | |
wait_queue_unlink_locked( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set, | |
wait_queue_link_t wql) | |
{ | |
assert(wait_queue_held(wq)); | |
assert(wait_queue_held(&wq_set->wqs_wait_queue)); | |
wql->wql_queue = WAIT_QUEUE_NULL; | |
queue_remove(&wq->wq_queue, wql, wait_queue_link_t, wql_links); | |
wql->wql_setqueue = WAIT_QUEUE_SET_NULL; | |
queue_remove(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); | |
if (wql_is_preposted(wql)) { | |
queue_t ppq = &wq_set->wqs_preposts; | |
queue_remove(ppq, wql, wait_queue_link_t, wql_preposts); | |
} | |
wql->wql_type = WAIT_QUEUE_UNLINKED; | |
WAIT_QUEUE_CHECK(wq); | |
WAIT_QUEUE_SET_CHECK(wq_set); | |
} | |
/* | |
* Routine: wait_queue_unlink_nofree | |
* Purpose: | |
* Remove the linkage between a wait queue and a set, | |
* returning the linkage structure to the caller to | |
* free later. | |
* Conditions: | |
* The wait queue being unlinked must be a member of the set queue | |
*/ | |
kern_return_t | |
wait_queue_unlink_nofree( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set, | |
wait_queue_link_t *wqlp) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_link_t wql; | |
queue_t q; | |
spl_t s; | |
if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) { | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wql = (wait_queue_link_t)wq_element; | |
if (wql->wql_setqueue == wq_set) { | |
wqs_lock(wq_set); | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wqs_unlock(wq_set); | |
wait_queue_unlock(wq); | |
splx(s); | |
*wqlp = wql; | |
return KERN_SUCCESS; | |
} | |
} | |
wq_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
} | |
wait_queue_unlock(wq); | |
splx(s); | |
return KERN_NOT_IN_SET; | |
} | |
/* | |
* Routine: wait_queue_unlink | |
* Purpose: | |
* Remove the linkage between a wait queue and a set, | |
* freeing the linkage structure. | |
* Conditions: | |
* The wait queue being unlinked must be a member of the set queue | |
*/ | |
kern_return_t | |
wait_queue_unlink( | |
wait_queue_t wq, | |
wait_queue_set_t wq_set) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_link_t wql; | |
queue_t q; | |
spl_t s; | |
if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) { | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wql = (wait_queue_link_t)wq_element; | |
if (wql->wql_setqueue == wq_set) { | |
boolean_t alloced; | |
alloced = (wql->wql_type == WAIT_QUEUE_LINK); | |
wqs_lock(wq_set); | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wqs_unlock(wq_set); | |
wait_queue_unlock(wq); | |
splx(s); | |
if (alloced) | |
zfree(_wait_queue_link_zone, wql); | |
return KERN_SUCCESS; | |
} | |
} | |
wq_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
} | |
wait_queue_unlock(wq); | |
splx(s); | |
return KERN_NOT_IN_SET; | |
} | |
/* | |
* Routine: wait_queue_unlink_all_nofree_locked | |
* Purpose: | |
* Remove the linkage between a wait queue and all its sets. | |
* All the linkage structures are returned to the caller for | |
* later freeing. | |
* Conditions: | |
* Wait queue locked. | |
*/ | |
static void | |
wait_queue_unlink_all_nofree_locked( | |
wait_queue_t wq, | |
queue_t links) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_element_t wq_next_element; | |
wait_queue_set_t wq_set; | |
wait_queue_link_t wql; | |
queue_t q; | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
wq_next_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wql = (wait_queue_link_t)wq_element; | |
wq_set = wql->wql_setqueue; | |
wqs_lock(wq_set); | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wqs_unlock(wq_set); | |
enqueue(links, &wql->wql_links); | |
} | |
wq_element = wq_next_element; | |
} | |
} | |
/* | |
* Routine: wait_queue_unlink_all_nofree | |
* Purpose: | |
* Remove the linkage between a wait queue and all its sets. | |
* All the linkage structures are returned to the caller for | |
* later freeing. | |
* Conditions: | |
* Nothing of interest locked. | |
*/ | |
kern_return_t | |
wait_queue_unlink_all_nofree( | |
wait_queue_t wq, | |
queue_t links) | |
{ | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_unlink_all_nofree\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
wait_queue_unlink_all_nofree_locked(wq, links); | |
wait_queue_unlock(wq); | |
splx(s); | |
return(KERN_SUCCESS); | |
} | |
/* | |
* Routine: wait_queue_unlink_all_locked | |
* Purpose: | |
* Remove the linkage between a locked wait queue and all its | |
* sets and enqueue the allocated ones onto the links queue | |
* provided. | |
* Conditions: | |
* Wait queue locked. | |
*/ | |
static void | |
wait_queue_unlink_all_locked( | |
wait_queue_t wq, | |
queue_t links) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_element_t wq_next_element; | |
wait_queue_set_t wq_set; | |
wait_queue_link_t wql; | |
queue_t q; | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
boolean_t alloced; | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
wq_next_element = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
alloced = (wq_element->wqe_type == WAIT_QUEUE_LINK); | |
if (alloced || wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wql = (wait_queue_link_t)wq_element; | |
wq_set = wql->wql_setqueue; | |
wqs_lock(wq_set); | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wqs_unlock(wq_set); | |
if (alloced) | |
enqueue(links, &wql->wql_links); | |
} | |
wq_element = wq_next_element; | |
} | |
} | |
/* | |
* Routine: wait_queue_unlink_all | |
* Purpose: | |
* Remove the linkage between a wait queue and all its sets. | |
* All the linkage structures that were allocated internally | |
* are freed. The others are the caller's responsibility. | |
* Conditions: | |
* Nothing of interest locked. | |
*/ | |
kern_return_t | |
wait_queue_unlink_all( | |
wait_queue_t wq) | |
{ | |
wait_queue_link_t wql; | |
queue_head_t links_queue_head; | |
queue_t links = &links_queue_head; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_unlink_all\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
queue_init(links); | |
s = splsched(); | |
wait_queue_lock(wq); | |
wait_queue_unlink_all_locked(wq, links); | |
wait_queue_unlock(wq); | |
splx(s); | |
while(!queue_empty(links)) { | |
wql = (wait_queue_link_t) dequeue(links); | |
zfree(_wait_queue_link_zone, wql); | |
} | |
return(KERN_SUCCESS); | |
} | |
/* legacy interface naming */ | |
kern_return_t | |
wait_subqueue_unlink_all( | |
wait_queue_set_t wq_set) | |
{ | |
return wait_queue_set_unlink_all(wq_set); | |
} | |
/* | |
* Routine: wait_queue_set_unlink_all_nofree | |
* Purpose: | |
* Remove the linkage between a set wait queue and all its | |
* member wait queues and all the sets it may be a member of. | |
* The link structures are returned for later freeing by the | |
* caller. | |
* Conditions: | |
* The wait queue must be a set | |
*/ | |
kern_return_t | |
wait_queue_set_unlink_all_nofree( | |
wait_queue_set_t wq_set, | |
queue_t links) | |
{ | |
wait_queue_link_t wql; | |
wait_queue_t wq; | |
queue_t q; | |
spl_t s; | |
if (!wait_queue_is_set(wq_set)) { | |
return KERN_INVALID_ARGUMENT; | |
} | |
retry: | |
s = splsched(); | |
wqs_lock(wq_set); | |
/* remove the wait queues that are members of our set */ | |
q = &wq_set->wqs_setlinks; | |
wql = (wait_queue_link_t)queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wql)) { | |
WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); | |
wq = wql->wql_queue; | |
if (wait_queue_lock_try(wq)) { | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wait_queue_unlock(wq); | |
enqueue(links, &wql->wql_links); | |
wql = (wait_queue_link_t)queue_first(q); | |
} else { | |
wqs_unlock(wq_set); | |
splx(s); | |
delay(1); | |
goto retry; | |
} | |
} | |
/* remove this set from sets it belongs to */ | |
wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links); | |
wqs_unlock(wq_set); | |
splx(s); | |
return(KERN_SUCCESS); | |
} | |
/* | |
* Routine: wait_queue_set_unlink_all | |
* Purpose: | |
* Remove the linkage between a set wait queue and all its | |
* member wait queues and all the sets it may be a member of. | |
* The link structures are freed for those links which were | |
* dynamically allocated. | |
* Conditions: | |
* The wait queue must be a set | |
*/ | |
kern_return_t | |
wait_queue_set_unlink_all( | |
wait_queue_set_t wq_set) | |
{ | |
wait_queue_link_t wql; | |
wait_queue_t wq; | |
queue_t q; | |
queue_head_t links_queue_head; | |
queue_t links = &links_queue_head; | |
spl_t s; | |
if (!wait_queue_is_set(wq_set)) { | |
return KERN_INVALID_ARGUMENT; | |
} | |
queue_init(links); | |
retry: | |
s = splsched(); | |
wqs_lock(wq_set); | |
/* remove the wait queues that are members of our set */ | |
q = &wq_set->wqs_setlinks; | |
wql = (wait_queue_link_t)queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wql)) { | |
WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); | |
wq = wql->wql_queue; | |
if (wait_queue_lock_try(wq)) { | |
boolean_t alloced; | |
alloced = (wql->wql_type == WAIT_QUEUE_LINK); | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wait_queue_unlock(wq); | |
if (alloced) | |
enqueue(links, &wql->wql_links); | |
wql = (wait_queue_link_t)queue_first(q); | |
} else { | |
wqs_unlock(wq_set); | |
splx(s); | |
delay(1); | |
goto retry; | |
} | |
} | |
/* remove this set from sets it belongs to */ | |
wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links); | |
wqs_unlock(wq_set); | |
splx(s); | |
while (!queue_empty (links)) { | |
wql = (wait_queue_link_t) dequeue(links); | |
zfree(_wait_queue_link_zone, wql); | |
} | |
return(KERN_SUCCESS); | |
} | |
kern_return_t | |
wait_queue_set_unlink_one( | |
wait_queue_set_t wq_set, | |
wait_queue_link_t wql) | |
{ | |
wait_queue_t wq; | |
spl_t s; | |
assert(wait_queue_is_set(wq_set)); | |
retry: | |
s = splsched(); | |
wqs_lock(wq_set); | |
WAIT_QUEUE_SET_CHECK(wq_set); | |
/* Already unlinked, e.g. by selclearthread() */ | |
if (wql->wql_type == WAIT_QUEUE_UNLINKED) { | |
goto out; | |
} | |
WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); | |
/* On a wait queue, and we hold set queue lock ... */ | |
wq = wql->wql_queue; | |
if (wait_queue_lock_try(wq)) { | |
wait_queue_unlink_locked(wq, wq_set, wql); | |
wait_queue_unlock(wq); | |
} else { | |
wqs_unlock(wq_set); | |
splx(s); | |
delay(1); | |
goto retry; | |
} | |
out: | |
wqs_unlock(wq_set); | |
splx(s); | |
return KERN_SUCCESS; | |
} | |
/* | |
* Routine: wait_queue_assert_wait64_locked | |
* Purpose: | |
* Insert the current thread into the supplied wait queue | |
* waiting for a particular event to be posted to that queue. | |
* | |
* Conditions: | |
* The wait queue is assumed locked. | |
* The waiting thread is assumed locked. | |
* | |
*/ | |
__private_extern__ wait_result_t | |
wait_queue_assert_wait64_locked( | |
wait_queue_t wq, | |
event64_t event, | |
wait_interrupt_t interruptible, | |
wait_timeout_urgency_t urgency, | |
uint64_t deadline, | |
uint64_t leeway, | |
thread_t thread) | |
{ | |
wait_result_t wait_result; | |
boolean_t realtime; | |
if (!wait_queue_assert_possible(thread)) | |
panic("wait_queue_assert_wait64_locked"); | |
if (wq->wq_type == _WAIT_QUEUE_SET_inited) { | |
wait_queue_set_t wqs = (wait_queue_set_t)wq; | |
if (event == NO_EVENT64 && wqs_is_preposted(wqs)) | |
return(THREAD_AWAKENED); | |
} | |
/* | |
* Realtime threads get priority for wait queue placements. | |
* This allows wait_queue_wakeup_one to prefer a waiting | |
* realtime thread, similar in principle to performing | |
* a wait_queue_wakeup_all and allowing scheduler prioritization | |
* to run the realtime thread, but without causing the | |
* lock contention of that scenario. | |
*/ | |
realtime = (thread->sched_pri >= BASEPRI_REALTIME); | |
/* | |
* This is the extent to which we currently take scheduling attributes | |
* into account. If the thread is vm privileged, we stick it at | |
* the front of the queue. Later, these queues will honor the policy | |
* value set at wait_queue_init time. | |
*/ | |
wait_result = thread_mark_wait_locked(thread, interruptible); | |
if (wait_result == THREAD_WAITING) { | |
if (!wq->wq_fifo | |
|| (thread->options & TH_OPT_VMPRIV) | |
|| realtime) | |
enqueue_head(&wq->wq_queue, (queue_entry_t) thread); | |
else | |
enqueue_tail(&wq->wq_queue, (queue_entry_t) thread); | |
thread->wait_event = event; | |
thread->wait_queue = wq; | |
if (deadline != 0) { | |
if (!timer_call_enter_with_leeway(&thread->wait_timer, NULL, | |
deadline, leeway, urgency, FALSE)) | |
thread->wait_timer_active++; | |
thread->wait_timer_is_set = TRUE; | |
} | |
if (wait_queue_global(wq)) { | |
wq->wq_eventmask = wq->wq_eventmask | CAST_TO_EVENT_MASK(event); | |
} | |
} | |
return(wait_result); | |
} | |
/* | |
* Routine: wait_queue_assert_wait | |
* Purpose: | |
* Insert the current thread into the supplied wait queue | |
* waiting for a particular event to be posted to that queue. | |
* | |
* Conditions: | |
* nothing of interest locked. | |
*/ | |
wait_result_t | |
wait_queue_assert_wait( | |
wait_queue_t wq, | |
event_t event, | |
wait_interrupt_t interruptible, | |
uint64_t deadline) | |
{ | |
spl_t s; | |
wait_result_t ret; | |
thread_t thread = current_thread(); | |
/* If it is an invalid wait queue, you can't wait on it */ | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning thread->wait_result = THREAD_RESTART from wait_queue_assert_wait\n"); | |
return (thread->wait_result = THREAD_RESTART); | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread_lock(thread); | |
ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), | |
interruptible, | |
TIMEOUT_URGENCY_SYS_NORMAL, | |
deadline, 0, | |
thread); | |
thread_unlock(thread); | |
wait_queue_unlock(wq); | |
splx(s); | |
return(ret); | |
} | |
/* | |
* Routine: wait_queue_assert_wait_with_leeway | |
* Purpose: | |
* Insert the current thread into the supplied wait queue | |
* waiting for a particular event to be posted to that queue. | |
* Deadline values are specified with urgency and leeway. | |
* | |
* Conditions: | |
* nothing of interest locked. | |
*/ | |
wait_result_t | |
wait_queue_assert_wait_with_leeway( | |
wait_queue_t wq, | |
event_t event, | |
wait_interrupt_t interruptible, | |
wait_timeout_urgency_t urgency, | |
uint64_t deadline, | |
uint64_t leeway) | |
{ | |
spl_t s; | |
wait_result_t ret; | |
thread_t thread = current_thread(); | |
/* If it is an invalid wait queue, you can't wait on it */ | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning thread->wait_result = THREAD_RESTART from wait_queue_assert_wait_with_leeway\n"); | |
return (thread->wait_result = THREAD_RESTART); | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread_lock(thread); | |
ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), | |
interruptible, | |
urgency, deadline, leeway, | |
thread); | |
thread_unlock(thread); | |
wait_queue_unlock(wq); | |
splx(s); | |
return(ret); | |
} | |
/* | |
* Routine: wait_queue_assert_wait64 | |
* Purpose: | |
* Insert the current thread into the supplied wait queue | |
* waiting for a particular event to be posted to that queue. | |
* Conditions: | |
* nothing of interest locked. | |
*/ | |
wait_result_t | |
wait_queue_assert_wait64( | |
wait_queue_t wq, | |
event64_t event, | |
wait_interrupt_t interruptible, | |
uint64_t deadline) | |
{ | |
spl_t s; | |
wait_result_t ret; | |
thread_t thread = current_thread(); | |
/* If it is an invalid wait queue, you can't wait on it */ | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning thread->wait_result = THREAD_RESTART from wait_queue_assert_wait64\n"); | |
return (thread->wait_result = THREAD_RESTART); | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread_lock(thread); | |
ret = wait_queue_assert_wait64_locked(wq, event, interruptible, | |
TIMEOUT_URGENCY_SYS_NORMAL, | |
deadline, 0, | |
thread); | |
thread_unlock(thread); | |
wait_queue_unlock(wq); | |
splx(s); | |
return(ret); | |
} | |
/* | |
* Routine: wait_queue_assert_wait64_with_leeway | |
* Purpose: | |
* Insert the current thread into the supplied wait queue | |
* waiting for a particular event to be posted to that queue. | |
* Deadline values are specified with urgency and leeway. | |
* Conditions: | |
* nothing of interest locked. | |
*/ | |
wait_result_t | |
wait_queue_assert_wait64_with_leeway( | |
wait_queue_t wq, | |
event64_t event, | |
wait_interrupt_t interruptible, | |
wait_timeout_urgency_t urgency, | |
uint64_t deadline, | |
uint64_t leeway) | |
{ | |
spl_t s; | |
wait_result_t ret; | |
thread_t thread = current_thread(); | |
/* If it is an invalid wait queue, you can't wait on it */ | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning thread->wait_result = THREAD_RESTART from wait_queue_assert_wait64_with_leeway\n"); | |
return (thread->wait_result = THREAD_RESTART); | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread_lock(thread); | |
ret = wait_queue_assert_wait64_locked(wq, event, interruptible, | |
urgency, deadline, leeway, | |
thread); | |
thread_unlock(thread); | |
wait_queue_unlock(wq); | |
splx(s); | |
return(ret); | |
} | |
/* | |
* Routine: _wait_queue_select64_all | |
* Purpose: | |
* Select all threads off a wait queue that meet the | |
* supplied criteria. | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* wake_queue initialized and ready for insertion | |
* possibly recursive | |
* Returns: | |
* a queue of locked threads | |
*/ | |
static void | |
_wait_queue_select64_all( | |
wait_queue_t wq, | |
event64_t event, | |
queue_t wake_queue) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_element_t wqe_next; | |
unsigned long eventmask = 0; | |
boolean_t is_queue_global = FALSE; | |
queue_t q; | |
is_queue_global = wait_queue_global(wq); | |
if (is_queue_global) { | |
eventmask = CAST_TO_EVENT_MASK(event); | |
if ((wq->wq_eventmask & eventmask) != eventmask) { | |
return; | |
} | |
eventmask = 0; | |
} | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
wqe_next = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
/* | |
* We may have to recurse if this is a compound wait queue. | |
*/ | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wait_queue_link_t wql = (wait_queue_link_t)wq_element; | |
wait_queue_set_t set_queue = wql->wql_setqueue; | |
/* | |
* We have to check the set wait queue. If it is marked | |
* as pre-post, and it is the "generic event" then mark | |
* it pre-posted now (if not already). | |
*/ | |
wqs_lock(set_queue); | |
if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { | |
queue_t ppq = &set_queue->wqs_preposts; | |
queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); | |
} | |
if (! wait_queue_empty(&set_queue->wqs_wait_queue)) | |
_wait_queue_select64_all(&set_queue->wqs_wait_queue, event, wake_queue); | |
wqs_unlock(set_queue); | |
} else { | |
/* | |
* Otherwise, it's a thread. If it is waiting on | |
* the event we are posting to this queue, pull | |
* it off the queue and stick it in our wake_queue. | |
*/ | |
thread_t t = (thread_t)(void *)wq_element; | |
if (t->wait_event == event) { | |
thread_lock(t); | |
remqueue((queue_entry_t) t); | |
enqueue (wake_queue, (queue_entry_t) t); | |
t->wait_queue = WAIT_QUEUE_NULL; | |
t->wait_event = NO_EVENT64; | |
t->at_safe_point = FALSE; | |
/* returned locked */ | |
} else { | |
if (is_queue_global) { | |
eventmask = eventmask | | |
CAST_TO_EVENT_MASK(t->wait_event); | |
} | |
} | |
} | |
wq_element = wqe_next; | |
} | |
/* Update event mask if global wait queue */ | |
if (is_queue_global) { | |
wq->wq_eventmask = eventmask; | |
} | |
} | |
/* | |
* Routine: wait_queue_wakeup64_all_locked | |
* Purpose: | |
* Wakeup some number of threads that are in the specified | |
* wait queue and waiting on the specified event. | |
* Conditions: | |
* wait queue already locked (may be released). | |
* Returns: | |
* KERN_SUCCESS - Threads were woken up | |
* KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair | |
*/ | |
__private_extern__ kern_return_t | |
wait_queue_wakeup64_all_locked( | |
wait_queue_t wq, | |
event64_t event, | |
wait_result_t result, | |
boolean_t unlock) | |
{ | |
queue_head_t wake_queue_head; | |
queue_t q = &wake_queue_head; | |
kern_return_t res; | |
// assert(wait_queue_held(wq)); | |
// if(!wq->wq_interlock.lock_data) { /* (BRINGUP */ | |
// panic("wait_queue_wakeup64_all_locked: lock not held on %p\n", wq); /* (BRINGUP) */ | |
// } | |
queue_init(q); | |
/* | |
* Select the threads that we will wake up. The threads | |
* are returned to us locked and cleanly removed from the | |
* wait queue. | |
*/ | |
_wait_queue_select64_all(wq, event, q); | |
if (unlock) | |
wait_queue_unlock(wq); | |
/* | |
* For each thread, set it running. | |
*/ | |
res = KERN_NOT_WAITING; | |
while (!queue_empty (q)) { | |
thread_t thread = (thread_t)(void *) dequeue(q); | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
} | |
return res; | |
} | |
/* | |
* Routine: wait_queue_wakeup_all | |
* Purpose: | |
* Wakeup some number of threads that are in the specified | |
* wait queue and waiting on the specified event. | |
* Conditions: | |
* Nothing locked | |
* Returns: | |
* KERN_SUCCESS - Threads were woken up | |
* KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair | |
*/ | |
kern_return_t | |
wait_queue_wakeup_all( | |
wait_queue_t wq, | |
event_t event, | |
wait_result_t result) | |
{ | |
kern_return_t ret; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup_all\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
// if(!wq->wq_interlock.lock_data) { /* (BRINGUP */ | |
// panic("wait_queue_wakeup_all: we did not get the lock on %p\n", wq); /* (BRINGUP) */ | |
// } | |
ret = wait_queue_wakeup64_all_locked( | |
wq, CAST_DOWN(event64_t,event), | |
result, TRUE); | |
/* lock released */ | |
splx(s); | |
return ret; | |
} | |
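/*
 * Illustrative sketch (added comment): how a subsystem pairs the assert and
 * wakeup halves of this API, much as the kqueue code earlier in this gist
 * does with kq->kq_wqs and &kn->kn_status. "chan" is a placeholder event
 * address; a deadline of 0 means no timeout.
 *
 *	// waiter, after dropping its own state lock
 *	wait_result_t wr = wait_queue_assert_wait(wq, (event_t)chan,
 *	                                          THREAD_ABORTSAFE, 0);
 *	if (wr == THREAD_WAITING)
 *		wr = thread_block(THREAD_CONTINUE_NULL);
 *
 *	// waker
 *	(void) wait_queue_wakeup_all(wq, (event_t)chan, THREAD_AWAKENED);
 */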
/* | |
* Routine: wait_queue_wakeup64_all | |
* Purpose: | |
* Wakeup some number of threads that are in the specified | |
* wait queue and waiting on the specified event. | |
* Conditions: | |
* Nothing locked | |
* Returns: | |
* KERN_SUCCESS - Threads were woken up | |
* KERN_NOT_WAITING - No threads were waiting on the <wq,event> pair | |
*/ | |
kern_return_t | |
wait_queue_wakeup64_all( | |
wait_queue_t wq, | |
event64_t event, | |
wait_result_t result) | |
{ | |
kern_return_t ret; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup64_all\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
ret = wait_queue_wakeup64_all_locked(wq, event, result, TRUE); | |
/* lock released */ | |
splx(s); | |
return ret; | |
} | |
/* | |
* Routine: _wait_queue_select64_one | |
* Purpose: | |
* Select the best thread off a wait queue that meets the | |
* supplied criteria. | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* possibly recursive | |
* Returns: | |
* a locked thread - if one found | |
* Note: | |
* This is where the sync policy of the wait queue comes | |
* into effect. For now, we just assume FIFO/LIFO. | |
*/ | |
static thread_t | |
_wait_queue_select64_one( | |
wait_queue_t wq, | |
event64_t event) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_element_t wqe_next; | |
thread_t t = THREAD_NULL; | |
thread_t fifo_thread = THREAD_NULL; | |
boolean_t is_queue_fifo = TRUE; | |
boolean_t is_queue_global = FALSE; | |
boolean_t thread_imp_donor = FALSE; | |
boolean_t realtime = FALSE; | |
unsigned long eventmask = 0; | |
queue_t q; | |
if (wait_queue_global(wq)) { | |
eventmask = CAST_TO_EVENT_MASK(event); | |
if ((wq->wq_eventmask & eventmask) != eventmask) { | |
return THREAD_NULL; | |
} | |
eventmask = 0; | |
is_queue_global = TRUE; | |
#if IMPORTANCE_INHERITANCE | |
is_queue_fifo = FALSE; | |
#endif /* IMPORTANCE_INHERITANCE */ | |
} | |
q = &wq->wq_queue; | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
wqe_next = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
/* | |
* We may have to recurse if this is a compound wait queue. | |
*/ | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wait_queue_link_t wql = (wait_queue_link_t)wq_element; | |
wait_queue_set_t set_queue = wql->wql_setqueue; | |
/* | |
* We have to check the set wait queue. If the set | |
* supports pre-posting, it isn't already preposted, | |
* and we didn't find a thread in the set, then mark it. | |
* | |
* If we later find a thread, there may be a spurious | |
* pre-post here on this set. The wait side has to check | |
* for that either pre- or post-wait. | |
*/ | |
wqs_lock(set_queue); | |
if (! wait_queue_empty(&set_queue->wqs_wait_queue)) { | |
t = _wait_queue_select64_one(&set_queue->wqs_wait_queue, event); | |
} | |
if (t != THREAD_NULL) { | |
wqs_unlock(set_queue); | |
return t; | |
} | |
if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { | |
queue_t ppq = &set_queue->wqs_preposts; | |
queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); | |
} | |
wqs_unlock(set_queue); | |
} else { | |
/* | |
* Otherwise, it's a thread.  If it is waiting on | |
* the event we are posting to this queue, pull | |
* it off the queue and hand it back (still locked). | |
*/ | |
t = (thread_t)(void *)wq_element; | |
if (t->wait_event == event) { | |
if (fifo_thread == THREAD_NULL) { | |
fifo_thread = t; | |
} | |
#if IMPORTANCE_INHERITANCE | |
/* | |
* Checking the imp donor bit does not require the thread | |
* lock or the task lock, since we hold the wait queue lock | |
* and the thread cannot be removed from the queue without | |
* acquiring that lock.  The imp donor bit may change after | |
* we read its value, but it is ok to wake a thread while | |
* someone drops an importance assertion on that thread. | |
*/ | |
thread_imp_donor = task_is_importance_donor(t->task); | |
#endif /* IMPORTANCE_INHERITANCE */ | |
realtime = (t->sched_pri >= BASEPRI_REALTIME); | |
if (is_queue_fifo || thread_imp_donor || realtime || | |
(t->options & TH_OPT_VMPRIV)) { | |
thread_lock(t); | |
remqueue((queue_entry_t) t); | |
t->wait_queue = WAIT_QUEUE_NULL; | |
t->wait_event = NO_EVENT64; | |
t->at_safe_point = FALSE; | |
return t; /* still locked */ | |
} | |
} | |
if (is_queue_global) { | |
eventmask = eventmask | CAST_TO_EVENT_MASK(t->wait_event); | |
} | |
t = THREAD_NULL; | |
} | |
wq_element = wqe_next; | |
} | |
if (is_queue_global) { | |
wq->wq_eventmask = eventmask; | |
} | |
#if IMPORTANCE_INHERITANCE | |
if (fifo_thread != THREAD_NULL) { | |
thread_lock(fifo_thread); | |
remqueue((queue_entry_t) fifo_thread); | |
fifo_thread->wait_queue = WAIT_QUEUE_NULL; | |
fifo_thread->wait_event = NO_EVENT64; | |
fifo_thread->at_safe_point = FALSE; | |
return fifo_thread; /* still locked */ | |
} | |
#endif /* IMPORTANCE_INHERITANCE */ | |
return THREAD_NULL; | |
} | |
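/* | |
 * Editorial note (not part of the original source): on global wait | |
 * queues the routine above consults wq_eventmask before walking the | |
 * queue.  CAST_TO_EVENT_MASK() condenses an event into a compact mask; | |
 * if the queue's wq_eventmask does not cover that mask, no waiter here | |
 * can match the event and the scan is skipped entirely.  When a full | |
 * scan finds no match, wq_eventmask is rebuilt from the waiters that | |
 * were seen.  With IMPORTANCE_INHERITANCE, importance-donor, realtime, | |
 * and TH_OPT_VMPRIV threads may be handed back ahead of strict FIFO | |
 * order; otherwise the first matching waiter (fifo_thread) wins. | |
 */ | |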
/* | |
* Routine: wait_queue_pull_thread_locked | |
* Purpose: | |
* Pull a thread off its wait queue and (possibly) unlock | |
* the waitq. | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* thread locked | |
* Returns: | |
* with the thread still locked. | |
*/ | |
void | |
wait_queue_pull_thread_locked( | |
wait_queue_t waitq, | |
thread_t thread, | |
boolean_t unlock) | |
{ | |
assert(thread->wait_queue == waitq); | |
remqueue((queue_entry_t)thread ); | |
thread->wait_queue = WAIT_QUEUE_NULL; | |
thread->wait_event = NO_EVENT64; | |
thread->at_safe_point = FALSE; | |
if (unlock) | |
wait_queue_unlock(waitq); | |
} | |
/* | |
* Routine: wait_queue_select64_thread | |
* Purpose: | |
* Look for a thread and remove it from the queues, if | |
* (and only if) the thread is waiting on the supplied | |
* <wait_queue, event> pair. | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* possibly recursive | |
* Returns: | |
* KERN_NOT_WAITING: Thread is not waiting here. | |
* KERN_SUCCESS: It was, and is now removed (returned locked) | |
*/ | |
static kern_return_t | |
_wait_queue_select64_thread( | |
wait_queue_t wq, | |
event64_t event, | |
thread_t thread) | |
{ | |
wait_queue_element_t wq_element; | |
wait_queue_element_t wqe_next; | |
kern_return_t res = KERN_NOT_WAITING; | |
queue_t q = &wq->wq_queue; | |
thread_lock(thread); | |
if ((thread->wait_queue == wq) && (thread->wait_event == event)) { | |
remqueue((queue_entry_t) thread); | |
thread->at_safe_point = FALSE; | |
thread->wait_event = NO_EVENT64; | |
thread->wait_queue = WAIT_QUEUE_NULL; | |
/* thread still locked */ | |
return KERN_SUCCESS; | |
} | |
thread_unlock(thread); | |
/* | |
* The wait_queue associated with the thread may be one of this | |
* wait queue's sets. Go see. If so, removing it from | |
* there is like removing it from here. | |
*/ | |
wq_element = (wait_queue_element_t) queue_first(q); | |
while (!queue_end(q, (queue_entry_t)wq_element)) { | |
WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); | |
wqe_next = (wait_queue_element_t) | |
queue_next((queue_t) wq_element); | |
if (wq_element->wqe_type == WAIT_QUEUE_LINK || | |
wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { | |
wait_queue_link_t wql = (wait_queue_link_t)wq_element; | |
wait_queue_set_t set_queue = wql->wql_setqueue; | |
wqs_lock(set_queue); | |
if (! wait_queue_empty(&set_queue->wqs_wait_queue)) { | |
res = _wait_queue_select64_thread(&set_queue->wqs_wait_queue, | |
event, | |
thread); | |
} | |
wqs_unlock(set_queue); | |
if (res == KERN_SUCCESS) | |
return KERN_SUCCESS; | |
} | |
wq_element = wqe_next; | |
} | |
return res; | |
} | |
/* | |
* Routine: wait_queue_wakeup64_identity_locked | |
* Purpose: | |
* Select a single thread that is most-eligible to run and | |
* set it running, but return the thread locked. | |
* | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* possibly recursive | |
* Returns: | |
* a pointer to the locked thread that was awakened | |
*/ | |
__private_extern__ thread_t | |
wait_queue_wakeup64_identity_locked( | |
wait_queue_t wq, | |
event64_t event, | |
wait_result_t result, | |
boolean_t unlock) | |
{ | |
kern_return_t res; | |
thread_t thread; | |
assert(wait_queue_held(wq)); | |
thread = _wait_queue_select64_one(wq, event); | |
if (unlock) | |
wait_queue_unlock(wq); | |
if (thread) { | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
} | |
return thread; /* still locked if not NULL */ | |
} | |
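/* | |
 * Editorial note (not part of the original source): unlike the | |
 * _one_locked variant below, the identity form hands the selected | |
 * thread back still locked even after thread_go(), so the caller can | |
 * inspect or update the thread (for example, to deposit per-wakeup | |
 * state) before dropping the lock itself with thread_unlock().  The | |
 * callers that rely on this live outside this file. | |
 */ | |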
/* | |
* Routine: wait_queue_wakeup64_one_locked | |
* Purpose: | |
* Select a single thread that is most-eligible to run and | |
* set it running. | |
* | |
* Conditions: | |
* at splsched | |
* wait queue locked | |
* possibly recursive | |
* Returns: | |
* KERN_SUCCESS - A thread was found waiting and awakened | |
* KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair | |
*/ | |
__private_extern__ kern_return_t | |
wait_queue_wakeup64_one_locked( | |
wait_queue_t wq, | |
event64_t event, | |
wait_result_t result, | |
boolean_t unlock) | |
{ | |
thread_t thread; | |
assert(wait_queue_held(wq)); | |
thread = _wait_queue_select64_one(wq, event); | |
if (unlock) | |
wait_queue_unlock(wq); | |
if (thread) { | |
kern_return_t res; | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
return res; | |
} | |
return KERN_NOT_WAITING; | |
} | |
/* | |
* Routine: wait_queue_wakeup_one | |
* Purpose: | |
* Wakeup the most appropriate thread that is in the specified | |
* wait queue for the specified event. | |
* Conditions: | |
* Nothing locked | |
* Returns: | |
* KERN_SUCCESS - Thread was woken up | |
* KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair | |
*/ | |
kern_return_t | |
wait_queue_wakeup_one( | |
wait_queue_t wq, | |
event_t event, | |
wait_result_t result, | |
int priority) | |
{ | |
thread_t thread; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup_one\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread = _wait_queue_select64_one(wq, CAST_DOWN(event64_t,event)); | |
wait_queue_unlock(wq); | |
if (thread) { | |
kern_return_t res; | |
if (thread->sched_pri < priority) { | |
if (priority <= MAXPRI) { | |
set_sched_pri(thread, priority); | |
thread->was_promoted_on_wakeup = 1; | |
thread->sched_flags |= TH_SFLAG_PROMOTED; | |
} | |
} | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
splx(s); | |
return res; | |
} | |
splx(s); | |
return KERN_NOT_WAITING; | |
} | |
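/* | |
 * Hedged usage sketch (added for illustration; not part of the original | |
 * file).  The priority argument lets the waker hand the woken thread a | |
 * temporary floor: when the thread's sched_pri is below it (and it is | |
 * <= MAXPRI), the thread is promoted with TH_SFLAG_PROMOTED for the | |
 * wakeup.  Because sched_pri is never negative, passing -1 can never | |
 * satisfy sched_pri < priority, so it simply opts out of promotion. | |
 * With a hypothetical queue my_wq and event &my_object: | |
 * | |
 *	wait_queue_wakeup_one(my_wq, (event_t)&my_object, | |
 *	                      THREAD_AWAKENED, -1); | |
 */ | |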
/* | |
* Routine: wait_queue_wakeup64_one | |
* Purpose: | |
* Wakeup the most appropriate thread that is in the specified | |
* wait queue for the specified event. | |
* Conditions: | |
* Nothing locked | |
* Returns: | |
* KERN_SUCCESS - Thread was woken up | |
* KERN_NOT_WAITING - No thread was waiting on the <wq,event> pair | |
*/ | |
kern_return_t | |
wait_queue_wakeup64_one( | |
wait_queue_t wq, | |
event64_t event, | |
wait_result_t result) | |
{ | |
thread_t thread; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup64_one\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
thread = _wait_queue_select64_one(wq, event); | |
wait_queue_unlock(wq); | |
if (thread) { | |
kern_return_t res; | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
splx(s); | |
return res; | |
} | |
splx(s); | |
return KERN_NOT_WAITING; | |
} | |
/* | |
* Routine: wait_queue_wakeup64_thread_locked | |
* Purpose: | |
* Wakeup the particular thread that was specified if and only | |
* if it was in this wait queue (or one of its set queues) | |
* and waiting on the specified event. | |
* | |
* This is much safer than just removing the thread from | |
* whatever wait queue it happens to be on. For instance, it | |
* may have already been awoken from the wait you intended to | |
* interrupt and waited on something else (like another | |
* semaphore). | |
* Conditions: | |
* at splsched | |
* wait queue already locked (may be released). | |
* Returns: | |
* KERN_SUCCESS - the thread was found waiting and awakened | |
* KERN_NOT_WAITING - the thread was not waiting here | |
*/ | |
__private_extern__ kern_return_t | |
wait_queue_wakeup64_thread_locked( | |
wait_queue_t wq, | |
event64_t event, | |
thread_t thread, | |
wait_result_t result, | |
boolean_t unlock) | |
{ | |
kern_return_t res; | |
assert(wait_queue_held(wq)); | |
/* | |
* See if the thread was still waiting there. If so, it got | |
* dequeued and returned locked. | |
*/ | |
res = _wait_queue_select64_thread(wq, event, thread); | |
if (unlock) | |
wait_queue_unlock(wq); | |
if (res != KERN_SUCCESS) | |
return KERN_NOT_WAITING; | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
return res; | |
} | |
/* | |
* Routine: wait_queue_wakeup_thread | |
* Purpose: | |
* Wakeup the particular thread that was specified if and only | |
* if it was in this wait queue (or one of its set queues) | |
* and waiting on the specified event. | |
* | |
* This is much safer than just removing the thread from | |
* whatever wait queue it happens to be on. For instance, it | |
* may have already been awoken from the wait you intended to | |
* interrupt and waited on something else (like another | |
* semaphore). | |
* Conditions: | |
* nothing of interest locked | |
* we need to assume spl needs to be raised | |
* Returns: | |
* KERN_SUCCESS - the thread was found waiting and awakened | |
* KERN_NOT_WAITING - the thread was not waiting here | |
*/ | |
kern_return_t | |
wait_queue_wakeup_thread( | |
wait_queue_t wq, | |
event_t event, | |
thread_t thread, | |
wait_result_t result) | |
{ | |
kern_return_t res; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup_thread\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
res = _wait_queue_select64_thread(wq, CAST_DOWN(event64_t,event), thread); | |
wait_queue_unlock(wq); | |
if (res == KERN_SUCCESS) { | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
splx(s); | |
return res; | |
} | |
splx(s); | |
return KERN_NOT_WAITING; | |
} | |
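/* | |
 * Hedged usage sketch (added for illustration; not part of the original | |
 * file).  The targeted form only wakes 'thread' if it is still parked | |
 * on this queue (or one of its set queues) for this exact event, which | |
 * is what makes it safe for interrupting one specific waiter.  With a | |
 * hypothetical queue my_wq, object my_object, and a thread_t t obtained | |
 * elsewhere: | |
 * | |
 *	kern_return_t kr; | |
 *	kr = wait_queue_wakeup_thread(my_wq, (event_t)&my_object, | |
 *	                              t, THREAD_INTERRUPTED); | |
 * | |
 * KERN_NOT_WAITING here means t had already been woken, or had gone to | |
 * wait on something else, and nothing was changed. | |
 */ | |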
/* | |
* Routine: wait_queue_wakeup64_thread | |
* Purpose: | |
* Wakeup the particular thread that was specified if and only | |
* if it was in this wait queue (or one of its set queues) | |
* and waiting on the specified event. | |
* | |
* This is much safer than just removing the thread from | |
* whatever wait queue it happens to be on. For instance, it | |
* may have already been awoken from the wait you intended to | |
* interrupt and waited on something else (like another | |
* semaphore). | |
* Conditions: | |
* nothing of interest locked | |
* we need to assume spl needs to be raised | |
* Returns: | |
* KERN_SUCCESS - the thread was found waiting and awakened | |
* KERN_NOT_WAITING - the thread was not waiting here | |
*/ | |
kern_return_t | |
wait_queue_wakeup64_thread( | |
wait_queue_t wq, | |
event64_t event, | |
thread_t thread, | |
wait_result_t result) | |
{ | |
kern_return_t res; | |
spl_t s; | |
if (!wait_queue_is_valid(wq)) { | |
printf("\nReturning KERN_INVALID_ARGUMENT from wait_queue_wakeup64_thread\n"); | |
return KERN_INVALID_ARGUMENT; | |
} | |
s = splsched(); | |
wait_queue_lock(wq); | |
res = _wait_queue_select64_thread(wq, event, thread); | |
wait_queue_unlock(wq); | |
if (res == KERN_SUCCESS) { | |
res = thread_go(thread, result); | |
assert(res == KERN_SUCCESS); | |
thread_unlock(thread); | |
splx(s); | |
return res; | |
} | |
splx(s); | |
return KERN_NOT_WAITING; | |
} |
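/* | |
 * Editorial note (not part of the original source): each wakeup entry | |
 * point in this file comes in an event_t and an event64_t flavor.  The | |
 * event_t wrappers convert the caller's event with | |
 * CAST_DOWN(event64_t, event) and then call the shared 64-bit | |
 * implementation, so the locking and thread-selection logic exists | |
 * only once. | |
 */ | |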