Index: projects/calloutng/sys/kern/kern_condvar.c
===================================================================
--- projects/calloutng/sys/kern/kern_condvar.c	(revision 237201)
+++ projects/calloutng/sys/kern/kern_condvar.c	(revision 237202)
@@ -1,453 +1,457 @@
 /*-
  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/resourcevar.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 /*
  * Common sanity checks for cv_wait* functions.
  */
 #define	CV_ASSERT(cvp, lock, td) do {					\
 	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
 	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
 	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
 	KASSERT((lock) != NULL, ("%s: lock NULL", __func__));		\
 } while (0)
 
 /*
  * Initialize a condition variable.  Must be called before use.
  */
 void
 cv_init(struct cv *cvp, const char *desc)
 {
 
 	cvp->cv_description = desc;
 	cvp->cv_waiters = 0;
 }
 
 /*
  * Destroy a condition variable.  The condition variable must be re-initialized
  * in order to be re-used.
  */
 void
 cv_destroy(struct cv *cvp)
 {
 #ifdef INVARIANTS
 	struct sleepqueue *sq;
 
 	sleepq_lock(cvp);
 	sq = sleepq_lookup(cvp);
 	sleepq_release(cvp);
 	KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__));
 #endif
 }
 
 /*
  * Wait on a condition variable.  The current thread is placed on the condition
  * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
  * condition variable will resume the thread.  The mutex is released before
  * sleeping and will be held on return.  It is recommended that the mutex be
  * held when cv_signal or cv_broadcast are called.
  */
 void
 _cv_wait(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	int lock_state;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * During autoconfiguration, just give interrupts
 		 * a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle
 		 * process and already asleep.
 		 */
 		return;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 }
 
 /*
  * Wait on a condition variable.  This function differs from cv_wait by
- * not aquiring the mutex after condition variable was signaled.
+ * not acquiring the mutex after condition variable was signaled.
  */
 void
 _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
 {
 	struct lock_class *class;
 	struct thread *td;
 
 	td = curthread;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	KASSERT(lock != &Giant.lock_object,
 	    ("cv_wait_unlock cannot be used with Giant"));
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * During autoconfiguration, just give interrupts
 		 * a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle
 		 * process and already asleep.
 		 */
 		class->lc_unlock(lock);
 		return;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_release(cvp);
 	class->lc_unlock(lock);
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_lock(cvp);
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 }
 
 /*
  * Wait on a condition variable, allowing interruption by signals.  Return 0 if
  * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
  * a signal was caught.  If ERESTART is returned the system call should be
  * restarted if possible.
  */
 int
 _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	int lock_state, rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * procs or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_wait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
  * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
  * expires.
  */
 int
 _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	int lock_state, rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return 0;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	sleepq_set_timeout(cvp, timo);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
- * Wait on a condition variable for at most timo/hz seconds, allowing
- * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
- * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
- * a signal was caught.
+ * Wait on a condition variable, allowing interruption by signals, until the
+ * timeout given by *bt (if bt is non-NULL) or timo/hz seconds (if bt is NULL)
+ * expires.  Returns 0 if the thread was resumed by cv_signal or cv_broadcast,
+ * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART on a signal.
  */
 int
-_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+    struct bintime *bt, int timo)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	int lock_state, rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return 0;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
-	sleepq_set_timeout(cvp, timo);
+	if (bt == NULL)
+		sleepq_set_timeout(cvp, timo);
+	else
+		sleepq_set_timeout_bt(cvp, *bt);
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
  * the swapper if the process is not in memory, so that it can bring the
  * sleeping process in.  Note that this may also result in additional threads
  * being made runnable.  Should be called with the same mutex as was passed to
  * cv_wait held.
  */
 void
 cv_signal(struct cv *cvp)
 {
 	int wakeup_swapper;
 
 	wakeup_swapper = 0;
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters--;
 		wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
 	}
 	sleepq_release(cvp);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
  * Should be called with the same mutex as was passed to cv_wait held.
  */
 void
 cv_broadcastpri(struct cv *cvp, int pri)
 {
 	int wakeup_swapper;
 
 	/*
 	 * XXX sleepq_broadcast pri argument changed from -1 meaning
 	 * no pri to 0 meaning no pri.
 	 */
 	wakeup_swapper = 0;
 	if (pri == -1)
 		pri = 0;
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters = 0;
 		wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
 	}
 	sleepq_release(cvp);
 	if (wakeup_swapper)
 		kick_proc0();
 }
Index: projects/calloutng/sys/kern/subr_sleepqueue.c
===================================================================
--- projects/calloutng/sys/kern/subr_sleepqueue.c	(revision 237201)
+++ projects/calloutng/sys/kern/subr_sleepqueue.c	(revision 237202)
@@ -1,1255 +1,1245 @@
 /*-
  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Implementation of sleep queues used to hold queue of threads blocked on
  * a wait channel.  Sleep queues different from turnstiles in that wait
  * channels are not owned by anyone, so there is no priority propagation.
  * Sleep queues can also provide a timeout and can also be interrupted by
  * signals.  That said, there are several similarities between the turnstile
  * and sleep queue implementations.  (Note: turnstiles were implemented
  * first.)  For example, both use a hash table of the same size where each
  * bucket is referred to as a "chain" that contains both a spin lock and
  * a linked list of queues.  An individual queue is located by using a hash
  * to pick a chain, locking the chain, and then walking the chain searching
  * for the queue.  This means that a wait channel object does not need to
- * embed it's queue head just as locks do not embed their turnstile queue
+ * embed its queue head just as locks do not embed their turnstile queue
  * head.  Threads also carry around a sleep queue that they lend to the
  * wait channel when blocking.  Just as in turnstiles, the queue includes
  * a free list of the sleep queues of other threads blocked on the same
  * wait channel in the case of multiple waiters.
  *
  * Some additional functionality provided by sleep queues include the
  * ability to set a timeout.  The timeout is managed using a per-thread
  * callout that resumes a thread if it is asleep.  A thread may also
  * catch signals while it is asleep (aka an interruptible sleep).  The
  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
  * sleep queues also provide some extra assertions.  One is not allowed to
  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
  * must consistently use the same lock to synchronize with a wait channel,
  * though this check is currently only a warning for sleep/wakeup due to
  * pre-existing abuse of that API.  The same lock must also be held when
  * awakening threads, though that is currently only enforced for condition
  * variables.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_sleepqueue_profiling.h"
 #include "opt_ddb.h"
 #include "opt_kdtrace.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 
 #include <vm/uma.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 /*
  * Constants for the hash table of sleep queue chains.  These constants are
  * the same ones that 4BSD (and possibly earlier versions of BSD) used.
  * Basically, we ignore the lower 8 bits of the address since most wait
  * channel pointers are aligned and only look at the next 7 bits for the
  * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
  */
 #define	SC_TABLESIZE	128			/* Must be power of 2. */
 #define	SC_MASK		(SC_TABLESIZE - 1)
 #define	SC_SHIFT	8
 #define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
 #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
 #define NR_SLEEPQS      2
 /*
- * There two different lists of sleep queues.  Both lists are connected
+ * There are two different lists of sleep queues.  Both lists are connected
  * via the sq_hash entries.  The first list is the sleep queue chain list
  * that a sleep queue is on when it is attached to a wait channel.  The
  * second list is the free list hung off of a sleep queue that is attached
  * to a wait channel.
  *
  * Each sleep queue also contains the wait channel it is attached to, the
  * list of threads blocked on that wait channel, flags specific to the
  * wait channel, and the lock used to synchronize with a wait channel.
  * The flags are used to catch mismatches between the various consumers
  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  * The lock pointer is only used when invariants are enabled for various
  * debugging checks.
  *
  * Locking key:
  *  c - sleep queue chain lock
  */
 struct sleepqueue {
 	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
 	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
 	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
 	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
 	void	*sq_wchan;			/* (c) Wait channel. */
 	int	sq_type;			/* (c) Queue type. */
 #ifdef INVARIANTS
 	struct lock_object *sq_lock;		/* (c) Associated lock. */
 #endif
 };
 
 struct sleepqueue_chain {
 	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
 	struct mtx sc_lock;			/* Spin lock for this chain. */
 #ifdef SLEEPQUEUE_PROFILING
 	u_int	sc_depth;			/* Length of sc_queues. */
 	u_int	sc_max_depth;			/* Max length of sc_queues. */
 #endif
 };
 
 #ifdef SLEEPQUEUE_PROFILING
 u_int sleepq_max_depth;
 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
     "sleepq chain stats");
 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
     0, "maxmimum depth achieved of a single chain");
 
 static void	sleepq_profile(const char *wmesg);
 static int	prof_enabled;
 #endif
 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
 static uma_zone_t sleepq_zone;
 
 /*
  * Prototypes for non-exported routines.
  */
 static int	sleepq_catch_signals(void *wchan, int pri);
 static int	sleepq_check_signals(void);
 static int	sleepq_check_timeout(void);
 #ifdef INVARIANTS
 static void	sleepq_dtor(void *mem, int size, void *arg);
 #endif
 static int	sleepq_init(void *mem, int size, int flags);
 static int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
 		    int pri);
 static void	sleepq_switch(void *wchan, int pri);
 static void	sleepq_timeout(void *arg);
 
 SDT_PROBE_DECLARE(sched, , , sleep);
 SDT_PROBE_DECLARE(sched, , , wakeup);
 
 /*
  * Early initialization of sleep queues that is called from the sleepinit()
  * SYSINIT.
  */
 void
 init_sleepqueues(void)
 {
 #ifdef SLEEPQUEUE_PROFILING
 	struct sysctl_oid *chain_oid;
 	char chain_name[10];
 #endif
 	int i;
 
 	for (i = 0; i < SC_TABLESIZE; i++) {
 		LIST_INIT(&sleepq_chains[i].sc_queues);
 		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
 		    MTX_SPIN | MTX_RECURSE);
 #ifdef SLEEPQUEUE_PROFILING
 		snprintf(chain_name, sizeof(chain_name), "%d", i);
 		chain_oid = SYSCTL_ADD_NODE(NULL, 
 		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
 		    chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 		    "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 		    "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
 		    NULL);
 #endif
 	}
 	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
 #ifdef INVARIANTS
 	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
 #else
 	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
 #endif
 	
 	thread0.td_sleepqueue = sleepq_alloc();
 }
 
 /*
  * Get a sleep queue for a new thread.
  */
 struct sleepqueue *
 sleepq_alloc(void)
 {
 
 	return (uma_zalloc(sleepq_zone, M_WAITOK));
 }
 
 /*
  * Free a sleep queue when a thread is destroyed.
  */
 void
 sleepq_free(struct sleepqueue *sq)
 {
 
 	uma_zfree(sleepq_zone, sq);
 }
 
 /*
  * Lock the sleep queue chain associated with the specified wait channel.
  */
 void
 sleepq_lock(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 
 	sc = SC_LOOKUP(wchan);
 	mtx_lock_spin(&sc->sc_lock);
 }
 
 /*
  * Look up the sleep queue associated with a given wait channel in the hash
  * table locking the associated sleep queue chain.  If no queue is found in
  * the table, NULL is returned.
  */
 struct sleepqueue *
 sleepq_lookup(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 		if (sq->sq_wchan == wchan)
 			return (sq);
 	return (NULL);
 }
 
 /*
  * Unlock the sleep queue chain associated with a given wait channel.
  */
 void
 sleepq_release(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 
 	sc = SC_LOOKUP(wchan);
 	mtx_unlock_spin(&sc->sc_lock);
 }
 
 /*
  * Places the current thread on the sleep queue for the specified wait
  * channel.  If INVARIANTS is enabled, then it associates the passed in
  * lock with the sleepq to make sure it is held when that sleep queue is
  * woken up.
  */
 void
 sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
     int queue)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	MPASS(td->td_sleepqueue != NULL);
 	MPASS(wchan != NULL);
 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 
 	/* If this thread is not allowed to sleep, die a horrible death. */
 	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
 	    ("Trying sleep, but thread marked as sleeping prohibited"));
 
 	/* Look up the sleep queue associated with the wait channel 'wchan'. */
 	sq = sleepq_lookup(wchan);
 
 	/*
 	 * If the wait channel does not already have a sleep queue, use
 	 * this thread's sleep queue.  Otherwise, insert the current thread
 	 * into the sleep queue already in use by this wait channel.
 	 */
 	if (sq == NULL) {
 #ifdef INVARIANTS
 		int i;
 
 		sq = td->td_sleepqueue;
 		for (i = 0; i < NR_SLEEPQS; i++) {
 			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
 			    ("thread's sleep queue %d is not empty", i));
 			KASSERT(sq->sq_blockedcnt[i] == 0,
 			    ("thread's sleep queue %d count mismatches", i));
 		}
 		KASSERT(LIST_EMPTY(&sq->sq_free),
 		    ("thread's sleep queue has a non-empty free list"));
 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
 		sq->sq_lock = lock;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
 		sc->sc_depth++;
 		if (sc->sc_depth > sc->sc_max_depth) {
 			sc->sc_max_depth = sc->sc_depth;
 			if (sc->sc_max_depth > sleepq_max_depth)
 				sleepq_max_depth = sc->sc_max_depth;
 		}
 #endif
 		sq = td->td_sleepqueue;
 		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
 		sq->sq_wchan = wchan;
 		sq->sq_type = flags & SLEEPQ_TYPE;
 	} else {
 		MPASS(wchan == sq->sq_wchan);
 		MPASS(lock == sq->sq_lock);
 		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
 		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
 	}
 	thread_lock(td);
 	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
 	sq->sq_blockedcnt[queue]++;
 	td->td_sleepqueue = NULL;
 	td->td_sqqueue = queue;
 	td->td_wchan = wchan;
 	td->td_wmesg = wmesg;
 	if (flags & SLEEPQ_INTERRUPTIBLE) {
 		td->td_flags |= TDF_SINTR;
 		td->td_flags &= ~TDF_SLEEPABORT;
 		if (flags & SLEEPQ_STOP_ON_BDRY)
 			td->td_flags |= TDF_SBDRY;
 	}
 	thread_unlock(td);
 }
 
 /*
- * Sets a timeout that will remove the current thread from the specified
- * sleep queue after timo ticks if the thread has not already been awakened.
+ * Sets a timeout that will remove the current thread from the specified sleep
+ * queue if it has not already been awakened: *bt if non-NULL, else timo ticks.
  */
 void 
-sleepq_set_timeout_bt(void *wchan, struct bintime bt)
+_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo)
 {
 
 	struct sleepqueue_chain *sc;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	MPASS(TD_ON_SLEEPQ(td));
 	MPASS(td->td_sleepqueue == NULL);
 	MPASS(wchan != NULL);
-	callout_reset_bt_on(&td->td_slpcallout, bt, sleepq_timeout, td, PCPU_GET(cpuid), 0);
-}
-
-void
-sleepq_set_timeout(void *wchan, int timo)
-{
-	struct sleepqueue_chain *sc;
-	struct thread *td;
-
-	td = curthread;
-	sc = SC_LOOKUP(wchan);
-	mtx_assert(&sc->sc_lock, MA_OWNED);
-	MPASS(TD_ON_SLEEPQ(td));
-	MPASS(td->td_sleepqueue == NULL);
-	MPASS(wchan != NULL);
-	callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
+	if (bt == NULL)
+		callout_reset_curcpu(&td->td_slpcallout, timo,
+		    sleepq_timeout, td);
+	else
+		callout_reset_bt_on(&td->td_slpcallout, *bt,
+		    sleepq_timeout, td, PCPU_GET(cpuid), 0);
 }
 
 /*
  * Return the number of actual sleepers for the specified queue.
  */
 u_int
 sleepq_sleepcnt(void *wchan, int queue)
 {
 	struct sleepqueue *sq;
 
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 	sq = sleepq_lookup(wchan);
 	if (sq == NULL)
 		return (0);
 	return (sq->sq_blockedcnt[queue]);
 }
 
 /*
  * Marks the pending sleep of the current thread as interruptible and
  * makes an initial check for pending signals before putting a thread
  * to sleep. Enters and exits with the thread lock held.  Thread lock
  * may have transitioned from the sleepq lock to a run lock.
  */
 static int
 sleepq_catch_signals(void *wchan, int pri)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 	struct proc *p;
 	struct sigacts *ps;
 	int sig, ret, stop_allowed;
 
 	td = curthread;
 	p = curproc;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	MPASS(wchan != NULL);
 	if ((td->td_pflags & TDP_WAKEUP) != 0) {
 		td->td_pflags &= ~TDP_WAKEUP;
 		ret = EINTR;
 		thread_lock(td);
 		goto out;
 	}
 
 	/*
 	 * See if there are any pending signals for this thread.  If not
 	 * we can switch immediately.  Otherwise do the signal processing
 	 * directly.
 	 */
 	thread_lock(td);
 	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) {
 		sleepq_switch(wchan, pri);
 		return (0);
 	}
 	stop_allowed = (td->td_flags & TDF_SBDRY) ? SIG_STOP_NOT_ALLOWED :
 	    SIG_STOP_ALLOWED;
 	thread_unlock(td);
 	mtx_unlock_spin(&sc->sc_lock);
 	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
 		(void *)td, (long)p->p_pid, td->td_name);
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	sig = cursig(td, stop_allowed);
 	if (sig == 0) {
 		mtx_unlock(&ps->ps_mtx);
 		ret = thread_suspend_check(1);
 		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
 	} else {
 		if (SIGISMEMBER(ps->ps_sigintr, sig))
 			ret = EINTR;
 		else
 			ret = ERESTART;
 		mtx_unlock(&ps->ps_mtx);
 	}
 	/*
 	 * Lock the per-process spinlock prior to dropping the PROC_LOCK
 	 * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
 	 * thread_lock() are currently held in tdsendsignal().
 	 */
 	PROC_SLOCK(p);
 	mtx_lock_spin(&sc->sc_lock);
 	PROC_UNLOCK(p);
 	thread_lock(td);
 	PROC_SUNLOCK(p);
 	if (ret == 0) {
 		sleepq_switch(wchan, pri);
 		return (0);
 	}
 out:
 	/*
 	 * There were pending signals and this thread is still
 	 * on the sleep queue, remove it from the sleep queue.
 	 */
 	if (TD_ON_SLEEPQ(td)) {
 		sq = sleepq_lookup(wchan);
 		if (sleepq_resume_thread(sq, td, 0)) {
 #ifdef INVARIANTS
 			/*
 			 * This thread hasn't gone to sleep yet, so it
 			 * should not be swapped out.
 			 */
 			panic("not waking up swapper");
 #endif
 		}
 	}
 	mtx_unlock_spin(&sc->sc_lock);
 	MPASS(td->td_lock != &sc->sc_lock);
 	return (ret);
 }
 
 /*
  * Switches to another thread if we are still asleep on a sleep queue.
  * Returns with thread lock.
  */
 static void
 sleepq_switch(void *wchan, int pri)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/* 
 	 * If we have a sleep queue, then we've already been woken up, so
 	 * just return.
 	 */
 	if (td->td_sleepqueue != NULL) {
 		mtx_unlock_spin(&sc->sc_lock);
 		return;
 	}
 
 	/*
 	 * If TDF_TIMEOUT is set, then our sleep has been timed out
 	 * already but we are still on the sleep queue, so dequeue the
 	 * thread and return.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_ON_SLEEPQ(td));
 		sq = sleepq_lookup(wchan);
 		if (sleepq_resume_thread(sq, td, 0)) {
 #ifdef INVARIANTS
 			/*
 			 * This thread hasn't gone to sleep yet, so it
 			 * should not be swapped out.
 			 */
 			panic("not waking up swapper");
 #endif
 		}
 		mtx_unlock_spin(&sc->sc_lock);
 		return;		
 	}
 #ifdef SLEEPQUEUE_PROFILING
 	if (prof_enabled)
 		sleepq_profile(td->td_wmesg);
 #endif
 	MPASS(td->td_sleepqueue == NULL);
 	sched_sleep(td, pri);
 	thread_lock_set(td, &sc->sc_lock);
 	SDT_PROBE0(sched, , , sleep);
 	TD_SET_SLEEPING(td);
 	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
 	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
 	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 }
 
 /*
  * Report whether the current thread's sleep ended because of a timeout.
  *
  * The current thread's lock must be held.  Returns EWOULDBLOCK when
  * TDF_TIMEOUT was set on the thread (the flag is consumed) and 0
  * otherwise.
  */
 static int
 sleepq_check_timeout(void)
 {
 	struct thread *self;
 
 	self = curthread;
 	THREAD_LOCK_ASSERT(self, MA_OWNED);
 
 	/* TDF_TIMEOUT set means we timed out; consume the flag. */
 	if ((self->td_flags & TDF_TIMEOUT) != 0) {
 		self->td_flags &= ~TDF_TIMEOUT;
 		return (EWOULDBLOCK);
 	}
 
 	if ((self->td_flags & TDF_TIMOFAIL) != 0) {
 		/*
 		 * The timeout ran after we had already been woken up,
 		 * so there is nothing left to cancel.
 		 */
 		self->td_flags &= ~TDF_TIMOFAIL;
 	} else if (callout_stop(&self->td_slpcallout) == 0) {
 		/*
 		 * callout_stop() failed, so the timeout is running on
 		 * another CPU.  Synchronize with it to avoid having it
 		 * accidentally wake up a subsequent sleep.
 		 */
 		self->td_flags |= TDF_TIMEOUT;
 		TD_SET_SLEEPING(self);
 		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
 	}
 	return (0);
 }
 
 /*
  * Check to see if we were awoken by a signal.
  */
 static int
 sleepq_check_signals(void)
 {
 	struct thread *td;
 
 	td = curthread;
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/* We are no longer in an interruptible sleep. */
 	if (td->td_flags & TDF_SINTR)
 		td->td_flags &= ~(TDF_SINTR | TDF_SBDRY);
 
 	if (td->td_flags & TDF_SLEEPABORT) {
 		td->td_flags &= ~TDF_SLEEPABORT;
 		return (td->td_intrval);
 	}
 
 	return (0);
 }
 
 /*
  * Put the current thread to sleep on wchan until it is awakened from
  * its sleep queue.  The sleep is not interruptible: TDF_SINTR must not
  * be set on the thread.
  */
 void
 sleepq_wait(void *wchan, int pri)
 {
 	struct thread *self = curthread;
 
 	MPASS((self->td_flags & TDF_SINTR) == 0);
 
 	thread_lock(self);
 	sleepq_switch(wchan, pri);
 	thread_unlock(self);
 }
 
 /*
  * Put the current thread to sleep until it is awakened from its sleep
  * queue or interrupted by a signal.
  *
  * Returns the non-zero result of sleepq_catch_signals() if there is
  * one, otherwise the result of sleepq_check_signals().
  */
 int
 sleepq_wait_sig(void *wchan, int pri)
 {
 	int caught, aborted;
 
 	caught = sleepq_catch_signals(wchan, pri);
 	aborted = sleepq_check_signals();
 	thread_unlock(curthread);
 
 	/* A result from sleepq_catch_signals() takes precedence. */
 	return (caught != 0 ? caught : aborted);
 }
 
 /*
  * Put the current thread to sleep until it is awakened from its sleep
  * queue or its timeout fires.  The sleep is not interruptible.
  *
  * Returns the result of sleepq_check_timeout().
  */
 int
 sleepq_timedwait(void *wchan, int pri)
 {
 	struct thread *self = curthread;
 	int timedout;
 
 	MPASS((self->td_flags & TDF_SINTR) == 0);
 
 	thread_lock(self);
 	sleepq_switch(wchan, pri);
 	timedout = sleepq_check_timeout();
 	thread_unlock(self);
 
 	return (timedout);
 }
 
 /*
  * Put the current thread to sleep until it is awakened from its sleep
  * queue, interrupted by a signal, or timed out.
  *
  * The result is chosen in this order of precedence: the result of
  * sleepq_catch_signals(), then of sleepq_check_signals(), then of
  * sleepq_check_timeout().
  */
 int
 sleepq_timedwait_sig(void *wchan, int pri)
 {
 	int caught, timedout, aborted;
 
 	caught = sleepq_catch_signals(wchan, pri);
 	timedout = sleepq_check_timeout();
 	aborted = sleepq_check_signals();
 	thread_unlock(curthread);
 
 	if (caught != 0)
 		return (caught);
 	return (aborted != 0 ? aborted : timedout);
 }
 
 /*
  * Look up the sleep queue for a wait channel and return its type, or
  * -1 if no sleep queue exists for that channel.  The sleep queue lock
  * for wchan is taken and released around the lookup.
  */
 int
 sleepq_type(void *wchan)
 {
 	struct sleepqueue *sq;
 	int type;
 
 	MPASS(wchan != NULL);
 
 	sleepq_lock(wchan);
 	sq = sleepq_lookup(wchan);
 	type = (sq != NULL) ? sq->sq_type : -1;
 	sleepq_release(wchan);
 
 	return (type);
 }
 
 /*
  * Removes a thread from a sleep queue and makes it
  * runnable.
  *
  * The caller must hold the thread lock and the sleep queue chain lock
  * for sq->sq_wchan; both are asserted below.  When pri is non-zero and
  * numerically lower than the thread's current td_priority, a thread in
  * the timeshare class has its priority set to pri.
  *
  * Returns the result of setrunnable() if the thread was sleeping and 0
  * otherwise.  Callers in this file treat a non-zero return as a
  * request to call kick_proc0().
  */
 static int
 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
 {
 	struct sleepqueue_chain *sc;
 
 	MPASS(td != NULL);
 	MPASS(sq->sq_wchan != NULL);
 	MPASS(td->td_wchan == sq->sq_wchan);
 	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	sc = SC_LOOKUP(sq->sq_wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 
 	SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
 
 	/* Remove the thread from the queue. */
 	sq->sq_blockedcnt[td->td_sqqueue]--;
 	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
 
 	/*
 	 * Get a sleep queue for this thread.  If this is the last waiter,
 	 * use the queue itself and take it out of the chain, otherwise,
 	 * remove a queue from the free list.
 	 */
 	if (LIST_EMPTY(&sq->sq_free)) {
 		td->td_sleepqueue = sq;
 #ifdef INVARIANTS
 		sq->sq_wchan = NULL;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
 		sc->sc_depth--;
 #endif
 	} else
 		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
 	/*
 	 * Unlink whichever queue was chosen above; both cases use the
 	 * sq_hash linkage.
 	 */
 	LIST_REMOVE(td->td_sleepqueue, sq_hash);
 
 	/* The thread is no longer associated with any wait channel. */
 	td->td_wmesg = NULL;
 	td->td_wchan = NULL;
 	td->td_flags &= ~(TDF_SINTR | TDF_SBDRY);
 
 	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
 
 	/* Adjust priority if requested. */
 	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
 	if (pri != 0 && td->td_priority > pri &&
 	    PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
 		sched_prio(td, pri);
 
 	/*
 	 * Note that thread td might not be sleeping if it is running
 	 * sleepq_catch_signals() on another CPU or is blocked on its
 	 * proc lock to check signals.  There's no need to mark the
 	 * thread runnable in that case.
 	 */
 	if (TD_IS_SLEEPING(td)) {
 		TD_CLR_SLEEPING(td);
 		return (setrunnable(td));
 	}
 	return (0);
 }
 
 #ifdef INVARIANTS
 /*
  * UMA zone item deallocator.  Asserts that none of the item's queues
  * still has blocked threads or a non-zero blocked count.
  */
 static void
 sleepq_dtor(void *mem, int size, void *arg)
 {
 	struct sleepqueue *sq = mem;
 	int q;
 
 	for (q = 0; q < NR_SLEEPQS; q++) {
 		MPASS(TAILQ_EMPTY(&sq->sq_blocked[q]));
 		MPASS(sq->sq_blockedcnt[q] == 0);
 	}
 }
 #endif
 
 /*
  * UMA zone item initializer.  Zeroes the item, then sets up the empty
  * blocked queues, their counters and the free list.  Always returns 0.
  */
 static int
 sleepq_init(void *mem, int size, int flags)
 {
 	struct sleepqueue *sq = mem;
 	int q;
 
 	bzero(sq, size);
 	for (q = 0; q < NR_SLEEPQS; q++) {
 		TAILQ_INIT(&sq->sq_blocked[q]);
 		sq->sq_blockedcnt[q] = 0;
 	}
 	LIST_INIT(&sq->sq_free);
 
 	return (0);
 }
 
 /*
  * Resume the best candidate among the threads sleeping on the given
  * queue of a wait channel.
  *
  * Returns 0 if there is no sleep queue for wchan, otherwise the result
  * of sleepq_resume_thread() for the chosen thread.  The selected queue
  * is expected to be non-empty (asserted).
  */
 int
 sleepq_signal(void *wchan, int flags, int pri, int queue)
 {
 	struct sleepqueue *sq;
 	struct thread *cand, *besttd;
 	int wakeup_swapper;
 
 	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 
 	sq = sleepq_lookup(wchan);
 	if (sq == NULL)
 		return (0);
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
 	/*
 	 * Pick the thread with the lowest td_priority value.  Only a
 	 * strictly better candidate replaces the current choice, so on
 	 * a tie the thread nearest the head of the queue wins; it has
 	 * been sleeping the longest since threads are always added to
 	 * the tail of sleep queues.
 	 */
 	besttd = NULL;
 	TAILQ_FOREACH(cand, &sq->sq_blocked[queue], td_slpq) {
 		if (besttd != NULL &&
 		    cand->td_priority >= besttd->td_priority)
 			continue;
 		besttd = cand;
 	}
 	MPASS(besttd != NULL);
 
 	thread_lock(besttd);
 	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
 	thread_unlock(besttd);
 
 	return (wakeup_swapper);
 }
 
 /*
  * Resume every thread sleeping on the given queue of a wait channel.
  *
  * Returns 0 if there is no sleep queue for wchan; otherwise returns 1
  * if any sleepq_resume_thread() call returned non-zero and 0 if none
  * did.
  */
 int
 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 {
 	struct sleepqueue *sq;
 	struct thread *cur, *next;
 	int wakeup_swapper;
 
 	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 
 	sq = sleepq_lookup(wchan);
 	if (sq == NULL)
 		return (0);
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
 	/*
 	 * Each resume unlinks the thread from the queue, hence the
 	 * removal-safe iterator.
 	 */
 	wakeup_swapper = 0;
 	TAILQ_FOREACH_SAFE(cur, &sq->sq_blocked[queue], td_slpq, next) {
 		thread_lock(cur);
 		wakeup_swapper |= (sleepq_resume_thread(sq, cur, pri) != 0);
 		thread_unlock(cur);
 	}
 
 	return (wakeup_swapper);
 }
 
 /*
  * Time sleeping threads out.  When the timeout expires, the thread is
  * removed from the sleep queue and made runnable if it is still asleep.
  *
  * arg is the struct thread whose sleep timed out.  The function takes
  * the thread lock itself and handles three situations: the thread is
  * asleep on a sleep queue, it is on a sleep queue but not yet asleep,
  * or it is no longer on a sleep queue at all.
  */
 static void
 sleepq_timeout(void *arg)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 	void *wchan;
 	int wakeup_swapper;
 
 	td = arg;
 	wakeup_swapper = 0;
 	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 
 	/*
 	 * First, see if the thread is asleep and get the wait channel if
 	 * it is.
 	 */
 	thread_lock(td);
 	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 		wchan = td->td_wchan;
 		sc = SC_LOOKUP(wchan);
 		/* The thread lock is expected to be the chain lock here. */
 		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 		sq = sleepq_lookup(wchan);
 		MPASS(sq != NULL);
 		td->td_flags |= TDF_TIMEOUT;
 		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 		thread_unlock(td);
 		if (wakeup_swapper)
 			kick_proc0();
 		return;
 	}
 
 	/*
 	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
 	 * can either be on another CPU in between sleepq_add() and
 	 * one of the sleepq_*wait*() routines or it can be in
 	 * sleepq_catch_signals().
 	 */
 	if (TD_ON_SLEEPQ(td)) {
 		td->td_flags |= TDF_TIMEOUT;
 		thread_unlock(td);
 		return;
 	}
 
 	/*
 	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
 	 * then the other thread has already yielded to us, so clear
 	 * the flag and resume it.  If TDF_TIMEOUT is not set, then
 	 * we know that the other thread is not on a sleep queue, but it
 	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
 	 * to let it know that the timeout has already run and doesn't
 	 * need to be canceled.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_IS_SLEEPING(td));
 		td->td_flags &= ~TDF_TIMEOUT;
 		TD_CLR_SLEEPING(td);
 		wakeup_swapper = setrunnable(td);
 	} else
 		td->td_flags |= TDF_TIMOFAIL;
 	thread_unlock(td);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Resumes a specific thread from the sleep queue associated with a specific
  * wait channel if it is on that queue.
  *
  * The sleep queue lock for wchan is taken here and released before
  * returning.  Nothing is done if the thread is not on a sleep queue or
  * is sleeping on a different wait channel.  kick_proc0() is called
  * when sleepq_resume_thread() returns non-zero.
  */
 void
 sleepq_remove(struct thread *td, void *wchan)
 {
 	struct sleepqueue *sq;
 	int wakeup_swapper;
 
 	/*
 	 * Look up the sleep queue for this wait channel, then re-check
 	 * that the thread is asleep on that channel, if it is not, then
 	 * bail.
 	 */
 	MPASS(wchan != NULL);
 	sleepq_lock(wchan);
 	sq = sleepq_lookup(wchan);
 	/*
 	 * We can not lock the thread here as it may be sleeping on a
 	 * different sleepq.  However, holding the sleepq lock for this
 	 * wchan can guarantee that we do not miss a wakeup for this
 	 * channel.  The asserts below will catch any false positives.
 	 */
 	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 		sleepq_release(wchan);
 		return;
 	}
 	/* Thread is asleep on sleep queue sq, so wake it up. */
 	thread_lock(td);
 	MPASS(sq != NULL);
 	MPASS(td->td_wchan == wchan);
 	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 	thread_unlock(td);
 	sleepq_release(wchan);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Abort a thread as if an interrupt had occurred.  Only abort
  * interruptible waits (unfortunately it isn't safe to abort others).
  *
  * The caller must hold the thread lock; the thread must be on a sleep
  * queue with TDF_SINTR set, and intrval must be EINTR or ERESTART
  * (all asserted).  intrval is recorded in td_intrval for the sleeper
  * to pick up.
  *
  * Returns 0 if a timeout is already pending (TDF_TIMEOUT) or the
  * thread is not yet sleeping; otherwise returns the result of
  * sleepq_resume_thread().
  */
 int
 sleepq_abort(struct thread *td, int intrval)
 {
 	struct sleepqueue *sq;
 	void *wchan;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	MPASS(TD_ON_SLEEPQ(td));
 	MPASS(td->td_flags & TDF_SINTR);
 	MPASS(intrval == EINTR || intrval == ERESTART);
 
 	/*
 	 * If the TDF_TIMEOUT flag is set, just leave. A
 	 * timeout is scheduled anyhow.
 	 */
 	if (td->td_flags & TDF_TIMEOUT)
 		return (0);
 
 	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 	td->td_intrval = intrval;
 	td->td_flags |= TDF_SLEEPABORT;
 	/*
 	 * If the thread has not slept yet it will find the signal in
 	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 	 * we have to do it here.
 	 */
 	if (!TD_IS_SLEEPING(td))
 		return (0);
 	wchan = td->td_wchan;
 	MPASS(wchan != NULL);
 	sq = sleepq_lookup(wchan);
 	MPASS(sq != NULL);
 
 	/* Thread is asleep on sleep queue sq, so wake it up. */
 	return (sleepq_resume_thread(sq, td, 0));
 }
 
 #ifdef SLEEPQUEUE_PROFILING
 /* Number of statically allocated profiling records. */
 #define	SLEEPQ_PROF_LOCATIONS	1024
 /* Initial sbuf length used when dumping the statistics via sysctl. */
 #define	SLEEPQ_SBUFSIZE		512
 /*
  * One profiling record: counts how often a given wait message was
  * passed to sleepq_profile().  Records are matched by comparing the
  * sp_wmesg pointer, not the string contents.
  */
 struct sleepq_prof {
 	LIST_ENTRY(sleepq_prof) sp_link;	/* Free list or hash bucket. */
 	const char	*sp_wmesg;		/* Wait message pointer. */
 	long		sp_count;		/* Number of hits. */
 };
 
 LIST_HEAD(sqphead, sleepq_prof);
 
 /* Unused records, and in-use records hashed by SC_HASH(wmesg). */
 struct sqphead sleepq_prof_free;
 struct sqphead sleepq_hash[SC_TABLESIZE];
 /* Backing storage for all records. */
 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 /* Spin mutex taken around updates to the lists above and prof_enabled. */
 static struct mtx sleepq_prof_lock;
 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 
 static void
 sleepq_profile(const char *wmesg)
 {
 	struct sleepq_prof *sp;
 
 	mtx_lock_spin(&sleepq_prof_lock);
 	if (prof_enabled == 0)
 		goto unlock;
 	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 		if (sp->sp_wmesg == wmesg)
 			goto done;
 	sp = LIST_FIRST(&sleepq_prof_free);
 	if (sp == NULL)
 		goto unlock;
 	sp->sp_wmesg = wmesg;
 	LIST_REMOVE(sp, sp_link);
 	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 done:
 	sp->sp_count++;
 unlock:
 	mtx_unlock_spin(&sleepq_prof_lock);
 	return;
 }
 
 static void
 sleepq_prof_reset(void)
 {
 	struct sleepq_prof *sp;
 	int enabled;
 	int i;
 
 	mtx_lock_spin(&sleepq_prof_lock);
 	enabled = prof_enabled;
 	prof_enabled = 0;
 	for (i = 0; i < SC_TABLESIZE; i++)
 		LIST_INIT(&sleepq_hash[i]);
 	LIST_INIT(&sleepq_prof_free);
 	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 		sp = &sleepq_profent[i];
 		sp->sp_wmesg = NULL;
 		sp->sp_count = 0;
 		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 	}
 	prof_enabled = enabled;
 	mtx_unlock_spin(&sleepq_prof_lock);
 }
 
 static int
 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 {
 	int error, v;
 
 	v = prof_enabled;
 	error = sysctl_handle_int(oidp, &v, v, req);
 	if (error)
 		return (error);
 	if (req->newptr == NULL)
 		return (error);
 	if (v == prof_enabled)
 		return (0);
 	if (v == 1)
 		sleepq_prof_reset();
 	mtx_lock_spin(&sleepq_prof_lock);
 	prof_enabled = !!v;
 	mtx_unlock_spin(&sleepq_prof_lock);
 
 	return (0);
 }
 
 static int
 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 {
 	int error, v;
 
 	v = 0;
 	error = sysctl_handle_int(oidp, &v, 0, req);
 	if (error)
 		return (error);
 	if (req->newptr == NULL)
 		return (error);
 	if (v == 0)
 		return (0);
 	sleepq_prof_reset();
 
 	return (0);
 }
 
 static int
 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct sleepq_prof *sp;
 	struct sbuf *sb;
 	int enabled;
 	int error;
 	int i;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 	sbuf_printf(sb, "\nwmesg\tcount\n");
 	enabled = prof_enabled;
 	mtx_lock_spin(&sleepq_prof_lock);
 	prof_enabled = 0;
 	mtx_unlock_spin(&sleepq_prof_lock);
 	for (i = 0; i < SC_TABLESIZE; i++) {
 		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 			sbuf_printf(sb, "%s\t%ld\n",
 			    sp->sp_wmesg, sp->sp_count);
 		}
 	}
 	mtx_lock_spin(&sleepq_prof_lock);
 	prof_enabled = enabled;
 	mtx_unlock_spin(&sleepq_prof_lock);
 
 	error = sbuf_finish(sb);
 	sbuf_delete(sb);
 	return (error);
 }
 
 /*
  * Sysctl nodes under _debug_sleepq for the profiling code above:
  * "stats" (read-only text dump), "reset" (write non-zero to clear the
  * statistics) and "enable" (turn profiling on or off).
  */
 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
     NULL, 0, reset_sleepq_prof_stats, "I",
     "Reset sleepqueue profiling statistics");
 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
     NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
 #endif
 
 #ifdef DDB
 /*
  * DDB "show sleepq" command: print the sleep queue found via the given
  * address.  The address is tried first as a wait channel and then as
  * the address of a struct sleepqueue linked into one of the chains.
  * For each of the NR_SLEEPQS queues the blocked threads are listed,
  * followed by the blocked count recorded for that queue.
  */
 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 #ifdef INVARIANTS
 	struct lock_object *lock;
 #endif
 	struct thread *td;
 	void *wchan;
 	int i;
 
 	if (!have_addr)
 		return;
 
 	/*
 	 * First, see if there is an active sleep queue for the wait channel
 	 * indicated by the address.
 	 */
 	wchan = (void *)addr;
 	sc = SC_LOOKUP(wchan);
 	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 		if (sq->sq_wchan == wchan)
 			goto found;
 
 	/*
 	 * Second, see if there is an active sleep queue at the address
 	 * indicated.
 	 */
 	for (i = 0; i < SC_TABLESIZE; i++)
 		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 			if (sq == (struct sleepqueue *)addr)
 				goto found;
 		}
 
 	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 	return;
 found:
 	db_printf("Wait channel: %p\n", sq->sq_wchan);
 	db_printf("Queue type: %d\n", sq->sq_type);
 #ifdef INVARIANTS
 	if (sq->sq_lock) {
 		lock = sq->sq_lock;
 		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
 	}
 #endif
 	db_printf("Blocked threads:\n");
 	for (i = 0; i < NR_SLEEPQS; i++) {
 		db_printf("\nQueue[%d]:\n", i);
 		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 			db_printf("\tempty\n");
 		else {
 			/*
 			 * Walk queue i itself.  Indexing queue 0 here
 			 * would list queue 0's threads under every
 			 * queue heading.
 			 */
 			TAILQ_FOREACH(td, &sq->sq_blocked[i],
 				      td_slpq) {
 				db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 					  td->td_tid, td->td_proc->p_pid,
 					  td->td_name);
 			}
 		}
 		db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 	}
 }
 
 /* Alias 'show sleepqueue' to 'show sleepq'. */
 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 #endif
Index: projects/calloutng/sys/kern/sys_generic.c
===================================================================
--- projects/calloutng/sys/kern/sys_generic.c	(revision 237201)
+++ projects/calloutng/sys/kern/sys_generic.c	(revision 237202)
@@ -1,1770 +1,1775 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/capability.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/socketvar.h>
 #include <sys/uio.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/poll.h>
 #include <sys/resourcevar.h>
 #include <sys/selinfo.h>
 #include <sys/sleepqueue.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/condvar.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <security/audit/audit.h>
 
 int iosize_max_clamp = 1;
 SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW,
     &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX");
 /*
  * Assert that the return value of read(2) and write(2) syscalls fits
  * into a register.  If not, an architecture will need to provide the
  * usermode wrappers to reconstruct the result.
  */
 CTASSERT(sizeof(register_t) >= sizeof(size_t));
 
 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
 
 static int	pollout(struct thread *, struct pollfd *, struct pollfd *,
 		    u_int);
 static int	pollscan(struct thread *, struct pollfd *, u_int);
 static int	pollrescan(struct thread *);
 static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
 static int	selrescan(struct thread *, fd_mask **, fd_mask **);
 static void	selfdalloc(struct thread *, void *);
 static void	selfdfree(struct seltd *, struct selfd *);
 static int	dofileread(struct thread *, int, struct file *, struct uio *,
 		    off_t, int);
 static int	dofilewrite(struct thread *, int, struct file *, struct uio *,
 		    off_t, int);
 static void	doselwakeup(struct selinfo *, int);
 static void	seltdinit(struct thread *);
-static int	seltdwait(struct thread *, int);
+static int	seltdwait(struct thread *, struct bintime *, int);
 static void	seltdclear(struct thread *);
 
 /*
  * One seltd per-thread allocated on demand as needed.
  *
  * Locking key for the field annotations below:
  *	t - protected by st_mtx
  *	k - Only accessed by curthread or read-only
  */
 struct seltd {
 	STAILQ_HEAD(, selfd)	st_selq;	/* (k) List of selfds. */
 	struct selfd		*st_free1;	/* (k) free fd for read set. */
 	struct selfd		*st_free2;	/* (k) free fd for write set. */
 	struct mtx		st_mtx;		/* Protects struct seltd */
 	struct cv		st_wait;	/* (t) Wait channel. */
 	int			st_flags;	/* (t) SELTD_ flags. */
 };
 
 #define	SELTD_PENDING	0x0001			/* We have pending events. */
 #define	SELTD_RESCAN	0x0002			/* Doing a rescan. */
 
 /*
  * One selfd allocated per-thread per-file-descriptor.
  *
  * Locking key for the field annotations below:
  *	f - protected by sf_mtx
  *	k - same meaning as in struct seltd: only accessed by curthread
  *	    or read-only
  */
 struct selfd {
 	STAILQ_ENTRY(selfd)	sf_link;	/* (k) fds owned by this td. */
 	TAILQ_ENTRY(selfd)	sf_threads;	/* (f) fds on this selinfo. */
 	struct selinfo		*sf_si;		/* (f) selinfo when linked. */
 	struct mtx		*sf_mtx;	/* Pointer to selinfo mtx. */
 	struct seltd		*sf_td;		/* (k) owning seltd. */
 	void			*sf_cookie;	/* (k) fd or pollfd. */
 };
 
 static uma_zone_t selfd_zone;
 static struct mtx_pool *mtxpool_select;
 
 #ifndef _SYS_SYSPROTO_H_
 struct read_args {
 	int	fd;
 	void	*buf;
 	size_t	nbyte;
 };
 #endif
 /*
  * read(2): wrap the user buffer in a single-segment uio and hand it
  * to kern_readv().  Requests larger than IOSIZE_MAX fail with EINVAL.
  */
 int
 sys_read(struct thread *td, struct read_args *uap)
 {
 	struct iovec aiov;
 	struct uio auio;
 
 	if (uap->nbyte > IOSIZE_MAX)
 		return (EINVAL);
 
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->nbyte;
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_resid = uap->nbyte;
 	auio.uio_segflg = UIO_USERSPACE;
 
 	return (kern_readv(td, uap->fd, &auio));
 }
 
 /*
  * Positioned read system call
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pread_args {
 	int	fd;
 	void	*buf;
 	size_t	nbyte;
 	int	pad;
 	off_t	offset;
 };
 #endif
 /*
  * pread(2): wrap the user buffer in a single-segment uio and hand it
  * to kern_preadv() together with the requested offset.  Requests
  * larger than IOSIZE_MAX fail with EINVAL.
  */
 int
 sys_pread(struct thread *td, struct pread_args *uap)
 {
 	struct iovec aiov;
 	struct uio auio;
 
 	if (uap->nbyte > IOSIZE_MAX)
 		return (EINVAL);
 
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->nbyte;
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_resid = uap->nbyte;
 	auio.uio_segflg = UIO_USERSPACE;
 
 	return (kern_preadv(td, uap->fd, &auio, uap->offset));
 }
 
 int
 freebsd6_pread(td, uap)
 	struct thread *td;
 	struct freebsd6_pread_args *uap;
 {
 	struct pread_args oargs;
 
 	oargs.fd = uap->fd;
 	oargs.buf = uap->buf;
 	oargs.nbyte = uap->nbyte;
 	oargs.offset = uap->offset;
 	return (sys_pread(td, &oargs));
 }
 
 /*
  * Scatter read system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct readv_args {
 	int	fd;
 	struct	iovec *iovp;
 	u_int	iovcnt;
 };
 #endif
 /*
  * readv(2): copy in the user's iovec array, perform the read through
  * kern_readv(), and free the uio that was allocated by copyinuio().
  */
 int
 sys_readv(struct thread *td, struct readv_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error == 0) {
 		error = kern_readv(td, uap->fd, auio);
 		free(auio, M_IOV);
 	}
 	return (error);
 }
 
 /*
  * Read from descriptor fd into auio at the file's current position.
  * The file reference obtained with fget_read() is dropped before
  * returning.
  */
 int
 kern_readv(struct thread *td, int fd, struct uio *auio)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
 	if (error == 0) {
 		error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /*
  * Scatter positioned read system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct preadv_args {
 	int	fd;
 	struct	iovec *iovp;
 	u_int	iovcnt;
 	off_t	offset;
 };
 #endif
 /*
  * preadv(2): copy in the user's iovec array, perform the positioned
  * read through kern_preadv(), and free the uio that was allocated by
  * copyinuio().
  */
 int
 sys_preadv(struct thread *td, struct preadv_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error == 0) {
 		error = kern_preadv(td, uap->fd, auio, uap->offset);
 		free(auio, M_IOV);
 	}
 	return (error);
 }
 
 /*
  * Read from descriptor fd into auio at an explicit offset.
  *
  * Fails with ESPIPE if the file is not seekable, and with EINVAL for a
  * negative offset unless the file's vnode is a character device.
  */
 int
 kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_read(td, fd, CAP_READ, &fp);
 	if (error != 0)
 		return (error);
 
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 	} else if (offset < 0 && fp->f_vnode->v_type != VCHR) {
 		error = EINVAL;
 	} else {
 		error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
 	}
 
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common back end for the read system calls: read from fp using the
  * passed-in uio, offset and flags.
  *
  * The number of bytes transferred is stored in td->td_retval[0].  An
  * ERESTART, EINTR or EWOULDBLOCK error is turned into success when
  * some data was transferred before it occurred.
  */
 static int
 dofileread(struct thread *td, int fd, struct file *fp, struct uio *auio,
     off_t offset, int flags)
 {
 	ssize_t want, done;
 	int error;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 
 	/* Finish zero length reads right here */
 	if (auio->uio_resid == 0) {
 		td->td_retval[0] = 0;
 		return (0);
 	}
 
 	auio->uio_rw = UIO_READ;
 	auio->uio_offset = offset;
 	auio->uio_td = td;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(auio);
 #endif
 
 	want = auio->uio_resid;
 	error = fo_read(fp, auio, td->td_ucred, flags, td);
 	if (error != 0 && auio->uio_resid != want &&
 	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
 		error = 0;
 	done = want - auio->uio_resid;
 
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = done;
 		ktrgenio(fd, UIO_READ, ktruio, error);
 	}
 #endif
 	td->td_retval[0] = done;
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct write_args {
 	int	fd;
 	const void *buf;
 	size_t	nbyte;
 };
 #endif
 /*
  * write(2): wrap the user buffer in a single-segment uio and hand it
  * to kern_writev().  Requests larger than IOSIZE_MAX fail with EINVAL.
  */
 int
 sys_write(struct thread *td, struct write_args *uap)
 {
 	struct iovec aiov;
 	struct uio auio;
 
 	if (uap->nbyte > IOSIZE_MAX)
 		return (EINVAL);
 
 	/* iov_base is not const-qualified, so strip the qualifier. */
 	aiov.iov_base = (void *)(uintptr_t)uap->buf;
 	aiov.iov_len = uap->nbyte;
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_resid = uap->nbyte;
 	auio.uio_segflg = UIO_USERSPACE;
 
 	return (kern_writev(td, uap->fd, &auio));
 }
 
 /*
  * Positioned write system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pwrite_args {
 	int	fd;
 	const void *buf;
 	size_t	nbyte;
 	int	pad;
 	off_t	offset;
 };
 #endif
 /*
  * pwrite(2): wrap the user buffer in a single-segment uio and hand it
  * to kern_pwritev() together with the requested offset.  Requests
  * larger than IOSIZE_MAX fail with EINVAL.
  */
 int
 sys_pwrite(struct thread *td, struct pwrite_args *uap)
 {
 	struct iovec aiov;
 	struct uio auio;
 
 	if (uap->nbyte > IOSIZE_MAX)
 		return (EINVAL);
 
 	/* iov_base is not const-qualified, so strip the qualifier. */
 	aiov.iov_base = (void *)(uintptr_t)uap->buf;
 	aiov.iov_len = uap->nbyte;
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_resid = uap->nbyte;
 	auio.uio_segflg = UIO_USERSPACE;
 
 	return (kern_pwritev(td, uap->fd, &auio, uap->offset));
 }
 
 int
 freebsd6_pwrite(td, uap)
 	struct thread *td;
 	struct freebsd6_pwrite_args *uap;
 {
 	struct pwrite_args oargs;
 
 	oargs.fd = uap->fd;
 	oargs.buf = uap->buf;
 	oargs.nbyte = uap->nbyte;
 	oargs.offset = uap->offset;
 	return (sys_pwrite(td, &oargs));
 }
 
 /*
  * Gather write system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct writev_args {
 	int	fd;
 	struct	iovec *iovp;
 	u_int	iovcnt;
 };
 #endif
 /*
  * writev(2): copy in the user's iovec array, perform the write through
  * kern_writev(), and free the uio that was allocated by copyinuio().
  */
 int
 sys_writev(struct thread *td, struct writev_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error == 0) {
 		error = kern_writev(td, uap->fd, auio);
 		free(auio, M_IOV);
 	}
 	return (error);
 }
 
 /*
  * Write auio to descriptor fd at the file's current position.  The
  * file reference obtained with fget_write() is dropped before
  * returning.
  */
 int
 kern_writev(struct thread *td, int fd, struct uio *auio)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
 	if (error == 0) {
 		error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /*
  * Gather positioned write system call.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pwritev_args {
 	int	fd;
 	struct	iovec *iovp;
 	u_int	iovcnt;
 	off_t	offset;
 };
 #endif
 /*
  * pwritev(2): copy in the user's iovec array, perform the positioned
  * write through kern_pwritev(), and free the uio that was allocated
  * by copyinuio().
  */
 int
 sys_pwritev(struct thread *td, struct pwritev_args *uap)
 {
 	struct uio *auio;
 	int error;
 
 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
 	if (error == 0) {
 		error = kern_pwritev(td, uap->fd, auio, uap->offset);
 		free(auio, M_IOV);
 	}
 	return (error);
 }
 
 /*
  * Write auio to descriptor fd at an explicit offset.
  *
  * Fails with ESPIPE if the file is not seekable, and with EINVAL for a
  * negative offset unless the file's vnode is a character device.
  */
 int
 kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_write(td, fd, CAP_WRITE, &fp);
 	if (error != 0)
 		return (error);
 
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 	} else if (offset < 0 && fp->f_vnode->v_type != VCHR) {
 		error = EINVAL;
 	} else {
 		error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
 	}
 
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common back end for the write system calls: write to fp using the
  * passed-in uio, offset and flags.
  *
  * The number of bytes transferred is stored in td->td_retval[0].  An
  * ERESTART, EINTR or EWOULDBLOCK error is turned into success when
  * some data was transferred before it occurred.  On EPIPE from a
  * non-socket file, SIGPIPE is sent to the calling thread.
  */
 static int
 dofilewrite(struct thread *td, int fd, struct file *fp, struct uio *auio,
     off_t offset, int flags)
 {
 	ssize_t want, done;
 	int error;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 
 	auio->uio_rw = UIO_WRITE;
 	auio->uio_td = td;
 	auio->uio_offset = offset;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(auio);
 #endif
 
 	want = auio->uio_resid;
 	if (fp->f_type == DTYPE_VNODE)
 		bwillwrite();
 
 	error = fo_write(fp, auio, td->td_ucred, flags, td);
 	if (error != 0) {
 		if (auio->uio_resid != want && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 		/* Socket layer is responsible for issuing SIGPIPE. */
 		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 	}
 	done = want - auio->uio_resid;
 
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = done;
 		ktrgenio(fd, UIO_WRITE, ktruio, error);
 	}
 #endif
 	td->td_retval[0] = done;
 	return (error);
 }
 
 /*
  * Truncate a file given a file descriptor.
  *
  * A negative length, or a descriptor that is not open for writing,
  * yields EINVAL.  fget_write() can't be used here because it would
  * report EBADF rather than EINVAL for a non-writable descriptor.
  */
 int
 kern_ftruncate(struct thread *td, int fd, off_t length)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	if (length < 0)
 		return (EINVAL);
 
 	error = fget(td, fd, CAP_FTRUNCATE, &fp);
 	if (error != 0)
 		return (error);
 	AUDIT_ARG_FILE(td->td_proc, fp);
 
 	if ((fp->f_flag & FWRITE) != 0)
 		error = fo_truncate(fp, length, td->td_ucred, td);
 	else
 		error = EINVAL;
 
 	fdrop(fp, td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ftruncate_args {
 	int	fd;
 	int	pad;
 	off_t	length;
 };
 #endif
 int
 sys_ftruncate(td, uap)
 	struct thread *td;
 	struct ftruncate_args *uap;
 {
 
 	return (kern_ftruncate(td, uap->fd, uap->length));
 }
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct oftruncate_args {
 	int	fd;
 	long	length;
 };
 #endif
 int
 oftruncate(td, uap)
 	struct thread *td;
 	struct oftruncate_args *uap;
 {
 
 	return (kern_ftruncate(td, uap->fd, uap->length));
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct ioctl_args {
 	int	fd;
 	u_long	com;
 	caddr_t	data;
 };
 #endif
 /* ARGSUSED */
 /*
  * ioctl(2) system call entry point.
  *
  * Decodes the parameter size and direction bits from the command word,
  * stages the argument in kernel memory (copying it in for IOC_IN,
  * zeroing it for IOC_OUT-only), calls kern_ioctl(), and copies the
  * result back out for IOC_OUT on success.  Returns ENOTTY for a
  * malformed command word, otherwise the error from copyin(),
  * kern_ioctl() or copyout().
  */
 int
 sys_ioctl(struct thread *td, struct ioctl_args *uap)
 {
 	u_long com;
 	int arg, error;
 	u_int size;
 	caddr_t data;
 
 	/* Warn about, then discard, any bits above the low 32. */
 	if (uap->com > 0xffffffff) {
 		printf(
 		    "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
 		    td->td_proc->p_pid, td->td_name, uap->com);
 		uap->com &= 0xffffffff;
 	}
 	com = uap->com;
 
 	/*
 	 * Interpret high order word to find amount of data to be
 	 * copied to/from the user's address space.
 	 */
 	size = IOCPARM_LEN(com);
 	if ((size > IOCPARM_MAX) ||
 	    ((com & (IOC_VOID  | IOC_IN | IOC_OUT)) == 0) ||
 #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
 	    ((com & IOC_OUT) && size == 0) ||
 #else
 	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
 #endif
 	    ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
 		return (ENOTTY);
 
 	/*
 	 * Choose where the argument lives: the local int for an
 	 * IOC_VOID command carrying an integer, a malloc'ed buffer for
 	 * other sized commands, or the uap->data slot itself when the
 	 * size is zero.
 	 */
 	if (size > 0) {
 		if (com & IOC_VOID) {
 			/* Integer argument. */
 			arg = (intptr_t)uap->data;
 			data = (void *)&arg;
 			/* Nothing was allocated, so nothing to copy or free. */
 			size = 0;
 		} else
 			data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
 	} else
 		data = (void *)&uap->data;
 	if (com & IOC_IN) {
 		error = copyin(uap->data, data, (u_int)size);
 		if (error) {
 			if (size > 0)
 				free(data, M_IOCTLOPS);
 			return (error);
 		}
 	} else if (com & IOC_OUT) {
 		/*
 		 * Zero the buffer so the user always
 		 * gets back something deterministic.
 		 */
 		bzero(data, size);
 	}
 
 	error = kern_ioctl(td, uap->fd, com, data);
 
 	if (error == 0 && (com & IOC_OUT))
 		error = copyout(data, uap->data, (u_int)size);
 
 	/* size is still non-zero only when the buffer was malloc'ed. */
 	if (size > 0)
 		free(data, M_IOCTLOPS);
 	return (error);
 }
 
 int
 kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
 {
 	struct file *fp;
 	struct filedesc *fdp;
 	int error;
 	int tmp;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_CMD(com);
 	if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0)
 		return (error);
 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
 		fdrop(fp, td);
 		return (EBADF);
 	}
 	fdp = td->td_proc->p_fd;
 	switch (com) {
 	case FIONCLEX:
 		FILEDESC_XLOCK(fdp);
 		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
 		FILEDESC_XUNLOCK(fdp);
 		goto out;
 	case FIOCLEX:
 		FILEDESC_XLOCK(fdp);
 		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
 		FILEDESC_XUNLOCK(fdp);
 		goto out;
 	case FIONBIO:
 		if ((tmp = *(int *)data))
 			atomic_set_int(&fp->f_flag, FNONBLOCK);
 		else
 			atomic_clear_int(&fp->f_flag, FNONBLOCK);
 		data = (void *)&tmp;
 		break;
 	case FIOASYNC:
 		if ((tmp = *(int *)data))
 			atomic_set_int(&fp->f_flag, FASYNC);
 		else
 			atomic_clear_int(&fp->f_flag, FASYNC);
 		data = (void *)&tmp;
 		break;
 	}
 
 	error = fo_ioctl(fp, com, data, td->td_ucred, td);
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 poll_no_poll(int events)
 {
 	/*
 	 * Return true for read/write.  If the user asked for something
 	 * special, return POLLNVAL, so that clients have a way of
 	 * determining reliably whether or not the extended
 	 * functionality is present without hard-coding knowledge
 	 * of specific filesystem implementations.
 	 */
 	if (events & ~POLLSTANDARD)
 		return (POLLNVAL);
 
 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 }
 
 int
 sys_pselect(struct thread *td, struct pselect_args *uap)
 {
 	struct timespec ts;
 	struct timeval tv, *tvp;
 	sigset_t set, *uset;
 	int error;
 
 	if (uap->ts != NULL) {
 		error = copyin(uap->ts, &ts, sizeof(ts));
 		if (error != 0)
 		    return (error);
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 	if (uap->sm != NULL) {
 		error = copyin(uap->sm, &set, sizeof(set));
 		if (error != 0)
 			return (error);
 		uset = &set;
 	} else
 		uset = NULL;
 	return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    uset, NFDBITS));
 }
 
 int
 kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex,
     struct timeval *tvp, sigset_t *uset, int abi_nfdbits)
 {
 	int error;
 
 	if (uset != NULL) {
 		error = kern_sigprocmask(td, SIG_SETMASK, uset,
 		    &td->td_oldsigmask, 0);
 		if (error != 0)
 			return (error);
 		td->td_pflags |= TDP_OLDMASK;
 		/*
 		 * Make sure that ast() is called on return to
 		 * usermode and TDP_OLDMASK is cleared, restoring old
 		 * sigmask.
 		 */
 		thread_lock(td);
 		td->td_flags |= TDF_ASTPENDING;
 		thread_unlock(td);
 	}
 	error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct select_args {
 	int	nd;
 	fd_set	*in, *ou, *ex;
 	struct	timeval *tv;
 };
 #endif
 int
 sys_select(struct thread *td, struct select_args *uap)
 {
 	struct timeval tv, *tvp;
 	int error;
 
 	if (uap->tv != NULL) {
 		error = copyin(uap->tv, &tv, sizeof(tv));
 		if (error)
 			return (error);
 		tvp = &tv;
 	} else
 		tvp = NULL;
 
 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
 	    NFDBITS));
 }
 
 /*
  * In the unlikely case when user specified n greater then the last
  * open file descriptor, check that no bits are set after the last
  * valid fd.  We must return EBADF if any is set.
  *
  * There are applications that rely on the behaviour.
  *
  * nd is fd_lastfile + 1.
  */
 static int
 select_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits)
 {
 	char *addr, *oaddr;
 	int b, i, res;
 	uint8_t bits;
 
 	if (nd >= ndu || fd_in == NULL)
 		return (0);
 
 	oaddr = NULL;
 	bits = 0; /* silence gcc */
 	for (i = nd; i < ndu; i++) {
 		b = i / NBBY;
 #if BYTE_ORDER == LITTLE_ENDIAN
 		addr = (char *)fd_in + b;
 #else
 		addr = (char *)fd_in;
 		if (abi_nfdbits == NFDBITS) {
 			addr += rounddown(b, sizeof(fd_mask)) +
 			    sizeof(fd_mask) - 1 - b % sizeof(fd_mask);
 		} else {
 			addr += rounddown(b, sizeof(uint32_t)) +
 			    sizeof(uint32_t) - 1 - b % sizeof(uint32_t);
 		}
 #endif
 		if (addr != oaddr) {
 			res = fubyte(addr);
 			if (res == -1)
 				return (EFAULT);
 			oaddr = addr;
 			bits = res;
 		}
 		if ((bits & (1 << (i % NBBY))) != 0)
 			return (EBADF);
 	}
 	return (0);
 }
 
 int
 kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
     fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
 {
 	struct filedesc *fdp;
 	/*
 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
 	 * of 256.
 	 */
 	fd_mask s_selbits[howmany(2048, NFDBITS)];
 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
-	struct timeval atv, rtv, ttv;
+	struct bintime abt, rbt;
+	struct timeval atv;
 	int error, lf, ndu, timo;
 	u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
 
 	if (nd < 0)
 		return (EINVAL);
 	fdp = td->td_proc->p_fd;
 	ndu = nd;
 	lf = fdp->fd_lastfile;
 	if (nd > lf + 1)
 		nd = lf + 1;
 
 	error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits);
 	if (error != 0)
 		return (error);
 	error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits);
 	if (error != 0)
 		return (error);
 	error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Allocate just enough bits for the non-null fd_sets.  Use the
 	 * preallocated auto buffer if possible.
 	 */
 	nfdbits = roundup(nd, NFDBITS);
 	ncpbytes = nfdbits / NBBY;
 	ncpubytes = roundup(nd, abi_nfdbits) / NBBY;
 	nbufbytes = 0;
 	if (fd_in != NULL)
 		nbufbytes += 2 * ncpbytes;
 	if (fd_ou != NULL)
 		nbufbytes += 2 * ncpbytes;
 	if (fd_ex != NULL)
 		nbufbytes += 2 * ncpbytes;
 	if (nbufbytes <= sizeof s_selbits)
 		selbits = &s_selbits[0];
 	else
 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
 
 	/*
 	 * Assign pointers into the bit buffers and fetch the input bits.
 	 * Put the output buffers together so that they can be bzeroed
 	 * together.
 	 */
 	sbp = selbits;
 #define	getbits(name, x) \
 	do {								\
 		if (name == NULL) {					\
 			ibits[x] = NULL;				\
 			obits[x] = NULL;				\
 		} else {						\
 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
 			obits[x] = sbp;					\
 			sbp += ncpbytes / sizeof *sbp;			\
 			error = copyin(name, ibits[x], ncpubytes);	\
 			if (error != 0)					\
 				goto done;				\
 			bzero((char *)ibits[x] + ncpubytes,		\
 			    ncpbytes - ncpubytes);			\
 		}							\
 	} while (0)
 	getbits(fd_in, 0);
 	getbits(fd_ou, 1);
 	getbits(fd_ex, 2);
 #undef	getbits
 
 #if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__)
 	/*
 	 * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS,
 	 * we are running under 32-bit emulation. This should be more
 	 * generic.
 	 */
 #define swizzle_fdset(bits)						\
 	if (abi_nfdbits != NFDBITS && bits != NULL) {			\
 		int i;							\
 		for (i = 0; i < ncpbytes / sizeof *sbp; i++)		\
 			bits[i] = (bits[i] >> 32) | (bits[i] << 32);	\
 	}
 #else
 #define swizzle_fdset(bits)
 #endif
 
 	/* Make sure the bit order makes it through an ABI transition */
 	swizzle_fdset(ibits[0]);
 	swizzle_fdset(ibits[1]);
 	swizzle_fdset(ibits[2]);
 	
 	if (nbufbytes != 0)
 		bzero(selbits, nbufbytes / 2);
 
 	if (tvp != NULL) {
 		atv = *tvp;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 || 
+		    atv.tv_usec >= 1000000) {
 			error = EINVAL;
 			goto done;
 		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
 	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
 	}
-	timo = 0;
 	seltdinit(td);
 	/* Iterate until the timeout expires or descriptors become ready. */
 	for (;;) {
 		error = selscan(td, ibits, obits, nd);
 		if (error || td->td_retval[0] != 0)
 			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
 				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
 		}
-		error = seltdwait(td, timo);
+		else {
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
+		}
 		if (error)
 			break;
 		error = selrescan(td, ibits, obits);
 		if (error || td->td_retval[0] != 0)
 			break;
 	}
 	seltdclear(td);
 
 done:
 	/* select is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
 	if (error == EWOULDBLOCK)
 		error = 0;
 
 	/* swizzle bit order back, if necessary */
 	swizzle_fdset(obits[0]);
 	swizzle_fdset(obits[1]);
 	swizzle_fdset(obits[2]);
 #undef swizzle_fdset
 
 #define	putbits(name, x) \
 	if (name && (error2 = copyout(obits[x], name, ncpubytes))) \
 		error = error2;
 	if (error == 0) {
 		int error2;
 
 		putbits(fd_in, 0);
 		putbits(fd_ou, 1);
 		putbits(fd_ex, 2);
 #undef putbits
 	}
 	if (selbits != &s_selbits[0])
 		free(selbits, M_SELECT);
 
 	return (error);
 }
 /* 
  * Convert a select bit set to poll flags.
  *
  * The backend always returns POLLHUP/POLLERR if appropriate and we
  * return this as a set bit in any set.
  */
 static int select_flags[3] = {
     POLLRDNORM | POLLHUP | POLLERR,
     POLLWRNORM | POLLHUP | POLLERR,
     POLLRDBAND | POLLERR
 };
 
 /*
  * Compute the fo_poll flags required for a fd given by the index and
  * bit position in the fd_mask array.
  */
 static __inline int
 selflags(fd_mask **ibits, int idx, fd_mask bit)
 {
 	int flags;
 	int msk;
 
 	flags = 0;
 	for (msk = 0; msk < 3; msk++) {
 		if (ibits[msk] == NULL)
 			continue;
 		if ((ibits[msk][idx] & bit) == 0)
 			continue;
 		flags |= select_flags[msk];
 	}
 	return (flags);
 }
 
 /*
  * Set the appropriate output bits given a mask of fired events and the
  * input bits originally requested.
  */
 static __inline int
 selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
 {
 	int msk;
 	int n;
 
 	n = 0;
 	for (msk = 0; msk < 3; msk++) {
 		if ((events & select_flags[msk]) == 0)
 			continue;
 		if (ibits[msk] == NULL)
 			continue;
 		if ((ibits[msk][idx] & bit) == 0)
 			continue;
 		/*
 		 * XXX Check for a duplicate set.  This can occur because a
 		 * socket calls selrecord() twice for each poll() call
 		 * resulting in two selfds per real fd.  selrescan() will
 		 * call selsetbits twice as a result.
 		 */
 		if ((obits[msk][idx] & bit) != 0)
 			continue;
 		obits[msk][idx] |= bit;
 		n++;
 	}
 
 	return (n);
 }
 
 static __inline int
 getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp)
 {
 	struct file *fp;
 #ifdef CAPABILITIES
 	struct file *fp_fromcap;
 	int error;
 #endif
 
 	if ((fp = fget_unlocked(fdp, fd)) == NULL)
 		return (EBADF);
 #ifdef CAPABILITIES
 	/*
 	 * If the file descriptor is for a capability, test rights and use
 	 * the file descriptor references by the capability.
 	 */
 	error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap);
 	if (error) {
 		fdrop(fp, curthread);
 		return (error);
 	}
 	if (fp != fp_fromcap) {
 		fhold(fp_fromcap);
 		fdrop(fp, curthread);
 		fp = fp_fromcap;
 	}
 #endif /* CAPABILITIES */
 	*fpp = fp;
 	return (0);
 }
 
 /*
  * Traverse the list of fds attached to this thread's seltd and check for
  * completion.
  */
 static int
 selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
 {
 	struct filedesc *fdp;
 	struct selinfo *si;
 	struct seltd *stp;
 	struct selfd *sfp;
 	struct selfd *sfn;
 	struct file *fp;
 	fd_mask bit;
 	int fd, ev, n, idx;
 	int error;
 
 	fdp = td->td_proc->p_fd;
 	stp = td->td_sel;
 	n = 0;
 	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
 		fd = (int)(uintptr_t)sfp->sf_cookie;
 		si = sfp->sf_si;
 		selfdfree(stp, sfp);
 		/* If the selinfo wasn't cleared the event didn't fire. */
 		if (si != NULL)
 			continue;
 		error = getselfd_cap(fdp, fd, &fp);
 		if (error)
 			return (error);
 		idx = fd / NFDBITS;
 		bit = (fd_mask)1 << (fd % NFDBITS);
 		ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
 		fdrop(fp, td);
 		if (ev != 0)
 			n += selsetbits(ibits, obits, idx, bit, ev);
 	}
 	stp->st_flags = 0;
 	td->td_retval[0] = n;
 	return (0);
 }
 
 /*
  * Perform the initial filedescriptor scan and register ourselves with
  * each selinfo.
  */
 static int
 selscan(td, ibits, obits, nfd)
 	struct thread *td;
 	fd_mask **ibits, **obits;
 	int nfd;
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	fd_mask bit;
 	int ev, flags, end, fd;
 	int n, idx;
 	int error;
 
 	fdp = td->td_proc->p_fd;
 	n = 0;
 	for (idx = 0, fd = 0; fd < nfd; idx++) {
 		end = imin(fd + NFDBITS, nfd);
 		for (bit = 1; fd < end; bit <<= 1, fd++) {
 			/* Compute the list of events we're interested in. */
 			flags = selflags(ibits, idx, bit);
 			if (flags == 0)
 				continue;
 			error = getselfd_cap(fdp, fd, &fp);
 			if (error)
 				return (error);
 			selfdalloc(td, (void *)(uintptr_t)fd);
 			ev = fo_poll(fp, flags, td->td_ucred, td);
 			fdrop(fp, td);
 			if (ev != 0)
 				n += selsetbits(ibits, obits, idx, bit, ev);
 		}
 	}
 
 	td->td_retval[0] = n;
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct poll_args {
 	struct pollfd *fds;
 	u_int	nfds;
 	int	timeout;
 };
 #endif
 int
 sys_poll(td, uap)
 	struct thread *td;
 	struct poll_args *uap;
 {
 	struct pollfd *bits;
 	struct pollfd smallbits[32];
-	struct timeval atv, rtv, ttv;
-	int error = 0, timo;
+	struct bintime abt, rbt;
+	struct timeval atv;
+	int error, timo;
 	u_int nfds;
 	size_t ni;
 
 	nfds = uap->nfds;
 	if (nfds > maxfilesperproc && nfds > FD_SETSIZE) 
 		return (EINVAL);
 	ni = nfds * sizeof(struct pollfd);
 	if (ni > sizeof(smallbits))
 		bits = malloc(ni, M_TEMP, M_WAITOK);
 	else
 		bits = smallbits;
 	error = copyin(uap->fds, bits, ni);
 	if (error)
 		goto done;
 	if (uap->timeout != INFTIM) {
 		atv.tv_sec = uap->timeout / 1000;
 		atv.tv_usec = (uap->timeout % 1000) * 1000;
-		if (itimerfix(&atv)) {
+		if (atv.tv_sec < 0 || atv.tv_usec < 0 || 
+		    atv.tv_usec >= 1000000) {
 			error = EINVAL;
 			goto done;
 		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
+		binuptime(&rbt);
+		timeval2bintime(&atv, &abt);
+		bintime_add(&abt, &rbt);
 	} else {
-		atv.tv_sec = 0;
-		atv.tv_usec = 0;
+		abt.sec = 0;
+		abt.frac = 0;
 	}
-	timo = 0;
 	seltdinit(td);
 	/* Iterate until the timeout expires or descriptors become ready. */
 	for (;;) {
 		error = pollscan(td, bits, nfds);
 		if (error || td->td_retval[0] != 0)
 			break;
-		if (atv.tv_sec || atv.tv_usec) {
-			getmicrouptime(&rtv);
-			if (timevalcmp(&rtv, &atv, >=))
+		if (abt.sec || abt.frac) {
+			binuptime(&rbt);
+			if (bintime_cmp(&rbt, &abt, >=))
 				break;
-			ttv = atv;
-			timevalsub(&ttv, &rtv);
-			timo = ttv.tv_sec > 24 * 60 * 60 ?
-			    24 * 60 * 60 * hz : tvtohz(&ttv);
+			error = seltdwait(td, &abt, 0);
+		} else { 
+			timo = 0;
+			error = seltdwait(td, NULL, timo);
 		}
-		error = seltdwait(td, timo);
 		if (error)
 			break;
 		error = pollrescan(td);
 		if (error || td->td_retval[0] != 0)
 			break;
 	}
 	seltdclear(td);
 
 done:
 	/* poll is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
 	if (error == EWOULDBLOCK)
 		error = 0;
 	if (error == 0) {
 		error = pollout(td, bits, uap->fds, nfds);
 		if (error)
 			goto out;
 	}
 out:
 	if (ni > sizeof(smallbits))
 		free(bits, M_TEMP);
 	return (error);
 }
 
 static int
 pollrescan(struct thread *td)
 {
 	struct seltd *stp;
 	struct selfd *sfp;
 	struct selfd *sfn;
 	struct selinfo *si;
 	struct filedesc *fdp;
 	struct file *fp;
 	struct pollfd *fd;
 	int n;
 
 	n = 0;
 	fdp = td->td_proc->p_fd;
 	stp = td->td_sel;
 	FILEDESC_SLOCK(fdp);
 	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
 		fd = (struct pollfd *)sfp->sf_cookie;
 		si = sfp->sf_si;
 		selfdfree(stp, sfp);
 		/* If the selinfo wasn't cleared the event didn't fire. */
 		if (si != NULL)
 			continue;
 		fp = fdp->fd_ofiles[fd->fd];
 #ifdef CAPABILITIES
 		if ((fp == NULL)
 		    || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
 #else
 		if (fp == NULL) {
 #endif
 			fd->revents = POLLNVAL;
 			n++;
 			continue;
 		}
 
 		/*
 		 * Note: backend also returns POLLHUP and
 		 * POLLERR if appropriate.
 		 */
 		fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
 		if (fd->revents != 0)
 			n++;
 	}
 	FILEDESC_SUNLOCK(fdp);
 	stp->st_flags = 0;
 	td->td_retval[0] = n;
 	return (0);
 }
 
 
 static int
 pollout(td, fds, ufds, nfd)
 	struct thread *td;
 	struct pollfd *fds;
 	struct pollfd *ufds;
 	u_int nfd;
 {
 	int error = 0;
 	u_int i = 0;
 	u_int n = 0;
 
 	for (i = 0; i < nfd; i++) {
 		error = copyout(&fds->revents, &ufds->revents,
 		    sizeof(ufds->revents));
 		if (error)
 			return (error);
 		if (fds->revents != 0)
 			n++;
 		fds++;
 		ufds++;
 	}
 	td->td_retval[0] = n;
 	return (0);
 }
 
 static int
 pollscan(td, fds, nfd)
 	struct thread *td;
 	struct pollfd *fds;
 	u_int nfd;
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	int i;
 	struct file *fp;
 	int n = 0;
 
 	FILEDESC_SLOCK(fdp);
 	for (i = 0; i < nfd; i++, fds++) {
 		if (fds->fd >= fdp->fd_nfiles) {
 			fds->revents = POLLNVAL;
 			n++;
 		} else if (fds->fd < 0) {
 			fds->revents = 0;
 		} else {
 			fp = fdp->fd_ofiles[fds->fd];
 #ifdef CAPABILITIES
 			if ((fp == NULL)
 			    || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
 #else
 			if (fp == NULL) {
 #endif
 				fds->revents = POLLNVAL;
 				n++;
 			} else {
 				/*
 				 * Note: backend also returns POLLHUP and
 				 * POLLERR if appropriate.
 				 */
 				selfdalloc(td, fds);
 				fds->revents = fo_poll(fp, fds->events,
 				    td->td_ucred, td);
 				/*
 				 * POSIX requires POLLOUT to be never
 				 * set simultaneously with POLLHUP.
 				 */
 				if ((fds->revents & POLLHUP) != 0)
 					fds->revents &= ~POLLOUT;
 
 				if (fds->revents != 0)
 					n++;
 			}
 		}
 	}
 	FILEDESC_SUNLOCK(fdp);
 	td->td_retval[0] = n;
 	return (0);
 }
 
 /*
  * OpenBSD poll system call.
  *
  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct openbsd_poll_args {
 	struct pollfd *fds;
 	u_int	nfds;
 	int	timeout;
 };
 #endif
 int
 sys_openbsd_poll(td, uap)
 	register struct thread *td;
 	register struct openbsd_poll_args *uap;
 {
 	return (sys_poll(td, (struct poll_args *)uap));
 }
 
 /*
  * XXX This was created specifically to support netncp and netsmb.  This
  * allows the caller to specify a socket to wait for events on.  It returns
  * 0 if any events matched and an error otherwise.  There is no way to
  * determine which events fired.
  */
 int
 selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
 {
 	struct timeval atv, rtv, ttv;
 	int error, timo;
 
 	if (tvp != NULL) {
 		atv = *tvp;
 		if (itimerfix(&atv))
 			return (EINVAL);
 		getmicrouptime(&rtv);
 		timevaladd(&atv, &rtv);
 	} else {
 		atv.tv_sec = 0;
 		atv.tv_usec = 0;
 	}
 
 	timo = 0;
 	seltdinit(td);
 	/*
 	 * Iterate until the timeout expires or the socket becomes ready.
 	 */
 	for (;;) {
 		selfdalloc(td, NULL);
 		error = sopoll(so, events, NULL, td);
 		/* error here is actually the ready events. */
 		if (error)
 			return (0);
 		if (atv.tv_sec || atv.tv_usec) {
 			getmicrouptime(&rtv);
 			if (timevalcmp(&rtv, &atv, >=)) {
 				seltdclear(td);
 				return (EWOULDBLOCK);
 			}
 			ttv = atv;
 			timevalsub(&ttv, &rtv);
 			timo = ttv.tv_sec > 24 * 60 * 60 ?
 			    24 * 60 * 60 * hz : tvtohz(&ttv);
 		}
-		error = seltdwait(td, timo);
+		error = seltdwait(td, NULL, timo);
 		seltdclear(td);
 		if (error)
 			break;
 	}
 	/* XXX Duplicates ncp/smb behavior. */
 	if (error == ERESTART)
 		error = 0;
 	return (error);
 }
 
 /*
  * Preallocate two selfds associated with 'cookie'.  Some fo_poll routines
  * have two select sets, one for read and another for write.
  */
 static void
 selfdalloc(struct thread *td, void *cookie)
 {
 	struct seltd *stp;
 
 	stp = td->td_sel;
 	if (stp->st_free1 == NULL)
 		stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
 	stp->st_free1->sf_td = stp;
 	stp->st_free1->sf_cookie = cookie;
 	if (stp->st_free2 == NULL)
 		stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
 	stp->st_free2->sf_td = stp;
 	stp->st_free2->sf_cookie = cookie;
 }
 
 static void
 selfdfree(struct seltd *stp, struct selfd *sfp)
 {
 	STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
 	mtx_lock(sfp->sf_mtx);
 	if (sfp->sf_si)
 		TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
 	mtx_unlock(sfp->sf_mtx);
 	uma_zfree(selfd_zone, sfp);
 }
 
 /* Drain the waiters tied to all the selfd belonging the specified selinfo. */
 void
 seldrain(sip)
         struct selinfo *sip;
 {
 
 	/*
 	 * This feature is already provided by doselwakeup(), thus it is
 	 * enough to go for it.
 	 * Eventually, the context, should take care to avoid races
 	 * between thread calling select()/poll() and file descriptor
 	 * detaching, but, again, the races are just the same as
 	 * selwakeup().
 	 */
         doselwakeup(sip, -1);
 }
 
 /*
  * Record a select request.
  */
 void
 selrecord(selector, sip)
 	struct thread *selector;
 	struct selinfo *sip;
 {
 	struct selfd *sfp;
 	struct seltd *stp;
 	struct mtx *mtxp;
 
 	stp = selector->td_sel;
 	/*
 	 * Don't record when doing a rescan.
 	 */
 	if (stp->st_flags & SELTD_RESCAN)
 		return;
 	/*
 	 * Grab one of the preallocated descriptors.
 	 */
 	sfp = NULL;
 	if ((sfp = stp->st_free1) != NULL)
 		stp->st_free1 = NULL;
 	else if ((sfp = stp->st_free2) != NULL)
 		stp->st_free2 = NULL;
 	else
 		panic("selrecord: No free selfd on selq");
 	mtxp = sip->si_mtx;
 	if (mtxp == NULL)
 		mtxp = mtx_pool_find(mtxpool_select, sip);
 	/*
 	 * Initialize the sfp and queue it in the thread.
 	 */
 	sfp->sf_si = sip;
 	sfp->sf_mtx = mtxp;
 	STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
 	/*
 	 * Now that we've locked the sip, check for initialization.
 	 */
 	mtx_lock(mtxp);
 	if (sip->si_mtx == NULL) {
 		sip->si_mtx = mtxp;
 		TAILQ_INIT(&sip->si_tdlist);
 	}
 	/*
 	 * Add this thread to the list of selfds listening on this selinfo.
 	 */
 	TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
 	mtx_unlock(sip->si_mtx);
 }
 
 /* Wake up a selecting thread. */
 void
 selwakeup(sip)
 	struct selinfo *sip;
 {
 	doselwakeup(sip, -1);
 }
 
 /* Wake up a selecting thread, and set its priority. */
 void
 selwakeuppri(sip, pri)
 	struct selinfo *sip;
 	int pri;
 {
 	doselwakeup(sip, pri);
 }
 
 /*
  * Do a wakeup when a selectable event occurs.
  */
 static void
 doselwakeup(sip, pri)
 	struct selinfo *sip;
 	int pri;
 {
 	struct selfd *sfp;
 	struct selfd *sfn;
 	struct seltd *stp;
 
 	/* If it's not initialized there can't be any waiters. */
 	if (sip->si_mtx == NULL)
 		return;
 	/*
 	 * Locking the selinfo locks all selfds associated with it.
 	 */
 	mtx_lock(sip->si_mtx);
 	TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
 		/*
 		 * Once we remove this sfp from the list and clear the
 		 * sf_si seltdclear will know to ignore this si.
 		 */
 		TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
 		sfp->sf_si = NULL;
 		stp = sfp->sf_td;
 		mtx_lock(&stp->st_mtx);
 		stp->st_flags |= SELTD_PENDING;
 		cv_broadcastpri(&stp->st_wait, pri);
 		mtx_unlock(&stp->st_mtx);
 	}
 	mtx_unlock(sip->si_mtx);
 }
 
 static void
 seltdinit(struct thread *td)
 {
 	struct seltd *stp;
 
 	if ((stp = td->td_sel) != NULL)
 		goto out;
 	td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
 	mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
 	cv_init(&stp->st_wait, "select");
 out:
 	stp->st_flags = 0;
 	STAILQ_INIT(&stp->st_selq);
 }
 
 static int
-seltdwait(struct thread *td, int timo)
+seltdwait(struct thread *td, struct bintime *bt, int timo)
 {
 	struct seltd *stp;
 	int error;
 
 	stp = td->td_sel;
 	/*
 	 * An event of interest may occur while we do not hold the seltd
 	 * locked so check the pending flag before we sleep.
 	 */
 	mtx_lock(&stp->st_mtx);
 	/*
 	 * Any further calls to selrecord will be a rescan.
 	 */
 	stp->st_flags |= SELTD_RESCAN;
 	if (stp->st_flags & SELTD_PENDING) {
 		mtx_unlock(&stp->st_mtx);
 		return (0);
 	}
-	if (timo > 0)
+	if (bt == NULL && timo > 0) 
 		error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
-	else
+	else if (bt != NULL)
+		error = cv_timedwait_bt_sig(&stp->st_wait, &stp->st_mtx, *bt);
+	else	
 		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
 	mtx_unlock(&stp->st_mtx);
 
 	return (error);
 }
 
 void
 seltdfini(struct thread *td)
 {
 	struct seltd *stp;
 
 	stp = td->td_sel;
 	if (stp == NULL)
 		return;
 	if (stp->st_free1)
 		uma_zfree(selfd_zone, stp->st_free1);
 	if (stp->st_free2)
 		uma_zfree(selfd_zone, stp->st_free2);
 	td->td_sel = NULL;
 	free(stp, M_SELECT);
 }
 
 /*
  * Remove the references to the thread from all of the objects we were
  * polling.
  */
 static void
 seltdclear(struct thread *td)
 {
 	struct seltd *stp;
 	struct selfd *sfp;
 	struct selfd *sfn;
 
 	stp = td->td_sel;
 	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
 		selfdfree(stp, sfp);
 	stp->st_flags = 0;
 }
 
 static void selectinit(void *);
 SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
 static void
 selectinit(void *dummy __unused)
 {
 
 	selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF);
 }
Index: projects/calloutng/sys/sys/condvar.h
===================================================================
--- projects/calloutng/sys/sys/condvar.h	(revision 237201)
+++ projects/calloutng/sys/sys/condvar.h	(revision 237202)
@@ -1,81 +1,89 @@
 /*-
  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_SYS_CONDVAR_H_
 #define	_SYS_CONDVAR_H_
 
 #ifndef	LOCORE
 #include <sys/queue.h>
 
+struct bintime;
 struct lock_object;
 struct thread;
 
 TAILQ_HEAD(cv_waitq, thread);
 
 /*
  * Condition variable.  The waiters count is protected by the mutex that
  * protects the condition; that is, the mutex that is passed to cv_wait*()
  * and is held across calls to cv_signal() and cv_broadcast().  It is an
  * optimization to avoid looking up the sleep queue if there are no waiters.
  */
 struct cv {
 	const char	*cv_description;
 	int		cv_waiters;
 };
 
 #ifdef _KERNEL
 void	cv_init(struct cv *cvp, const char *desc);
 void	cv_destroy(struct cv *cvp);
 
 void	_cv_wait(struct cv *cvp, struct lock_object *lock);
 void	_cv_wait_unlock(struct cv *cvp, struct lock_object *lock);
 int	_cv_wait_sig(struct cv *cvp, struct lock_object *lock);
 int	_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo);
-int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo);
+int	_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock,
+	    struct bintime *bt, int timo);
 
 void	cv_signal(struct cv *cvp);
 void	cv_broadcastpri(struct cv *cvp, int pri);
 
 #define	cv_wait(cvp, lock)						\
 	_cv_wait((cvp), &(lock)->lock_object)
 #define	cv_wait_unlock(cvp, lock)					\
 	_cv_wait_unlock((cvp), &(lock)->lock_object)
 #define	cv_wait_sig(cvp, lock)						\
 	_cv_wait_sig((cvp), &(lock)->lock_object)
 #define	cv_timedwait(cvp, lock, timo)					\
 	_cv_timedwait((cvp), &(lock)->lock_object, (timo))
 #define	cv_timedwait_sig(cvp, lock, timo)				\
-	_cv_timedwait_sig((cvp), &(lock)->lock_object, (timo))
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, (NULL), (timo))
+/*
+ * Variant of cv_timedwait_sig() that takes a struct bintime instead of a
+ * tick count.  The macro takes the address of 'bt', so it must be an lvalue.
+ */
+#define	cv_timedwait_bt_sig(cvp, lock, bt)				\
+	_cv_timedwait_sig((cvp), &(lock)->lock_object, &(bt), (0))
 
 #define cv_broadcast(cvp)	cv_broadcastpri(cvp, 0)
 
 #define	cv_wmesg(cvp)		((cvp)->cv_description)
 
 #endif	/* _KERNEL */
 #endif	/* !LOCORE */
 #endif	/* _SYS_CONDVAR_H_ */
Index: projects/calloutng/sys/sys/sleepqueue.h
===================================================================
--- projects/calloutng/sys/sys/sleepqueue.h	(revision 237201)
+++ projects/calloutng/sys/sys/sleepqueue.h	(revision 237202)
@@ -1,121 +1,124 @@
 /*-
  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SLEEPQUEUE_H_
 #define _SYS_SLEEPQUEUE_H_
 
 /*
  * Sleep queue interface.  Sleep/wakeup, condition variables, and sx
  * locks use a sleep queue for the queue of threads blocked on a sleep
  * channel.
  *
  * A thread calls sleepq_lock() to lock the sleep queue chain associated
  * with a given wait channel.  A thread can then call call sleepq_add() to
  * add themself onto a sleep queue and call one of the sleepq_wait()
  * functions to actually go to sleep.  If a thread needs to abort a sleep
  * operation it should call sleepq_release() to unlock the associated sleep
  * queue chain lock.  If the thread also needs to remove itself from a queue
  * it just enqueued itself on, it can use sleepq_remove() instead.
  *
  * If the thread only wishes to sleep for a limited amount of time, it can
  * call sleepq_set_timeout() after sleepq_add() to setup a timeout.  It
  * should then use one of the sleepq_timedwait() functions to block.
  *
  * If the thread wants the sleep to be interruptible by signals, it can
  * call sleepq_catch_signals() after sleepq_add().  It should then use
  * one of the sleepq_wait_sig() functions to block.  After the thread has
  * been resumed, it should call sleepq_calc_signal_retval() to determine
  * if it should return EINTR or ERESTART passing in the value returned from
  * the earlier call to sleepq_catch_signals().
  *
  * A thread is normally resumed from a sleep queue by either the
  * sleepq_signal() or sleepq_broadcast() functions.  Sleepq_signal() wakes
  * the thread with the highest priority that is sleeping on the specified
  * wait channel.  Sleepq_broadcast() wakes all threads that are sleeping
  * on the specified wait channel.  A thread sleeping in an interruptible
  * sleep can be interrupted by calling sleepq_abort().  A thread can also
  * be removed from a specified sleep queue using the sleepq_remove()
  * function.  Note that the sleep queue chain must first be locked via
  * sleepq_lock() before calling sleepq_abort(), sleepq_broadcast(), or
  * sleepq_signal().  These routines each return a boolean that will be true
  * if at least one swapped-out thread was resumed.  In that case, the caller
  * is responsible for waking up the swapper by calling kick_proc0() after
  * releasing the sleep queue chain lock.
  *
  * Each thread allocates a sleep queue at thread creation via sleepq_alloc()
  * and releases it at thread destruction via sleepq_free().  Note that
  * a sleep queue is not tied to a specific thread and that the sleep queue
  * released at thread destruction may not be the same sleep queue that the
  * thread allocated when it was created.
  *
  * XXX: Some other parts of the kernel such as ithread sleeping may end up
  * using this interface as well (death to TDI_IWAIT!)
  */
 
 struct lock_object;
 struct sleepqueue;
 struct thread;
 
 #ifdef _KERNEL
 
 #define	SLEEPQ_TYPE		0x0ff		/* Mask of sleep queue types. */
 #define	SLEEPQ_SLEEP		0x00		/* Used by sleep/wakeup. */
 #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
 #define	SLEEPQ_PAUSE		0x02		/* Used by pause. */
 #define	SLEEPQ_SX		0x03		/* Used by an sx lock. */
 #define	SLEEPQ_LK		0x04		/* Used by a lockmgr. */
 #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
 #define	SLEEPQ_STOP_ON_BDRY	0x200		/* Stop sleeping thread on
 						   user mode boundary */
 
 void	init_sleepqueues(void);
 int	sleepq_abort(struct thread *td, int intrval);
 void	sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg,
 	    int flags, int queue);
 struct sleepqueue *sleepq_alloc(void);
 int	sleepq_broadcast(void *wchan, int flags, int pri, int queue);
 void	sleepq_free(struct sleepqueue *sq);
 void	sleepq_lock(void *wchan);
 struct sleepqueue *sleepq_lookup(void *wchan);
 void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
 int	sleepq_signal(void *wchan, int flags, int pri, int queue);
-void	sleepq_set_timeout_bt(void *wchan, struct bintime bt);
-void	sleepq_set_timeout(void *wchan, int timo);
+void	_sleepq_set_timeout(void *wchan, struct bintime *bt, int timo);
+#define	sleepq_set_timeout(wchan, timo)					\
+    _sleepq_set_timeout((wchan), (NULL), (timo))
+#define	sleepq_set_timeout_bt(wchan, bt)				\
+    _sleepq_set_timeout((wchan), &(bt), (0))
 u_int	sleepq_sleepcnt(void *wchan, int queue);
 int	sleepq_timedwait(void *wchan, int pri);
 int	sleepq_timedwait_sig(void *wchan, int pri);
 int	sleepq_type(void *wchan);
 void	sleepq_wait(void *wchan, int pri);
 int	sleepq_wait_sig(void *wchan, int pri);
 
 #endif	/* _KERNEL */
 #endif	/* !_SYS_SLEEPQUEUE_H_ */