Index: head/sys/kern/kern_condvar.c
===================================================================
--- head/sys/kern/kern_condvar.c	(revision 155740)
+++ head/sys/kern/kern_condvar.c	(revision 155741)
@@ -1,390 +1,383 @@
 /*-
  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/resourcevar.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 /*
  * Common sanity checks for cv_wait* functions.
  */
 #define	CV_ASSERT(cvp, mp, td) do {					\
 	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
 	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
 	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
 	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
 	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
 } while (0)
 
 /*
  * Initialize a condition variable.  Must be called before use.
  */
 void
 cv_init(struct cv *cvp, const char *desc)
 {
 
 	cvp->cv_description = desc;
 	cvp->cv_waiters = 0;
 }
 
 /*
  * Destroy a condition variable.  The condition variable must be re-initialized
  * in order to be re-used.
  */
 void
 cv_destroy(struct cv *cvp)
 {
 #ifdef INVARIANTS
 	struct sleepqueue *sq;
 
 	sleepq_lock(cvp);
 	sq = sleepq_lookup(cvp);
 	sleepq_release(cvp);
 	KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__));
 #endif
 }
 
 /*
  * Wait on a condition variable.  The current thread is placed on the condition
  * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
  * condition variable will resume the thread.  The mutex is released before
  * sleeping and will be held on return.  It is recommended that the mutex be
  * held when cv_signal or cv_broadcast are called.
  */
 void
 cv_wait(struct cv *cvp, struct mtx *mp)
 {
 	WITNESS_SAVE_DECL(mp);
 
 	WITNESS_SAVE(&mp->mtx_object, mp);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return;
 	}
 
 	cv_wait_unlock(cvp, mp);
 	mtx_lock(mp);
 	WITNESS_RESTORE(&mp->mtx_object, mp);
 }
 
 /*
  * Wait on a condition variable.  This function differs from cv_wait by
  * not acquiring the mutex after the condition variable was signaled.
  */
 void
 cv_wait_unlock(struct cv *cvp, struct mtx *mp)
 {
 	struct thread *td;
 
 	td = curthread;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0);
 #endif
 	CV_ASSERT(cvp, mp, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
 	    "Waiting on \"%s\"", cvp->cv_description);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		mtx_unlock(mp);
 		return;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	DROP_GIANT();
 	mtx_unlock(mp);
 
 	sleepq_add(cvp, mp, cvp->cv_description, SLEEPQ_CONDVAR);
 	sleepq_wait(cvp);
 
 	PICKUP_GIANT();
 }
 
 /*
  * Wait on a condition variable, allowing interruption by signals.  Return 0 if
  * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
  * a signal was caught.  If ERESTART is returned the system call should be
  * restarted if possible.
  */
 int
 cv_wait_sig(struct cv *cvp, struct mtx *mp)
 {
 	struct thread *td;
 	struct proc *p;
-	int rval, sig;
+	int rval;
 	WITNESS_SAVE_DECL(mp);
 
 	td = curthread;
 	p = td->td_proc;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0);
 #endif
 	CV_ASSERT(cvp, mp, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	WITNESS_SAVE(&mp->mtx_object, mp);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * procs or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(cvp);
 
 	/*
 	 * Don't bother sleeping if we are exiting and not the exiting
 	 * thread or if our thread is marked as interrupted.
 	 */
 	mtx_lock_spin(&sched_lock);
 	rval = thread_sleep_check(td);
 	mtx_unlock_spin(&sched_lock);
 	if (rval != 0) {
 		sleepq_release(cvp);
 		return (rval);
 	}
 
 	cvp->cv_waiters++;
 	DROP_GIANT();
 	mtx_unlock(mp);
 
 	sleepq_add(cvp, mp, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE);
-	sig = sleepq_catch_signals(cvp);
 	rval = sleepq_wait_sig(cvp);
-	if (rval == 0)
-		rval = sleepq_calc_signal_retval(sig);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0);
 #endif
 	PICKUP_GIANT();
 	mtx_lock(mp);
 	WITNESS_RESTORE(&mp->mtx_object, mp);
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
  * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
  * expires.
  */
 int
 cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
 {
 	struct thread *td;
 	int rval;
 	WITNESS_SAVE_DECL(mp);
 
 	td = curthread;
 	rval = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0);
 #endif
 	CV_ASSERT(cvp, mp, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	WITNESS_SAVE(&mp->mtx_object, mp);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return 0;
 	}
 
 	sleepq_lock(cvp);
 
 	cvp->cv_waiters++;
 	DROP_GIANT();
 	mtx_unlock(mp);
 
 	sleepq_add(cvp, mp, cvp->cv_description, SLEEPQ_CONDVAR);
 	sleepq_set_timeout(cvp, timo);
 	rval = sleepq_timedwait(cvp);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0);
 #endif
 	PICKUP_GIANT();
 	mtx_lock(mp);
 	WITNESS_RESTORE(&mp->mtx_object, mp);
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for at most timo/hz seconds, allowing
  * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
  * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
  * a signal was caught.
  */
 int
 cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
 {
 	struct thread *td;
 	struct proc *p;
 	int rval;
-	int sig;
 	WITNESS_SAVE_DECL(mp);
 
 	td = curthread;
 	p = td->td_proc;
 	rval = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0);
 #endif
 	CV_ASSERT(cvp, mp, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	WITNESS_SAVE(&mp->mtx_object, mp);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return 0;
 	}
 
 	sleepq_lock(cvp);
 
 	/*
 	 * Don't bother sleeping if we are exiting and not the exiting
 	 * thread or if our thread is marked as interrupted.
 	 */
 	mtx_lock_spin(&sched_lock);
 	rval = thread_sleep_check(td);
 	mtx_unlock_spin(&sched_lock);
 	if (rval != 0) {
 		sleepq_release(cvp);
 		return (rval);
 	}
 
 	cvp->cv_waiters++;
 	DROP_GIANT();
 	mtx_unlock(mp);
 
 	sleepq_add(cvp, mp, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE);
 	sleepq_set_timeout(cvp, timo);
-	sig = sleepq_catch_signals(cvp);
-	rval = sleepq_timedwait_sig(cvp, sig != 0);
-	if (rval == 0)
-		rval = sleepq_calc_signal_retval(sig);
+	rval = sleepq_timedwait_sig(cvp);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0);
 #endif
 	PICKUP_GIANT();
 	mtx_lock(mp);
 	WITNESS_RESTORE(&mp->mtx_object, mp);
 
 	return (rval);
 }
 
 /*
  * Signal a condition variable, wakes up one waiting thread.  Will also wake up
  * the swapper if the process is not in memory, so that it can bring the
  * sleeping process in.  Note that this may also result in additional threads
  * being made runnable.  Should be called with the same mutex as was passed to
  * cv_wait held.
  */
 void
 cv_signal(struct cv *cvp)
 {
 
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters--;
 		sleepq_signal(cvp, SLEEPQ_CONDVAR, -1);
 	} else
 		sleepq_release(cvp);
 }
 
 /*
  * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
  * Should be called with the same mutex as was passed to cv_wait held.
  */
 void
 cv_broadcastpri(struct cv *cvp, int pri)
 {
 
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters = 0;
 		sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri);
 	} else
 		sleepq_release(cvp);
 }
Index: head/sys/kern/kern_kse.c
===================================================================
--- head/sys/kern/kern_kse.c	(revision 155740)
+++ head/sys/kern/kern_kse.c	(revision 155741)
@@ -1,1467 +1,1467 @@
 /*-
  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
  *  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/imgact.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/kse.h>
 #include <sys/ktr.h>
 #include <vm/uma.h>
 
 /*
  * KSEGRP related storage.
  */
 static uma_zone_t upcall_zone;
 
 /* DEBUG ONLY */
 extern int virtual_cpu;
 extern int thread_debug;
 
 extern int max_threads_per_proc;
 extern int max_groups_per_proc;
 extern int max_threads_hits;
 extern struct mtx kse_zombie_lock;
 
 
 TAILQ_HEAD(, kse_upcall) zombie_upcalls =
 	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
 
 static int thread_update_usr_ticks(struct thread *td);
 static void thread_alloc_spare(struct thread *td);
 
 struct kse_upcall *
 upcall_alloc(void)
 {
 	struct kse_upcall *ku;
 
 	ku = uma_zalloc(upcall_zone, M_WAITOK | M_ZERO);
 	return (ku);
 }
 
 void
 upcall_free(struct kse_upcall *ku)
 {
 
 	uma_zfree(upcall_zone, ku);
 }
 
 void
 upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
 {
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
 	ku->ku_ksegrp = kg;
 	kg->kg_numupcalls++;
 }
 
 void
 upcall_unlink(struct kse_upcall *ku)
 {
 	struct ksegrp *kg = ku->ku_ksegrp;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
 	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
 	kg->kg_numupcalls--;
 	upcall_stash(ku);
 }
 
 void
 upcall_remove(struct thread *td)
 {
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (td->td_upcall != NULL) {
 		td->td_upcall->ku_owner = NULL;
 		upcall_unlink(td->td_upcall);
 		td->td_upcall = NULL;
 	}
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct kse_switchin_args {
 	struct kse_thr_mailbox *tmbx;
 	int flags;
 };
 #endif
 
 int
 kse_switchin(struct thread *td, struct kse_switchin_args *uap)
 {
 	struct kse_thr_mailbox tmbx;
 	struct kse_upcall *ku;
 	int error;
 
 	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
 		return (EINVAL);
 	error = (uap->tmbx == NULL) ? EINVAL : 0;
 	if (!error)
 		error = copyin(uap->tmbx, &tmbx, sizeof(tmbx));
 	if (!error && (uap->flags & KSE_SWITCHIN_SETTMBX))
 		error = (suword(&ku->ku_mailbox->km_curthread,
 			 (long)uap->tmbx) != 0 ? EINVAL : 0);
 	if (!error)
 		error = set_mcontext(td, &tmbx.tm_context.uc_mcontext);
 	if (!error) {
 		suword32(&uap->tmbx->tm_lwp, td->td_tid);
 		if (uap->flags & KSE_SWITCHIN_SETTMBX) {
 			td->td_mailbox = uap->tmbx;
 			td->td_pflags |= TDP_CAN_UNBIND;
 		}
 		if (td->td_proc->p_flag & P_TRACED) {
 			if (tmbx.tm_dflags & TMDF_SSTEP)
 				ptrace_single_step(td);
 			else
 				ptrace_clear_single_step(td);
 			if (tmbx.tm_dflags & TMDF_SUSPEND) {
 				mtx_lock_spin(&sched_lock);
 				/* fuword can block, check again */
 				if (td->td_upcall)
 					ku->ku_flags |= KUF_DOUPCALL;
 				mtx_unlock_spin(&sched_lock);
 			}
 		}
 	}
 	return ((error == 0) ? EJUSTRETURN : error);
 }
 
 /*
 struct kse_thr_interrupt_args {
 	struct kse_thr_mailbox * tmbx;
 	int cmd;
 	long data;
 };
 */
 int
 kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
 {
 	struct kse_execve_args args;
 	struct image_args iargs;
 	struct proc *p;
 	struct thread *td2;
 	struct kse_upcall *ku;
 	struct kse_thr_mailbox *tmbx;
 	uint32_t flags;
 	int error;
 
 	p = td->td_proc;
 
 	if (!(p->p_flag & P_SA))
 		return (EINVAL);
 
 	switch (uap->cmd) {
 	case KSE_INTR_SENDSIG:
 		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
 			return (EINVAL);
 	case KSE_INTR_INTERRUPT:
 	case KSE_INTR_RESTART:
 		PROC_LOCK(p);
 		mtx_lock_spin(&sched_lock);
 		FOREACH_THREAD_IN_PROC(p, td2) {
 			if (td2->td_mailbox == uap->tmbx)
 				break;
 		}
 		if (td2 == NULL) {
 			mtx_unlock_spin(&sched_lock);
 			PROC_UNLOCK(p);
 			return (ESRCH);
 		}
 		if (uap->cmd == KSE_INTR_SENDSIG) {
 			if (uap->data > 0) {
 				td2->td_flags &= ~TDF_INTERRUPT;
 				mtx_unlock_spin(&sched_lock);
 				tdsignal(p, td2, (int)uap->data, NULL);
 			} else {
 				mtx_unlock_spin(&sched_lock);
 			}
 		} else {
 			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
 			if (TD_CAN_UNBIND(td2))
 				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
 			if (uap->cmd == KSE_INTR_INTERRUPT)
 				td2->td_intrval = EINTR;
 			else
 				td2->td_intrval = ERESTART;
 			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
-				sleepq_abort(td2);
+				sleepq_abort(td2, td2->td_intrval);
 			mtx_unlock_spin(&sched_lock);
 		}
 		PROC_UNLOCK(p);
 		break;
 	case KSE_INTR_SIGEXIT:
 		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
 			return (EINVAL);
 		PROC_LOCK(p);
 		sigexit(td, (int)uap->data);
 		break;
 
 	case KSE_INTR_DBSUSPEND:
 		/* this sub-function is only for a bound thread */
 		if (td->td_pflags & TDP_SA)
 			return (EINVAL);
 		ku = td->td_upcall;
 		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
 		if (tmbx == NULL || tmbx == (void *)-1)
 			return (EINVAL);
 		flags = 0;
 		while ((p->p_flag & P_TRACED) && !(p->p_flag & P_SINGLE_EXIT)) {
 			flags = fuword32(&tmbx->tm_dflags);
 			if (!(flags & TMDF_SUSPEND))
 				break;
 			PROC_LOCK(p);
 			mtx_lock_spin(&sched_lock);
 			thread_stopped(p);
 			thread_suspend_one(td);
 			PROC_UNLOCK(p);
 			mi_switch(SW_VOL, NULL);
 			mtx_unlock_spin(&sched_lock);
 		}
 		return (0);
 
 	case KSE_INTR_EXECVE:
 		error = copyin((void *)uap->data, &args, sizeof(args));
 		if (error)
 			return (error);
 		error = exec_copyin_args(&iargs, args.path, UIO_USERSPACE,
 		    args.argv, args.envp);
 		if (error == 0)
 			error = kern_execve(td, &iargs, NULL);
 		if (error == 0) {
 			PROC_LOCK(p);
 			SIGSETOR(td->td_siglist, args.sigpend);
 			PROC_UNLOCK(p);
 			kern_sigprocmask(td, SIG_SETMASK, &args.sigmask, NULL,
 			    0);
 		}
 		return (error);
 
 	default:
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
 struct kse_exit_args {
 	register_t dummy;
 };
 */
 int
 kse_exit(struct thread *td, struct kse_exit_args *uap)
 {
 	struct proc *p;
 	struct ksegrp *kg;
 	struct kse_upcall *ku, *ku2;
 	int    error, count;
 
 	p = td->td_proc;
 	/* 
 	 * Ensure that this is only called from the UTS
 	 */
 	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
 		return (EINVAL);
 
 	kg = td->td_ksegrp;
 	count = 0;
 
 	/*
 	 * Calculate the existing non-exiting upcalls in this ksegroup.
 	 * If we are the last upcall but there are still other threads,
 	 * then do not exit. We need the other threads to be able to 
 	 * complete whatever they are doing.
 	 * XXX This relies on the userland knowing what to do if we return.
 	 * It may be a better choice to convert ourselves into a kse_release
 	 * (or similar) and wait in the kernel to be needed.
 	 */
 	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
 	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
 		if (ku2->ku_flags & KUF_EXITING)
 			count++;
 	}
 	if ((kg->kg_numupcalls - count) == 1 &&
 	    (kg->kg_numthreads > 1)) {
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 		return (EDEADLK);
 	}
 	ku->ku_flags |= KUF_EXITING;
 	mtx_unlock_spin(&sched_lock);
 	PROC_UNLOCK(p);
 
 	/* 
 	 * Mark the UTS mailbox as having been finished with.
 	 * If that fails then just go for a segfault.
 	 * XXX need to check that it can be delivered without a mailbox.
 	 */
 	error = suword32(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
 	if (!(td->td_pflags & TDP_SA))
 		if (suword32(&td->td_mailbox->tm_lwp, 0))
 			error = EFAULT;
 	PROC_LOCK(p);
 	if (error)
 		psignal(p, SIGSEGV);
 	sigqueue_flush(&td->td_sigqueue);
 	mtx_lock_spin(&sched_lock);
 	upcall_remove(td);
 	if (p->p_numthreads != 1) {
 		/*
 		 * If we are not the last thread, but we are the last
 		 * thread in this ksegrp, then by definition this is not
 		 * the last group and we need to clean it up as well.
 		 * thread_exit will clean up the kseg as needed.
 		 */
 		thread_stopped(p);
 		thread_exit();
 		/* NOTREACHED */
 	}
 	/*
 	 * This is the last thread. Just return to the user.
 	 * We know that there is only one ksegrp too, as any others
 	 * would have been discarded in previous calls to thread_exit().
 	 * Effectively we have left threading mode..
 	 * The only real thing left to do is ensure that the
 	 * scheduler sets our concurrency back to 1 as that may be a
 	 * resource leak otherwise.
 	 * This is an A[PB]I issue.. what SHOULD we do?
 	 * One possibility is to return to the user. It may not cope well.
 	 * The other possibility would be to let the process exit.
 	 */
 	thread_unthread(td);
 	mtx_unlock_spin(&sched_lock);
 	PROC_UNLOCK(p);
 #if 1
 	return (0);
 #else
 	exit1(td, 0);
 #endif
 }
 
 /*
  * Either becomes an upcall or waits for an awakening event and
  * then becomes an upcall. Only error cases return.
  */
 /*
 struct kse_release_args {
 	struct timespec *timeout;
 };
 */
 int
 kse_release(struct thread *td, struct kse_release_args *uap)
 {
 	struct proc *p;
 	struct ksegrp *kg;
 	struct kse_upcall *ku;
 	struct timespec timeout;
 	struct timeval tv;
 	sigset_t sigset;
 	int error;
 
 	p = td->td_proc;
 	kg = td->td_ksegrp;
 	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
 		return (EINVAL);
 	if (uap->timeout != NULL) {
 		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
 			return (error);
 		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
 	}
 	if (td->td_pflags & TDP_SA)
 		td->td_pflags |= TDP_UPCALLING;
 	else {
 		ku->ku_mflags = fuword32(&ku->ku_mailbox->km_flags);
 		if (ku->ku_mflags == -1) {
 			PROC_LOCK(p);
 			sigexit(td, SIGSEGV);
 		}
 	}
 	PROC_LOCK(p);
 	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
 		/* UTS wants to wait for signal event */
 		if (!(p->p_flag & P_SIGEVENT) &&
 		    !(ku->ku_flags & KUF_DOUPCALL)) {
 			td->td_kflags |= TDK_KSERELSIG;
 			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
 			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
 			td->td_kflags &= ~(TDK_KSERELSIG | TDK_WAKEUP);
 		}
 		p->p_flag &= ~P_SIGEVENT;
 		sigset = p->p_siglist;
 		PROC_UNLOCK(p);
 		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
 		    sizeof(sigset));
 	} else {
 		if ((ku->ku_flags & KUF_DOUPCALL) == 0 &&
 		    ((ku->ku_mflags & KMF_NOCOMPLETED) ||
 		     (kg->kg_completed == NULL))) {
 			kg->kg_upsleeps++;
 			td->td_kflags |= TDK_KSEREL;
 			error = msleep(&kg->kg_completed, &p->p_mtx,
 				PPAUSE|PCATCH, "kserel",
 				(uap->timeout ? tvtohz(&tv) : 0));
 			td->td_kflags &= ~(TDK_KSEREL | TDK_WAKEUP);
 			kg->kg_upsleeps--;
 		}
 		PROC_UNLOCK(p);
 	}
 	if (ku->ku_flags & KUF_DOUPCALL) {
 		mtx_lock_spin(&sched_lock);
 		ku->ku_flags &= ~KUF_DOUPCALL;
 		mtx_unlock_spin(&sched_lock);
 	}
 	return (0);
 }
 
 /* struct kse_wakeup_args {
 	struct kse_mailbox *mbx;
 }; */
 int
 kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
 {
 	struct proc *p;
 	struct ksegrp *kg;
 	struct kse_upcall *ku;
 	struct thread *td2;
 
 	p = td->td_proc;
 	td2 = NULL;
 	ku = NULL;
 	/* KSE-enabled processes only, please. */
 	if (!(p->p_flag & P_SA))
 		return (EINVAL);
 	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
 	if (uap->mbx) {
 		FOREACH_KSEGRP_IN_PROC(p, kg) {
 			FOREACH_UPCALL_IN_GROUP(kg, ku) {
 				if (ku->ku_mailbox == uap->mbx)
 					break;
 			}
 			if (ku)
 				break;
 		}
 	} else {
 		kg = td->td_ksegrp;
 		if (kg->kg_upsleeps) {
 			mtx_unlock_spin(&sched_lock);
 			wakeup(&kg->kg_completed);
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		ku = TAILQ_FIRST(&kg->kg_upcalls);
 	}
 	if (ku == NULL) {
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 		return (ESRCH);
 	}
 	if ((td2 = ku->ku_owner) == NULL) {
 		mtx_unlock_spin(&sched_lock);
 		panic("%s: no owner", __func__);
 	} else if (td2->td_kflags & (TDK_KSEREL | TDK_KSERELSIG)) {
 		mtx_unlock_spin(&sched_lock);
 		if (!(td2->td_kflags & TDK_WAKEUP)) {
 			td2->td_kflags |= TDK_WAKEUP;
 			if (td2->td_kflags & TDK_KSEREL)
 				sleepq_remove(td2, &kg->kg_completed);
 			else
 				sleepq_remove(td2, &p->p_siglist);
 		}
 	} else {
 		ku->ku_flags |= KUF_DOUPCALL;
 		mtx_unlock_spin(&sched_lock);
 	}
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * No new KSEG: first call: use current KSE, don't schedule an upcall
  * All other situations, do allocate max new KSEs and schedule an upcall.
  *
  * XXX should be changed so that 'first' behaviour lasts for as long
  * as you have not made a kse in this ksegrp. i.e. as long as we do not have
  * a mailbox..
  */
 /* struct kse_create_args {
 	struct kse_mailbox *mbx;
 	int newgroup;
 }; */
 int
 kse_create(struct thread *td, struct kse_create_args *uap)
 {
 	struct ksegrp *newkg;
 	struct ksegrp *kg;
 	struct proc *p;
 	struct kse_mailbox mbx;
 	struct kse_upcall *newku;
 	int err, ncpus, sa = 0, first = 0;
 	struct thread *newtd;
 
 	p = td->td_proc;
 	kg = td->td_ksegrp;
 	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
 		return (err);
 
 	ncpus = mp_ncpus;
 	if (virtual_cpu != 0)
 		ncpus = virtual_cpu;
 	/*
 	 * If the new UTS mailbox says that this
 	 * will be a BOUND lwp, then it had better
 	 * have its thread mailbox already there.
 	 * In addition, this ksegrp will be limited to
 	 * a concurrency of 1. There is more on this later.
 	 */
 	if (mbx.km_flags & KMF_BOUND) {
 		if (mbx.km_curthread == NULL) 
 			return (EINVAL);
 		ncpus = 1;
 	} else {
 		sa = TDP_SA;
 	}
 
 	PROC_LOCK(p);
 	/*
 	 * Processes using the other threading model can't
 	 * suddenly start calling this one
 	 */
 	if ((p->p_flag & (P_SA|P_HADTHREADS)) == P_HADTHREADS) {
 		PROC_UNLOCK(p);
 		return (EINVAL);
 	}
 
 	/*
 	 * Limit it to NCPU upcall contexts per ksegrp in any case.
 	 * There is a small race here as we don't hold proclock
 	 * until we inc the ksegrp count, but it's not really a big problem
 	 * if we get one too many, but we save a proc lock.
 	 */
 	if ((!uap->newgroup) && (kg->kg_numupcalls >= ncpus)) {
 		PROC_UNLOCK(p);
 		return (EPROCLIM);
 	}
 
 	if (!(p->p_flag & P_SA)) {
 		first = 1;
 		p->p_flag |= P_SA|P_HADTHREADS;
 	}
 
 	PROC_UNLOCK(p);
 	/*
 	 * Now pay attention!
 	 * If we are going to be bound, then we need to be either
 	 * a new group, or the first call ever. In either
 	 * case we will be creating (or be) the only thread in a group.
 	 * and the concurrency will be set to 1.
 	 * This is not quite right, as we may still make ourself 
 	 * bound after making other ksegrps but it will do for now.
 	 * The library will only try to do this much.
 	 */
 	if (!sa && !(uap->newgroup || first))
 		return (EINVAL);
 
 	if (uap->newgroup) {
 		newkg = ksegrp_alloc();
 		bzero(&newkg->kg_startzero,
 		    __rangeof(struct ksegrp, kg_startzero, kg_endzero));
 		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
 		    __rangeof(struct ksegrp, kg_startcopy, kg_endcopy));
 		sched_init_concurrency(newkg);
 		PROC_LOCK(p);
 		if (p->p_numksegrps >= max_groups_per_proc) {
 			PROC_UNLOCK(p);
 			ksegrp_free(newkg);
 			return (EPROCLIM);
 		}
 		ksegrp_link(newkg, p);
 		mtx_lock_spin(&sched_lock);
 		sched_fork_ksegrp(td, newkg);
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 	} else {
 		/*
 		 * We want to make a thread in our own ksegrp.
 		 * If we are just the first call, either kind
 		 * is ok, but if not then either we must be 
 		 * already an upcallable thread to make another,
 		 * or a bound thread to make one of those.
 		 * Once again, not quite right but good enough for now.. XXXKSE
 		 */
 		if (!first && ((td->td_pflags & TDP_SA) != sa))
 			return (EINVAL);
 
 		newkg = kg;
 	}
 
 	/* 
 	 * This test is a bit "indirect".
 	 * It might simplify things if we made a direct way of testing
 	 * if a ksegrp has been worked on before.
 	 * In the case of a bound request and the concurrency being set to 
 	 * one, the concurrency will already be 1 so it's just inefficient
 	 * but not dangerous to call this again. XXX
 	 */
 	if (newkg->kg_numupcalls == 0) {
 		/*
 		 * Initialize KSE group with the appropriate
 		 * concurrency.
 		 *
 		 * For a multiplexed group, create as much concurrency
 		 * as the number of physical cpus.
 		 * This increases concurrency in the kernel even if the
 		 * userland is not MP safe and can only run on a single CPU.
 		 * In an ideal world, every physical cpu should execute a
 		 * thread.  If there is enough concurrency, threads in the
 		 * kernel can be executed in parallel on different cpus at
 		 * full speed without being restricted by the number of
 		 * upcalls the userland provides.
 		 * Adding more upcall structures only increases concurrency
 		 * in userland.
 		 *
 		 * For a bound thread group, because there is only one thread
 		 * in the group, we only set the concurrency for the group 
 		 * to 1.  A thread in this kind of group will never schedule
 		 * an upcall when blocked.  This simulates pthread system
 		 * scope thread behaviour.
 		 */
 		sched_set_concurrency(newkg, ncpus);
 	}
 	/* 
 	 * Even bound LWPs get a mailbox and an upcall to hold it.
 	 */
 	newku = upcall_alloc();
 	newku->ku_mailbox = uap->mbx;
 	newku->ku_func = mbx.km_func;
 	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));
 
 	/*
 	 * For the first call this may not have been set.
 	 * Of course nor may it actually be needed.
 	 */
 	if (td->td_standin == NULL)
 		thread_alloc_spare(td);
 
 	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
 	if (newkg->kg_numupcalls >= ncpus) {
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 		upcall_free(newku);
 		return (EPROCLIM);
 	}
 
 	/*
 	 * If we are the first time, and a normal thread,
 	 * then transfer all the signals back to the 'process'.
 	 * SA threading will make a special thread to handle them.
 	 */
 	if (first && sa) {
 		sigqueue_move_set(&td->td_sigqueue, &p->p_sigqueue, 
 			&td->td_sigqueue.sq_signals);
 		SIGFILLSET(td->td_sigmask);
 		SIG_CANTMASK(td->td_sigmask);
 	}
 
 	/*
 	 * Make the new upcall available to the ksegrp.
 	 * It may or may not use it, but it's available.
 	 */
 	upcall_link(newku, newkg);
 	PROC_UNLOCK(p);
 	if (mbx.km_quantum)
 		newkg->kg_upquantum = max(1, mbx.km_quantum / tick);
 
 	/*
 	 * Each upcall structure has an owner thread, find which
 	 * one owns it.
 	 */
 	if (uap->newgroup) {
 		/*
 		 * Because the new ksegrp hasn't a thread,
 		 * create an initial upcall thread to own it.
 		 */
 		newtd = thread_schedule_upcall(td, newku);
 	} else {
 		/*
 		 * If the current thread hasn't an upcall structure,
 		 * just assign the upcall to it.
 		 * It'll just return.
 		 */
 		if (td->td_upcall == NULL) {
 			newku->ku_owner = td;
 			td->td_upcall = newku;
 			newtd = td;
 		} else {
 			/*
 			 * Create a new upcall thread to own it.
 			 */
 			newtd = thread_schedule_upcall(td, newku);
 		}
 	}
 	mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * Let the UTS instance know its LWPID.
 	 * It doesn't really care. But the debugger will.
 	 */
 	suword32(&newku->ku_mailbox->km_lwp, newtd->td_tid);
 
 	/*
 	 * In the same manner, if the UTS has a current user thread, 
 	 * then it is also running on this LWP so set it as well.
 	 * The library could do that of course.. but why not..
 	 */
 	if (mbx.km_curthread)
 		suword32(&mbx.km_curthread->tm_lwp, newtd->td_tid);
 
 	
 	if (sa) {
 		newtd->td_pflags |= TDP_SA;
 	} else {
 		newtd->td_pflags &= ~TDP_SA;
 
 		/*
 		 * Since a library will use the mailbox pointer to 
 		 * identify even a bound thread, and the mailbox pointer
 		 * will never be allowed to change after this syscall
 		 * for a bound thread, set it here so the library can
 		 * find the thread after the syscall returns.
 		 */
 		newtd->td_mailbox = mbx.km_curthread;
 
 		if (newtd != td) {
 			/*
 			 * If we did create a new thread then
 			 * make sure it goes to the right place
 			 * when it starts up, and make sure that it runs 
 			 * at full speed when it gets there. 
 			 * thread_schedule_upcall() copies all cpu state
 			 * to the new thread, so we should clear single step
 			 * flag here.
 			 */
 			cpu_set_upcall_kse(newtd, newku->ku_func,
 				newku->ku_mailbox, &newku->ku_stack);
 			if (p->p_flag & P_TRACED)
 				ptrace_clear_single_step(newtd);
 		}
 	}
 	
 	/* 
 	 * If we are starting a new thread, kick it off.
 	 */
 	if (newtd != td) {
 		mtx_lock_spin(&sched_lock);
 		setrunqueue(newtd, SRQ_BORING);
 		mtx_unlock_spin(&sched_lock);
 	}
 	return (0);
 }
 
 /*
  * Create the UMA zone that upcall structures are allocated from.
  * Items are sizeof(struct kse_upcall) bytes; all four callback arguments
  * of uma_zcreate() are left NULL.
  */
 void
 kseinit(void)
 {
 
 	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
 }
 
 /*
  * Park a surplus upcall structure at the head of the zombie upcall
  * queue.  The queue is protected by the kse_zombie_lock spin mutex.
  */
 void
 upcall_stash(struct kse_upcall *ku)
 {
 
 	mtx_lock_spin(&kse_zombie_lock);
 	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
 	mtx_unlock_spin(&kse_zombie_lock);
 }
 
 /*
  * Reap zombie kse resource.
  */
 void
 kse_GC(void)
 {
 	struct kse_upcall *ku_first, *ku_next;
 
 	/*
 	 * Don't even bother to lock if none at this instant,
 	 * we really don't care about the next instant..
 	 */
 	if (!TAILQ_EMPTY(&zombie_upcalls)) {
 		mtx_lock_spin(&kse_zombie_lock);
 		ku_first = TAILQ_FIRST(&zombie_upcalls);
 		if (ku_first)
 			TAILQ_INIT(&zombie_upcalls);
 		mtx_unlock_spin(&kse_zombie_lock);
 		while (ku_first) {
 			ku_next = TAILQ_NEXT(ku_first, ku_link);
 			upcall_free(ku_first);
 			ku_first = ku_next;
 		}
 	}
 }
 
 /*
  * Export a thread's user context to its thread mailbox and push that
  * mailbox onto the ksegrp's list of completed mailboxes.
  *
  * The machine context is copied out to td_mailbox->tm_context, tm_lwp is
  * zeroed, and the mailbox is then linked at the head of the user-space
  * list anchored at kg->kg_completed.  On success td->td_mailbox is set
  * to NULL, td_usticks is reset and 0 is returned.  If any store to user
  * space fails, the process lock is taken and sigexit(td, SIGILL) is
  * called.
  *
  * When willexit is non-zero the thread's signal mask is filled before
  * the context is exported.
  */
 int
 thread_export_context(struct thread *td, int willexit)
 {
 	struct proc *p;
 	struct ksegrp *kg;
 	uintptr_t mbx;
 	void *addr;
 	int error = 0, sig;
 	mcontext_t mc;
 
 	p = td->td_proc;
 	kg = td->td_ksegrp;
 
 	/*
 	 * Post sync signal, or process SIGKILL and SIGSTOP.
 	 * For sync signal, it is only possible when the signal is not
 	 * caught by userland or process is being debugged.
 	 */
 	PROC_LOCK(p);
 	if (td->td_flags & TDF_NEEDSIGCHK) {
 		/* td_flags is modified under sched_lock. */
 		mtx_lock_spin(&sched_lock);
 		td->td_flags &= ~TDF_NEEDSIGCHK;
 		mtx_unlock_spin(&sched_lock);
 		/* Deliver every signal cursig() reports for this thread. */
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		while ((sig = cursig(td)) != 0)
 			postsig(sig);
 		mtx_unlock(&p->p_sigacts->ps_mtx);
 	}
 	if (willexit)
 		SIGFILLSET(td->td_sigmask);
 	PROC_UNLOCK(p);
 
 	/* Export the user/machine context. */
 	get_mcontext(td, &mc, 0);
 	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
 	error = copyout(&mc, addr, sizeof(mcontext_t));
 	if (error)
 		goto bad;
 
 	/* Zero the mailbox's tm_lwp field. */
 	addr = (caddr_t)(&td->td_mailbox->tm_lwp);
 	if (suword32(addr, 0)) {
 		error = EFAULT;
 		goto bad;
 	}
 
 	/* Get address in latest mbox of list pointer */
 	addr = (void *)(&td->td_mailbox->tm_next);
 	/*
 	 * Put the saved address of the previous first
 	 * entry into this one.
 	 *
 	 * The user-space store is done without the process lock held, so
 	 * the list head is sampled first and re-checked under the lock;
 	 * if it changed in between, the store is repeated with the new
 	 * head.
 	 */
 	for (;;) {
 		mbx = (uintptr_t)kg->kg_completed;
 		if (suword(addr, mbx)) {
 			error = EFAULT;
 			goto bad;
 		}
 		PROC_LOCK(p);
 		if (mbx == (uintptr_t)kg->kg_completed) {
 			kg->kg_completed = td->td_mailbox;
 			/*
 			 * The thread context may be taken away by
 			 * other upcall threads when we unlock
 			 * process lock. it's no longer valid to
 			 * use it again in any other places.
 			 */
 			td->td_mailbox = NULL;
 			PROC_UNLOCK(p);
 			break;
 		}
 		PROC_UNLOCK(p);
 	}
 	td->td_usticks = 0;
 	return (0);
 
 bad:
 	/* A user-space store failed: hand the thread to sigexit(). */
 	PROC_LOCK(p);
 	sigexit(td, SIGILL);
 	return (error);
 }
 
 /*
  * Hand the ksegrp's list of completed thread mailboxes to the upcall that
  * is about to go up: the current list head is stored into the upcall
  * mailbox's km_completed field and the in-kernel anchor is then cleared.
  *
  * Returns 0 on success.  If the store to user space fails, SIGSEGV is
  * posted to the process and EFAULT is returned.
  */
 static int
 thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
 {
 	struct proc *p;
 	void *uaddr;
 	uintptr_t snapshot;
 	int linked;
 
 	p = kg->kg_proc;
 	uaddr = (void *)(&ku->ku_mailbox->km_completed);
 	do {
 		/* Sample the head and publish it without the lock held. */
 		snapshot = (uintptr_t)kg->kg_completed;
 		if (suword(uaddr, snapshot) != 0) {
 			PROC_LOCK(p);
 			psignal(p, SIGSEGV);
 			PROC_UNLOCK(p);
 			return (EFAULT);
 		}
 		/*
 		 * Commit only if the head is still the value we published;
 		 * otherwise go around again with the new head.
 		 */
 		PROC_LOCK(p);
 		linked = (snapshot == (uintptr_t)kg->kg_completed);
 		if (linked)
 			kg->kg_completed = NULL;
 		PROC_UNLOCK(p);
 	} while (!linked);
 	return (0);
 }
 
 /*
  * Per-thread tick accounting for SA threads; meant to be called at
  * statclock interrupt time.  'user' is non-zero when the tick is charged
  * to user mode.  Always returns 0.
  */
 int
 thread_statclock(int user)
 {
 	struct thread *td;
 
 	td = curthread;
 	if ((td->td_pflags & TDP_SA) == 0)
 		return (0);
 
 	if (!user) {
 		/* System ticks are only counted while a mailbox is set. */
 		if (td->td_mailbox != NULL)
 			td->td_usticks++;
 		return (0);
 	}
 
 	/* User tick: currently always request an AST as well. */
 	mtx_lock_spin(&sched_lock);
 	td->td_flags |= TDF_ASTPENDING;
 	mtx_unlock_spin(&sched_lock);
 	td->td_uuticks++;
 	return (0);
 }
 
 /*
  * Export state clock ticks for userland
  */
 static int
 thread_update_usr_ticks(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	caddr_t addr;
 	u_int uticks;
 
 	if (td->td_mailbox == NULL)
 		return (-1);
 
 	if ((uticks = td->td_uuticks) != 0) {
 		td->td_uuticks = 0;
 		addr = (caddr_t)&td->td_mailbox->tm_uticks;
 		if (suword32(addr, uticks+fuword32(addr)))
 			goto error;
 	}
 	if ((uticks = td->td_usticks) != 0) {
 		td->td_usticks = 0;
 		addr = (caddr_t)&td->td_mailbox->tm_sticks;
 		if (suword32(addr, uticks+fuword32(addr)))
 			goto error;
 	}
 	return (0);
 
 error:
 	PROC_LOCK(p);
 	psignal(p, SIGSEGV);
 	PROC_UNLOCK(p);
 	return (-2);
 }
 
 /*
  * This function is intended to be used to initialize a spare thread
  * for upcall. Initialize thread's large data area outside sched_lock
  * for thread_schedule_upcall(). The crhold is also here to get it out
  * from the schedlock as it has a mutex op itself.
  * XXX BUG.. we need to get the cr ref after the thread has 
  * checked and chenged its own, not 6 months before...  
  */
 void
 thread_alloc_spare(struct thread *td)
 {
 	struct thread *spare;
 
 	if (td->td_standin)
 		return;
 	spare = thread_alloc();
 	td->td_standin = spare;
 	bzero(&spare->td_startzero,
 	    __rangeof(struct thread, td_startzero, td_endzero));
 	spare->td_proc = td->td_proc;
 	spare->td_ucred = crhold(td->td_ucred);
 }
 
 /*
  * Turn td's spare thread into an upcall thread that owns 'ku'.
  *
  * Must be called with sched_lock held.  The upcall must currently have no
  * owner and td must have a standin thread; the function panics if the
  * standin is missing.  Returns the new thread; putting it on a run queue
  * is left to the caller.
  */
 struct thread *
 thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
 {
 	struct thread *td2;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* The upcall must be free before it can be given a new owner. */
 	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
 
 	/* Consume the reserve thread. */
 	td2 = td->td_standin;
 	if (td2 == NULL) {
 		panic("no reserve thread when scheduling an upcall");
 		return (NULL);
 	}
 	td->td_standin = NULL;
 
 	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
 	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
 
 	/*
 	 * The zeroed region was already cleared by thread_alloc_spare()
 	 * (the crhold could not be done here, under sched_lock), so only
 	 * the copied region has to be filled in from the template thread.
 	 */
 	bcopy(&td->td_startcopy, &td2->td_startcopy,
 	    __rangeof(struct thread, td_startcopy, td_endcopy));
 	thread_link(td2, ku->ku_ksegrp);
 
 	/* Inherit parts of td's context as a starting template. */
 	cpu_set_upcall(td2, td);
 
 	/* The new thread becomes the owner of the upcall. */
 	ku->ku_owner = td2;
 	td2->td_upcall = ku;
 	td2->td_flags = 0;
 	td2->td_pflags = TDP_SA|TDP_UPCALLING;
 	td2->td_state = TDS_CAN_RUN;
 	td2->td_inhibitors = 0;
 
 	/* Fill the signal mask, then apply SIG_CANTMASK to it. */
 	SIGFILLSET(td2->td_sigmask);
 	SIG_CANTMASK(td2->td_sigmask);
 	sched_fork_thread(td, td2);
 	return (td2);	/* bogus.. should be a void function */
 }
 
 /*
  * Record a synchronous signal in the thread's mailbox.  Only used when
  * the thread generated a trap and the process is being debugged.
  *
  * Entered with the process lock and the sigacts mutex held; both are
  * dropped around the copyout and re-acquired before returning.  The
  * signal is added to the thread's mask, and the thread is passed to
  * sigexit() with SIGSEGV if the copyout fails.
  */
 void
 thread_signal_add(struct thread *td, ksiginfo_t *ksi)
 {
 	struct proc *p;
 	struct sigacts *acts;
 
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	acts = p->p_sigacts;
 	mtx_assert(&acts->ps_mtx, MA_OWNED);
 
 	mtx_unlock(&acts->ps_mtx);
 	SIGADDSET(td->td_sigmask, ksi->ksi_signo);
 	PROC_UNLOCK(p);
 
 	/* The user-space copy is done with neither lock held. */
 	if (copyout(&ksi->ksi_info, &td->td_mailbox->tm_syncsig,
 	    sizeof(siginfo_t)) != 0) {
 		PROC_LOCK(p);
 		sigexit(td, SIGSEGV);
 	}
 
 	PROC_LOCK(p);
 	mtx_lock(&acts->ps_mtx);
 }
 #include "opt_sched.h"
 /*
  * Called with sched_lock held when 'td' is being switched out.  Decides
  * whether the outgoing thread should give its upcall to a freshly
  * scheduled upcall thread, and returns the thread that should run next.
  *
  * An upcall is scheduled only when td can unbind, has a standin thread,
  * and is either on a sleep queue or suspended without the process-wide
  * stop being in effect (i.e. a debugger suspended just this thread).
  * XXXKSE eventually almost any inhibition could do.
  *
  * Returns 'nextthread' unchanged unless an upcall thread was created for
  * a voluntary switch with no successor chosen, in which case the new
  * upcall thread itself is returned.
  */
 struct thread *
 thread_switchout(struct thread *td, int flags, struct thread *nextthread)
 {
 	struct kse_upcall *ku;
 	struct thread *upcall_td;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* Short term event, or nothing to unbind: leave things alone. */
 	if (!TD_CAN_UNBIND(td) || !(td->td_standin))
 		return (nextthread);
 	if (!TD_ON_SLEEPQ(td) &&
 	    !(TD_IS_SUSPENDED(td) && !P_SHOULDSTOP(td->td_proc)))
 		return (nextthread);
 
 	/*
 	 * Release ownership of the upcall and schedule an upcall thread,
 	 * which becomes the new owner of the upcall structure.
 	 */
 	ku = td->td_upcall;
 	ku->ku_owner = NULL;
 	td->td_upcall = NULL;
 	td->td_pflags &= ~TDP_CAN_UNBIND;
 	upcall_td = thread_schedule_upcall(td, ku);
 
 	/*
 	 * Voluntary switch with no successor picked: the new thread is
 	 * the extra one in the picture, so it is the one to run.
 	 */
 	if (!(flags & SW_INVOL || nextthread))
 		return upcall_td;
 
 	setrunqueue(upcall_td, SRQ_YIELDING);
 	return (nextthread);
 }
 
 /*
  * Setup done on the thread when it enters the kernel.
  *
  * If the process should stop, thread_suspend_check() is run first.  For
  * SA threads the upcall's mailbox flags and current-thread pointer are
  * then fetched from user space to decide whether this thread has a
  * thread mailbox (td_mailbox) and may unbind (TDP_CAN_UNBIND) during
  * this kernel entry.  Non-SA threads return right after the stop check.
  */
 void
 thread_user_enter(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct kse_upcall *ku;
 	struct kse_thr_mailbox *tmbx;
 	uint32_t flags;
 
 	/*
 	 * First check that we shouldn't just abort. we
 	 * can suspend it here or just exit.
 	 */
 	if (__predict_false(P_SHOULDSTOP(p))) {
 		PROC_LOCK(p);
 		thread_suspend_check(0);
 		PROC_UNLOCK(p);
 	}
 
 	if (!(td->td_pflags & TDP_SA))
 		return;
 
 	/*
 	 * If we are doing a syscall in a KSE environment,
 	 * note where our mailbox is.
 	 */
 	ku = td->td_upcall;
 
 	KASSERT(ku != NULL, ("no upcall owned"));
 	KASSERT(ku->ku_owner == td, ("wrong owner"));
 	KASSERT(!TD_CAN_UNBIND(td), ("can unbind"));
 
 	/* Make sure a spare thread exists in case an upcall is needed. */
 	if (td->td_standin == NULL)
 		thread_alloc_spare(td);
 
 	/* Snapshot the KSE mailbox flags and current user thread pointer. */
 	ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
 	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
 	if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
 	    (ku->ku_mflags & KMF_NOUPCALL)) {
 		/*
 		 * No current user thread, a fetched value of -1, or
 		 * upcalls disabled on the KSE mailbox: run without a
 		 * thread mailbox.
 		 */
 		td->td_mailbox = NULL;
 	} else {
 		flags = fuword32(&tmbx->tm_flags);
 		/*
 		 * On some architectures, TP register points to thread
 		 * mailbox but not points to kse mailbox, and userland
 		 * can not atomically clear km_curthread, but can
 		 * use TP register, and set TMF_NOUPCALL in thread
 		 * flag	to indicate a critical region.
 		 */
 		if (flags & TMF_NOUPCALL) {
 			td->td_mailbox = NULL;
 		} else {
 			td->td_mailbox = tmbx;
 			td->td_pflags |= TDP_CAN_UNBIND;
 			if (__predict_false(p->p_flag & P_TRACED)) {
 				/*
 				 * Traced process: if TMDF_SUSPEND is set
 				 * in the thread's tm_dflags, request an
 				 * upcall.
 				 */
 				flags = fuword32(&tmbx->tm_dflags);
 				if (flags & TMDF_SUSPEND) {
 					mtx_lock_spin(&sched_lock);
 					/* fuword can block, check again */
 					if (td->td_upcall)
 						ku->ku_flags |= KUF_DOUPCALL;
 					mtx_unlock_spin(&sched_lock);
 				}
 			}
 		}
 	}
 }
 
 /*
  * The extra work we go through if we are a threaded process when we
  * return to userland.
  *
  * If we are a KSE process and returning to user mode, check for
  * extra work to do before we return (e.g. for more syscalls
  * to complete first).  If we were in a critical section, we should
  * just return to let it finish. Same if we were in the UTS (in
  * which case the mailbox's context's busy indicator will be set).
  * The only traps we support will have set the mailbox.
  * We will clear it here.
  *
  * Returns 0 for non-SA threads and on the direct-return fast path;
  * otherwise returns the error from the last user-space store, after
  * posting SIGSEGV to the process if that error is non-zero.  The 'frame'
  * argument is not referenced in this function.
  */
 int
 thread_userret(struct thread *td, struct trapframe *frame)
 {
 	struct kse_upcall *ku;
 	struct ksegrp *kg, *kg2;
 	struct proc *p;
 	struct timespec ts;
 	int error = 0, upcalls, uts_crit;
 
 	/* Nothing to do with bound thread */
 	if (!(td->td_pflags & TDP_SA))
 		return (0);
 
 	/*
 	 * Update stat clock count for userland.
 	 * No thread mailbox is treated as being in a UTS critical region.
 	 * The return value of thread_update_usr_ticks() is not examined.
 	 */
 	if (td->td_mailbox != NULL) {
 		thread_update_usr_ticks(td);
 		uts_crit = 0;
 	} else {
 		uts_crit = 1;
 	}
 
 	p = td->td_proc;
 	kg = td->td_ksegrp;
 	ku = td->td_upcall;
 
 	/*
 	 * Optimisation:
 	 * This thread has not started any upcall.
 	 * If there is no work to report other than ourself,
 	 * then it can return direct to userland.
 	 */
 	if (TD_CAN_UNBIND(td)) {
 		td->td_pflags &= ~TDP_CAN_UNBIND;
 		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
 		    (kg->kg_completed == NULL) &&
 		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
 		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
 			/* Fast path: just refresh the mailbox's time of day. */
 			nanotime(&ts);
 			error = copyout(&ts,
 				(caddr_t)&ku->ku_mailbox->km_timeofday,
 				sizeof(ts));
 			td->td_mailbox = 0;
 			ku->ku_mflags = 0;
 			if (error)
 				goto out;
 			return (0);
 		}
 		thread_export_context(td, 0);
 		/*
 		 * There is something to report, and we own an upcall
 		 * structure, we can go to userland.
 		 * Turn ourself into an upcall thread.
 		 */
 		td->td_pflags |= TDP_UPCALLING;
 	} else if (td->td_mailbox && (ku == NULL)) {
 		/*
 		 * The thread has a mailbox but owns no upcall: export its
 		 * context (with willexit set), wake anyone sleeping on
 		 * kg_completed, and exit the thread.
 		 */
 		thread_export_context(td, 1);
 		PROC_LOCK(p);
 		if (kg->kg_upsleeps)
 			wakeup(&kg->kg_completed);
 		WITNESS_WARN(WARN_PANIC, &p->p_mtx.mtx_object,
 		    "thread exiting in userret");
 		sigqueue_flush(&td->td_sigqueue);
 		mtx_lock_spin(&sched_lock);
 		thread_stopped(p);
 		thread_exit();
 		/* NOTREACHED */
 	}
 
 	KASSERT(ku != NULL, ("upcall is NULL"));
 	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
 
 	/*
 	 * Throttle: while the process is over max_threads_per_proc, sleep
 	 * in hz/10 steps.  The loop ends when the thread count drops, when
 	 * the upcall count alone already reaches the limit, or when
 	 * msleep() returns anything other than EWOULDBLOCK.
 	 */
 	if (p->p_numthreads > max_threads_per_proc) {
 		max_threads_hits++;
 		PROC_LOCK(p);
 		mtx_lock_spin(&sched_lock);
 		p->p_maxthrwaits++;
 		while (p->p_numthreads > max_threads_per_proc) {
 			/* A group with no upcalls is counted as one. */
 			upcalls = 0;
 			FOREACH_KSEGRP_IN_PROC(p, kg2) {
 				if (kg2->kg_numupcalls == 0)
 					upcalls++;
 				else
 					upcalls += kg2->kg_numupcalls;
 			}
 			if (upcalls >= max_threads_per_proc)
 				break;
 			/* sched_lock is dropped across the sleep. */
 			mtx_unlock_spin(&sched_lock);
 			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
 			    "maxthreads", hz/10) != EWOULDBLOCK) {
 				mtx_lock_spin(&sched_lock);
 				break;
 			} else {
 				mtx_lock_spin(&sched_lock);
 			}
 		}
 		p->p_maxthrwaits--;
 		mtx_unlock_spin(&sched_lock);
 		PROC_UNLOCK(p);
 	}
 
 	if (td->td_pflags & TDP_UPCALLING) {
 		uts_crit = 0;
 		kg->kg_nextupcall = ticks + kg->kg_upquantum;
 		/*
 		 * There is no more work to do and we are going to ride
 		 * this thread up to userland as an upcall.
 		 * Do the last parts of the setup needed for the upcall.
 		 */
 		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
 		    td, td->td_proc->p_pid, td->td_proc->p_comm);
 
 		td->td_pflags &= ~TDP_UPCALLING;
 		if (ku->ku_flags & KUF_DOUPCALL) {
 			mtx_lock_spin(&sched_lock);
 			ku->ku_flags &= ~KUF_DOUPCALL;
 			mtx_unlock_spin(&sched_lock);
 		}
 		/*
 		 * Set user context to the UTS
 		 */
 		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
 			cpu_set_upcall_kse(td, ku->ku_func, ku->ku_mailbox,
 				&ku->ku_stack);
 			if (p->p_flag & P_TRACED)
 				ptrace_clear_single_step(td);
 			/* Publish our thread id and clear km_curthread. */
 			error = suword32(&ku->ku_mailbox->km_lwp,
 					td->td_tid);
 			if (error)
 				goto out;
 			error = suword(&ku->ku_mailbox->km_curthread, 0);
 			if (error)
 				goto out;
 		}
 
 		/*
 		 * Unhook the list of completed threads.
 		 * anything that completes after this gets to
 		 * come in next time.
 		 * Put the list of completed thread mailboxes on
 		 * this KSE's mailbox.
 		 */
 		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
 		    (error = thread_link_mboxes(kg, ku)) != 0)
 			goto out;
 	}
 	/* Outside a UTS critical region, refresh the time of day. */
 	if (!uts_crit) {
 		nanotime(&ts);
 		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
 	}
 
 out:
 	if (error) {
 		/*
 		 * Things are going to be so screwed we should just kill
 		 * the process.
 		 * how do we do that?
 		 */
 		PROC_LOCK(p);
 		psignal(p, SIGSEGV);
 		PROC_UNLOCK(p);
 	} else {
 		/*
 		 * Optimisation:
 		 * Ensure that we have a spare thread available,
 		 * for when we re-enter the kernel.
 		 */
 		if (td->td_standin == NULL)
 			thread_alloc_spare(td);
 	}
 
 	/* Per-entry state is reset before going back to user mode. */
 	ku->ku_mflags = 0;
 	td->td_mailbox = NULL;
 	td->td_usticks = 0;
 	return (error);	/* go sync */
 }
 
 /*
  * Called after ptrace has resumed a process.  For a traced SA process,
  * flag every upcall in every SA ksegrp so that it schedules an upcall:
  * the debugger may have changed something in userland and the UTS
  * should be notified as soon as possible.
  *
  * The process lock must be held by the caller.
  */
 void
 thread_continued(struct proc *p)
 {
 	struct ksegrp *kg;
 	struct kse_upcall *ku;
 	struct thread *first;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(P_SHOULDSTOP(p), ("process not stopped"));
 
 	/* Only traced SA processes need any of this. */
 	if ((p->p_flag & P_SA) == 0 || (p->p_flag & P_TRACED) == 0)
 		return;
 
 	FOREACH_KSEGRP_IN_PROC(p, kg) {
 		/* Skip empty groups and groups that are not SA groups. */
 		first = TAILQ_FIRST(&kg->kg_threads);
 		if (first == NULL || (first->td_pflags & TDP_SA) == 0)
 			continue;
 		FOREACH_UPCALL_IN_GROUP(kg, ku) {
 			mtx_lock_spin(&sched_lock);
 			ku->ku_flags |= KUF_DOUPCALL;
 			mtx_unlock_spin(&sched_lock);
 			wakeup(&kg->kg_completed);
 		}
 	}
 }
Index: head/sys/kern/kern_sig.c
===================================================================
--- head/sys/kern/kern_sig.c	(revision 155740)
+++ head/sys/kern/kern_sig.c	(revision 155741)
@@ -1,3286 +1,3291 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_sig.c	8.7 (Berkeley) 4/18/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/acct.h>
 #include <sys/condvar.h>
 #include <sys/event.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/kse.h>
 #include <sys/ktr.h>
 #include <sys/ktrace.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/unistd.h>
 #include <sys/wait.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <posix4/posix4.h>
 #include <machine/cpu.h>
 
 #include <security/audit/audit.h>
 
 #if defined (__alpha__) && !defined(COMPAT_43)
 #error "You *really* need COMPAT_43 on the alpha for longjmp(3)"
 #endif
 
 #define	ONSIG	32		/* NSIG for osig* syscalls.  XXX. */
 
 static int	coredump(struct thread *);
 static char	*expand_name(const char *, uid_t, pid_t);
 static int	killpg1(struct thread *td, int sig, int pgid, int all);
 static int	issignal(struct thread *p);
 static int	sigprop(int sig);
-static void	tdsigwakeup(struct thread *, int, sig_t);
+static void	tdsigwakeup(struct thread *, int, sig_t, int);
 static void	sig_suspend_threads(struct thread *, struct proc *, int);
 static int	filt_sigattach(struct knote *kn);
 static void	filt_sigdetach(struct knote *kn);
 static int	filt_signal(struct knote *kn, long hint);
 static struct thread *sigtd(struct proc *p, int sig, int prop);
 static int	kern_sigtimedwait(struct thread *, sigset_t,
 			ksiginfo_t *, struct timespec *);
 static int	do_tdsignal(struct proc *, struct thread *, int, ksiginfo_t *);
 static void	sigqueue_start(void);
 
 /* UMA zone for ksiginfo_t items; stays NULL until sigqueue_start() runs. */
 static uma_zone_t	ksiginfo_zone = NULL;
 /* Filter operations for signal events (attach, detach, event). */
 struct filterops sig_filtops =
 	{ 0, filt_sigattach, filt_sigdetach, filt_signal };
 
 /* kern.logsigexit (read-write), default 1. */
 static int	kern_logsigexit = 1;
 SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, 
     &kern_logsigexit, 0, 
     "Log processes quitting on abnormal signals to syslog(3)");
 
 /* kern.forcesigexit (read-write), default 1. */
 static int	kern_forcesigexit = 1;
 SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW,
     &kern_forcesigexit, 0, "Force trap signal to be handled");
 
 /* Parent node for the kern.sigqueue.* knobs declared below. */
 SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW, 0, "POSIX real time signal");
 
 /*
  * Upper bound on p_pendingcnt; sigqueue_add() refuses to allocate a new
  * ksiginfo for a process once its count reaches this value.
  */
 static int	max_pending_per_proc = 128;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW,
     &max_pending_per_proc, 0, "Max pending signals per proc");
 
 /*
  * Value passed to uma_prealloc() for ksiginfo_zone in sigqueue_start().
  * Settable as a tunable; exported read-only through sysctl.
  */
 static int	preallocate_siginfo = 1024;
 TUNABLE_INT("kern.sigqueue.preallocate", &preallocate_siginfo);
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RD,
     &preallocate_siginfo, 0, "Preallocated signal memory size");
 
 /* Incremented by sigqueue_add() each time the per-process limit is hit. */
 static int	signal_overflow = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD,
     &signal_overflow, 0, "Number of signals overflew");
 
 /* Incremented by sigqueue_add() each time ksiginfo_alloc() returns NULL. */
 static int	signal_alloc_fail = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD,
     &signal_alloc_fail, 0, "signals failed to be allocated");
 
 /* Register sigqueue_start() as a SYSINIT at SI_SUB_P1003_1B. */
 SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
 
 /*
  * Policy -- Can ucred cr1 send SIGIO to process cr2?
  * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG
  * in the right situations.
  *
  * True when cr1's effective uid is 0, or when either of cr1's real or
  * effective uid equals either of cr2's real or effective uid.
  */
 #define CANSIGIO(cr1, cr2) \
 	((cr1)->cr_uid == 0 || \
 	    (cr1)->cr_ruid == (cr2)->cr_ruid || \
 	    (cr1)->cr_uid == (cr2)->cr_ruid || \
 	    (cr1)->cr_ruid == (cr2)->cr_uid || \
 	    (cr1)->cr_uid == (cr2)->cr_uid)
 
 /* kern.sugid_coredump (read-write); not static, no explicit initializer. */
 int sugid_coredump;
 SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RW, 
     &sugid_coredump, 0, "Enable coredumping set user/group ID processes");
 
 /* kern.coredump (read-write), default 1. */
 static int	do_coredump = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW,
 	&do_coredump, 0, "Enable/Disable coredumps");
 
 /* kern.nodump_coredump (read-write), default 0. */
 static int	set_core_nodump_flag = 0;
 SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
 	0, "Enable setting the NODUMP flag on coredump files");
 
 /*
  * Signal properties and actions.
  * The array below categorizes the signals and their default actions
  * according to the following properties, which are OR-ed together per
  * signal:
  */
 #define	SA_KILL		0x01		/* terminates process by default */
 #define	SA_CORE		0x02		/* ditto and coredumps */
 #define	SA_STOP		0x04		/* suspend process */
 #define	SA_TTYSTOP	0x08		/* ditto, from tty */
 #define	SA_IGNORE	0x10		/* ignore by default */
 #define	SA_CONT		0x20		/* continue if suspended */
 #define	SA_CANTMASK	0x40		/* non-maskable, catchable */
 #define	SA_PROC		0x80		/* deliverable to any thread */
 
 /*
  * One entry per signal, listed from SIGHUP through SIGUSR2.  Only these
  * leading entries are initialized explicitly; the rest of the NSIG-sized
  * array is zero.
  * NOTE(review): the first entry is SIGHUP, so lookups presumably index
  * with (sig - 1) -- confirm against sigprop().
  */
 static int sigproptbl[NSIG] = {
         SA_KILL|SA_PROC,		/* SIGHUP */
         SA_KILL|SA_PROC,		/* SIGINT */
         SA_KILL|SA_CORE|SA_PROC,	/* SIGQUIT */
         SA_KILL|SA_CORE,		/* SIGILL */
         SA_KILL|SA_CORE,		/* SIGTRAP */
         SA_KILL|SA_CORE,		/* SIGABRT */
         SA_KILL|SA_CORE|SA_PROC,	/* SIGEMT */
         SA_KILL|SA_CORE,		/* SIGFPE */
         SA_KILL|SA_PROC,		/* SIGKILL */
         SA_KILL|SA_CORE,		/* SIGBUS */
         SA_KILL|SA_CORE,		/* SIGSEGV */
         SA_KILL|SA_CORE,		/* SIGSYS */
         SA_KILL|SA_PROC,		/* SIGPIPE */
         SA_KILL|SA_PROC,		/* SIGALRM */
         SA_KILL|SA_PROC,		/* SIGTERM */
         SA_IGNORE|SA_PROC,		/* SIGURG */
         SA_STOP|SA_PROC,		/* SIGSTOP */
         SA_STOP|SA_TTYSTOP|SA_PROC,	/* SIGTSTP */
         SA_IGNORE|SA_CONT|SA_PROC,	/* SIGCONT */
         SA_IGNORE|SA_PROC,		/* SIGCHLD */
         SA_STOP|SA_TTYSTOP|SA_PROC,	/* SIGTTIN */
         SA_STOP|SA_TTYSTOP|SA_PROC,	/* SIGTTOU */
         SA_IGNORE|SA_PROC,		/* SIGIO */
         SA_KILL,			/* SIGXCPU */
         SA_KILL,			/* SIGXFSZ */
         SA_KILL|SA_PROC,		/* SIGVTALRM */
         SA_KILL|SA_PROC,		/* SIGPROF */
         SA_IGNORE|SA_PROC,		/* SIGWINCH  */
         SA_IGNORE|SA_PROC,		/* SIGINFO */
         SA_KILL|SA_PROC,		/* SIGUSR1 */
         SA_KILL|SA_PROC,		/* SIGUSR2 */
 };
 
 /*
  * Boot-time setup for queued signals: create and preallocate the
  * ksiginfo zone, then publish the realtime-signal configuration values
  * through p31b_setcfg().
  */
 static void
 sigqueue_start(void)
 {
 
 	/* Backing store for queued ksiginfo_t items. */
 	ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_prealloc(ksiginfo_zone, preallocate_siginfo);
 
 	/* Advertise the POSIX realtime signal limits. */
 	p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
 	p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
 }
 
 /*
  * Allocate a zeroed ksiginfo_t from ksiginfo_zone.
  *
  * M_NOWAIT is added to the allocation flags when 'wait' is zero.
  * Returns NULL if the zone has not been created yet; otherwise returns
  * whatever uma_zalloc() returns.
  */
 ksiginfo_t *
 ksiginfo_alloc(int wait)
 {
 	int mflags;
 
 	if (ksiginfo_zone == NULL)
 		return (NULL);
 
 	mflags = wait ? M_ZERO : (M_ZERO | M_NOWAIT);
 	return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, mflags));
 }
 
 /*
  * Return a ksiginfo_t to ksiginfo_zone unconditionally.
  */
 void
 ksiginfo_free(ksiginfo_t *ksi)
 {
 
 	uma_zfree(ksiginfo_zone, ksi);
 }
 
 /*
  * Free 'ksi' back to ksiginfo_zone unless it carries the KSI_EXT flag.
  * Returns 1 if the item was freed and 0 if it was left alone.
  */
 static __inline int
 ksiginfo_tryfree(ksiginfo_t *ksi)
 {
 
 	if ((ksi->ksi_flags & KSI_EXT) != 0)
 		return (0);
 	uma_zfree(ksiginfo_zone, ksi);
 	return (1);
 }
 
 /*
  * Initialize a signal queue: no pending signals, an empty ksiginfo
  * list, 'p' as the owning process (may be NULL), and SQ_INIT as the
  * only flag.
  */
 void
 sigqueue_init(sigqueue_t *list, struct proc *p)
 {
 
 	list->sq_proc = p;
 	list->sq_flags = SQ_INIT;
 	TAILQ_INIT(&list->sq_list);
 	SIGEMPTYSET(list->sq_signals);
 }
 
 /*
  * Dequeue one instance of signal 'signo' from 'sq' into '*si'.
  *
  * The first queued ksiginfo for the signal, if any, is unlinked, copied
  * into '*si' and released.  The pending bit is cleared unless more than
  * one ksiginfo for the signal was on the queue.
  *
  * Return:
  * 	0	-	signal not found
  *	others	-	signal number
  */
 int
 sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *cur, *following;
 	int matches = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (!SIGISMEMBER(sq->sq_signals, signo))
 		return (0);
 
 	/*
 	 * Walk the whole list: the first match is handed out, and any
 	 * later ones are only counted.
 	 */
 	cur = TAILQ_FIRST(&sq->sq_list);
 	while (cur != NULL) {
 		following = TAILQ_NEXT(cur, ksi_link);
 		if (cur->ksi_signo == signo) {
 			if (matches == 0) {
 				TAILQ_REMOVE(&sq->sq_list, cur, ksi_link);
 				cur->ksi_sigq = NULL;
 				ksiginfo_copy(cur, si);
 				if (ksiginfo_tryfree(cur) && p != NULL)
 					p->p_pendingcnt--;
 			}
 			matches++;
 		}
 		cur = following;
 	}
 
 	/* No further instance is queued: the signal is no longer pending. */
 	if (matches <= 1)
 		SIGDELSET(sq->sq_signals, signo);
 	si->ksi_signo = signo;
 	return (signo);
 }
 
 /*
  * Unlink 'ksi' from the signal queue it is currently on, if any.  The
  * ksiginfo itself is not freed.  The owning process's pending count is
  * decremented unless the item is flagged KSI_EXT, and the signal's
  * pending bit is cleared when no other entry for it remains queued.
  */
 void
 sigqueue_take(ksiginfo_t *ksi)
 {
 	struct ksiginfo *other;
 	struct proc *p;
 	sigqueue_t *sq;
 
 	if (ksi == NULL)
 		return;
 	sq = ksi->ksi_sigq;
 	if (sq == NULL)
 		return;
 
 	p = sq->sq_proc;
 	TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 	ksi->ksi_sigq = NULL;
 	if (!(ksi->ksi_flags & KSI_EXT) && p != NULL)
 		p->p_pendingcnt--;
 
 	/* Another entry for this signal keeps the pending bit set. */
 	TAILQ_FOREACH(other, &sq->sq_list, ksi_link) {
 		if (other->ksi_signo == ksi->ksi_signo)
 			return;
 	}
 	SIGDELSET(sq->sq_signals, ksi->ksi_signo);
 }
 
 /*
  * Mark signal 'signo' pending on 'sq' and, where possible, queue the
  * accompanying ksiginfo.
  *
  * - SIGKILL, SIGSTOP, or a NULL 'si': only the pending bit is set.
  * - 'si' flagged KSI_INS: 'si' itself is linked onto the queue.
  * - ksiginfo zone not yet created: only the pending bit is set.
  * - otherwise a copy of 'si' is allocated (without waiting) and queued,
  *   subject to the per-process max_pending_per_proc limit.
  *
  * Returns 0 on success.  If the copy could not be queued, EAGAIN is
  * returned and the pending bit is NOT set -- except for KSI_TRAP
  * signals, which are still marked pending and report success.
  */
 int
 sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *copy;
 	int ret = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (signo != SIGKILL && signo != SIGSTOP && si != NULL) {
 		if (si->ksi_flags & KSI_INS) {
 			/* directly insert the ksi, don't copy it */
 			TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link);
 			si->ksi_sigq = sq;
 		} else if (!__predict_false(ksiginfo_zone == NULL)) {
 			if (p != NULL &&
 			    p->p_pendingcnt >= max_pending_per_proc) {
 				signal_overflow++;
 				ret = EAGAIN;
 			} else if ((copy = ksiginfo_alloc(0)) == NULL) {
 				signal_alloc_fail++;
 				ret = EAGAIN;
 			} else {
 				if (p != NULL)
 					p->p_pendingcnt++;
 				ksiginfo_copy(si, copy);
 				copy->ksi_signo = signo;
 				TAILQ_INSERT_TAIL(&sq->sq_list, copy,
 				    ksi_link);
 				copy->ksi_sigq = sq;
 			}
 
 			/* A trap signal is posted even without its info. */
 			if ((si->ksi_flags & KSI_TRAP) != 0)
 				ret = 0;
 			else if (ret != 0)
 				return (ret);
 		}
 	}
 
 	SIGADDSET(sq->sq_signals, signo);
 	return (ret);
 }
 
 /*
  * Empty a signal queue: unlink every ksiginfo, free those that are not
  * flagged KSI_EXT (adjusting the owner's pending count for each one
  * freed), and clear the whole pending set.  When the queue has an owning
  * process, its lock must be held.
  */
 void
 sigqueue_flush(sigqueue_t *sq)
 {
 	struct proc *p = sq->sq_proc;
 	ksiginfo_t *head;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (p != NULL)
 		PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	for (;;) {
 		head = TAILQ_FIRST(&sq->sq_list);
 		if (head == NULL)
 			break;
 		TAILQ_REMOVE(&sq->sq_list, head, ksi_link);
 		head->ksi_sigq = NULL;
 		if (ksiginfo_tryfree(head) && p != NULL)
 			p->p_pendingcnt--;
 	}
 
 	SIGEMPTYSET(sq->sq_signals);
 }
 
 /*
  * Add to '*set' the signal number of every ksiginfo queued on 'sq'.
  * Bits already present in '*set' are left untouched.
  */
 void
 sigqueue_collect_set(sigqueue_t *sq, sigset_t *set)
 {
 	ksiginfo_t *cur;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	for (cur = TAILQ_FIRST(&sq->sq_list); cur != NULL;
 	    cur = TAILQ_NEXT(cur, ksi_link)) {
 		SIGADDSET(*set, cur->ksi_signo);
 	}
 }
 
 /*
  * Move every signal in '*setp' from queue 'src' to queue 'dst': both the
  * queued ksiginfo items and the pending bits.  'setp' may point at the
  * sq_signals of either queue.
  */
 void
 sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, sigset_t *setp)
 {
 	sigset_t moved, wanted;
 	struct proc *from, *to;
 	ksiginfo_t *cur, *following;
 
 	KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 	KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited"));
 
 	/*
 	 * Work from a private copy so that the caller's set can alias
 	 * either queue's pending set without trouble.
 	 */
 	wanted = *setp;
 	from = src->sq_proc;
 	to = dst->sq_proc;
 
 	/*
 	 * Relink matching items onto the tail of the target list.
 	 * NOTE(review): the pending counts are adjusted for every moved
 	 * item, whereas the free paths only adjust for items without
 	 * KSI_EXT -- confirm this is intended.
 	 */
 	cur = TAILQ_FIRST(&src->sq_list);
 	while (cur != NULL) {
 		following = TAILQ_NEXT(cur, ksi_link);
 		if (SIGISMEMBER(wanted, cur->ksi_signo)) {
 			TAILQ_REMOVE(&src->sq_list, cur, ksi_link);
 			if (from != NULL)
 				from->p_pendingcnt--;
 			TAILQ_INSERT_TAIL(&dst->sq_list, cur, ksi_link);
 			cur->ksi_sigq = dst;
 			if (to != NULL)
 				to->p_pendingcnt++;
 		}
 		cur = following;
 	}
 
 	/* Transfer the pending bits that were both set and requested. */
 	moved = src->sq_signals;
 	SIGSETAND(moved, wanted);
 	SIGSETOR(dst->sq_signals, moved);
 	SIGSETNAND(src->sq_signals, moved);
 
 	/* Re-derive the source's bits from what is still queued there. */
 	sigqueue_collect_set(src, &src->sq_signals);
 }
 
 /*
  * Move a single signal from 'src' to 'dst'; a one-signal wrapper around
  * sigqueue_move_set().
  */
 void
 sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo)
 {
 	sigset_t only;
 
 	SIGEMPTYSET(only);
 	SIGADDSET(only, signo);
 	sigqueue_move_set(src, dst, &only);
 }
 
 /*
  * Discard every signal in '*set' from queue 'sq': unlink the matching
  * ksiginfo items (freeing those not flagged KSI_EXT) and clear the
  * corresponding pending bits.
  */
 void
 sigqueue_delete_set(sigqueue_t *sq, sigset_t *set)
 {
 	struct proc *p = sq->sq_proc;
 	ksiginfo_t *cur, *following;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 
 	/* Drop the queued items for the requested signals. */
 	cur = TAILQ_FIRST(&sq->sq_list);
 	while (cur != NULL) {
 		following = TAILQ_NEXT(cur, ksi_link);
 		if (SIGISMEMBER(*set, cur->ksi_signo)) {
 			TAILQ_REMOVE(&sq->sq_list, cur, ksi_link);
 			cur->ksi_sigq = NULL;
 			if (ksiginfo_tryfree(cur) && p != NULL)
 				p->p_pendingcnt--;
 		}
 		cur = following;
 	}
 
 	SIGSETNAND(sq->sq_signals, *set);
 	/* Re-derive pending bits from whatever is still queued. */
 	sigqueue_collect_set(sq, &sq->sq_signals);
 }
 
 /*
  * Single-signal convenience wrapper around sigqueue_delete_set().
  */
 void
 sigqueue_delete(sigqueue_t *sq, int signo)
 {
 	sigset_t only;
 
 	/* Build a set containing just signo. */
 	SIGEMPTYSET(only);
 	SIGADDSET(only, signo);
 
 	sigqueue_delete_set(sq, &only);
 }
 
 /*
  * Remove a set of signals from a process: entries matching *set are
  * pulled off the process queue and off every thread's queue onto a
  * temporary queue, which is then flushed.  The proc lock must be held.
  */
 void
 sigqueue_delete_set_proc(struct proc *p, sigset_t *set)
 {
 	struct thread *td;
 	sigqueue_t doomed;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Temporary queue, initialized with a NULL owner. */
 	sigqueue_init(&doomed, NULL);
 	sigqueue_move_set(&p->p_sigqueue, &doomed, set);
 
 	/* The thread list is walked with sched_lock held. */
 	mtx_lock_spin(&sched_lock);
 	FOREACH_THREAD_IN_PROC(p, td) {
 		sigqueue_move_set(&td->td_sigqueue, &doomed, set);
 	}
 	mtx_unlock_spin(&sched_lock);
 
 	sigqueue_flush(&doomed);
 }
 
 /*
  * Single-signal convenience wrapper around sigqueue_delete_set_proc().
  */
 void
 sigqueue_delete_proc(struct proc *p, int signo)
 {
 	sigset_t only;
 
 	/* Build a set containing just signo. */
 	SIGEMPTYSET(only);
 	SIGADDSET(only, signo);
 
 	sigqueue_delete_set_proc(p, &only);
 }
 
 /*
  * Remove SIGSTOP, SIGTSTP, SIGTTIN and SIGTTOU from process p's
  * queues via sigqueue_delete_set_proc().
  */
 void
 sigqueue_delete_stopmask_proc(struct proc *p)
 {
 	sigset_t stopsigs;
 
 	SIGEMPTYSET(stopsigs);
 	SIGADDSET(stopsigs, SIGTTOU);
 	SIGADDSET(stopsigs, SIGTTIN);
 	SIGADDSET(stopsigs, SIGTSTP);
 	SIGADDSET(stopsigs, SIGSTOP);
 
 	sigqueue_delete_set_proc(p, &stopsigs);
 }
 
 /*
  * Return the signal that should be delivered to the current process,
  * or 0 when there is none.  A pending stop signal with the default
  * action causes the process to stop inside issignal().
  *
  * The caller holds the proc lock and the sigacts mutex and must not
  * hold sched_lock.
  *
  * MP SAFE.
  */
 int
 cursig(struct thread *td)
 {
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 	mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED);
 	mtx_assert(&sched_lock, MA_NOTOWNED);
 
 	/* Only consult issignal() when something is actually pending. */
 	if (SIGPENDING(td))
 		return (issignal(td));
 	return (0);
 }
 
 /*
  * Arrange for ast() to handle unmasked pending signals on return to
  * user mode.  Call this whenever a signal is added to td_sigqueue or
  * unmasked in td_sigmask.  The proc lock must be held.
  */
 void
 signotify(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	sigset_t deliverable, before;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * If our mask changed we may have to move signals that were
 	 * previously masked by all threads onto our own sigqueue.
 	 */
 	deliverable = p->p_sigqueue.sq_signals;
 	if (p->p_flag & P_SA)
 		before = p->p_sigqueue.sq_signals;	/* read only under P_SA */
 	SIGSETNAND(deliverable, td->td_sigmask);
 	if (!SIGISEMPTY(deliverable)) {
 		sigqueue_move_set(&p->p_sigqueue, &td->td_sigqueue,
 		    &deliverable);
 	}
 
 	if (SIGPENDING(td)) {
 		mtx_lock_spin(&sched_lock);
 		td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
 		mtx_unlock_spin(&sched_lock);
 	}
 
 	/* For P_SA processes, flag a change of the process pending set. */
 	if ((p->p_flag & P_SA) != 0 && (p->p_flag & P_SIGEVENT) == 0 &&
 	    !SIGSETEQ(before, p->p_sigqueue.sq_signals)) {
 		p->p_flag |= P_SIGEVENT;
 		wakeup(&p->p_siglist);
 	}
 }
 
 /*
  * Report whether stack pointer sp lies on the current thread's
  * alternate signal stack.  Returns 0 when TDP_ALTSTACK is not set.
  */
 int
 sigonstack(size_t sp)
 {
 	struct thread *td = curthread;
 
 	if ((td->td_pflags & TDP_ALTSTACK) == 0)
 		return (0);
 #if defined(COMPAT_43)
 	/* A zero-sized stack falls back on the recorded SS_ONSTACK flag. */
 	if (td->td_sigstk.ss_size == 0)
 		return (td->td_sigstk.ss_flags & SS_ONSTACK);
 #endif
 	/* Unsigned subtraction: an sp below ss_sp wraps and fails the test. */
 	return ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size);
 }
 
 /*
  * Look up the property bits for sig in sigproptbl; signal numbers
  * outside 1..NSIG-1 yield 0.
  */
 static __inline int
 sigprop(int sig)
 {
 
 	if (sig <= 0 || sig >= NSIG)
 		return (0);
 	return (sigproptbl[_SIG_IDX(sig)]);
 }
 
 /*
  * Find the first set bit in *set, scanning words in ascending order.
  * Returns ffs() of the first non-zero word plus 32 per preceding word,
  * or 0 when the set is empty.
  */
 int
 sig_ffs(sigset_t *set)
 {
 	int word;
 
 	for (word = 0; word < _SIG_WORDS; word++) {
 		if (set->__bits[word] == 0)
 			continue;
 		return (ffs(set->__bits[word]) + (word * 32));
 	}
 	return (0);
 }
 
 /*
  * kern_sigaction
  * sigaction
  * freebsd4_sigaction
  * osigaction
  *
  * Common implementation behind the sigaction-style entry points.
  *
  * td	 calling thread; the action table of td->td_proc is used.
  * sig	 signal number; must satisfy _SIG_VALID().
  * act	 new action to install, or NULL to leave the action unchanged.
  * oact	 if non-NULL, receives the action in effect before any change.
  * flags KSA_FREEBSD4 / KSA_OSIGSET record (under the matching COMPAT
  *	 option) that a caught handler was installed via the old ABI.
  *
  * Returns 0 on success, or EINVAL if sig is invalid or if act tries to
  * give SIGKILL or SIGSTOP anything other than SIG_DFL.
  *
  * MPSAFE
  */
 int
 kern_sigaction(td, sig, act, oact, flags)
 	struct thread *td;
 	register int sig;
 	struct sigaction *act, *oact;
 	int flags;
 {
 	struct sigacts *ps;
 	struct proc *p = td->td_proc;
 
 	if (!_SIG_VALID(sig))
 		return (EINVAL);
 
 	/* Both the proc lock and the sigacts mutex are held throughout. */
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	if (oact) {
 		/* Reconstruct the old sigaction from the per-signal sets. */
 		oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)];
 		oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)];
 		oact->sa_flags = 0;
 		if (SIGISMEMBER(ps->ps_sigonstack, sig))
 			oact->sa_flags |= SA_ONSTACK;
 		/* ps_sigintr is the inverse of SA_RESTART. */
 		if (!SIGISMEMBER(ps->ps_sigintr, sig))
 			oact->sa_flags |= SA_RESTART;
 		if (SIGISMEMBER(ps->ps_sigreset, sig))
 			oact->sa_flags |= SA_RESETHAND;
 		if (SIGISMEMBER(ps->ps_signodefer, sig))
 			oact->sa_flags |= SA_NODEFER;
 		if (SIGISMEMBER(ps->ps_siginfo, sig))
 			oact->sa_flags |= SA_SIGINFO;
 		/* The child-related flags are only reported for SIGCHLD. */
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP)
 			oact->sa_flags |= SA_NOCLDSTOP;
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT)
 			oact->sa_flags |= SA_NOCLDWAIT;
 	}
 	if (act) {
 		/* SIGKILL and SIGSTOP may only keep the default action. */
 		if ((sig == SIGKILL || sig == SIGSTOP) &&
 		    act->sa_handler != SIG_DFL) {
 			mtx_unlock(&ps->ps_mtx);
 			PROC_UNLOCK(p);
 			return (EINVAL);
 		}
 
 		/*
 		 * Change setting atomically.
 		 */
 
 		ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask;
 		SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]);
 		/* With SA_SIGINFO the handler is taken from sa_sigaction. */
 		if (act->sa_flags & SA_SIGINFO) {
 			ps->ps_sigact[_SIG_IDX(sig)] =
 			    (__sighandler_t *)act->sa_sigaction;
 			SIGADDSET(ps->ps_siginfo, sig);
 		} else {
 			ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler;
 			SIGDELSET(ps->ps_siginfo, sig);
 		}
 		/* Mirror each sa_flags bit into its per-signal set. */
 		if (!(act->sa_flags & SA_RESTART))
 			SIGADDSET(ps->ps_sigintr, sig);
 		else
 			SIGDELSET(ps->ps_sigintr, sig);
 		if (act->sa_flags & SA_ONSTACK)
 			SIGADDSET(ps->ps_sigonstack, sig);
 		else
 			SIGDELSET(ps->ps_sigonstack, sig);
 		if (act->sa_flags & SA_RESETHAND)
 			SIGADDSET(ps->ps_sigreset, sig);
 		else
 			SIGDELSET(ps->ps_sigreset, sig);
 		if (act->sa_flags & SA_NODEFER)
 			SIGADDSET(ps->ps_signodefer, sig);
 		else
 			SIGDELSET(ps->ps_signodefer, sig);
 		if (sig == SIGCHLD) {
 			if (act->sa_flags & SA_NOCLDSTOP)
 				ps->ps_flag |= PS_NOCLDSTOP;
 			else
 				ps->ps_flag &= ~PS_NOCLDSTOP;
 			if (act->sa_flags & SA_NOCLDWAIT) {
 				/*
 				 * Paranoia: since SA_NOCLDWAIT is implemented
 				 * by reparenting the dying child to PID 1 (and
 				 * trust it to reap the zombie), PID 1 itself
 				 * is forbidden to set SA_NOCLDWAIT.
 				 */
 				if (p->p_pid == 1)
 					ps->ps_flag &= ~PS_NOCLDWAIT;
 				else
 					ps->ps_flag |= PS_NOCLDWAIT;
 			} else
 				ps->ps_flag &= ~PS_NOCLDWAIT;
 			/* Track whether SIGCHLD is explicitly ignored. */
 			if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 				ps->ps_flag |= PS_CLDSIGIGN;
 			else
 				ps->ps_flag &= ~PS_CLDSIGIGN;
 		}
 		/*
 		 * Set bit in ps_sigignore for signals that are set to SIG_IGN,
 		 * and for signals set to SIG_DFL where the default is to
 		 * ignore. However, don't put SIGCONT in ps_sigignore, as we
 		 * have to restart the process.
 		 */
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    (sigprop(sig) & SA_IGNORE &&
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) {
 			/*
 			 * For a P_SA process with this signal pending on the
 			 * process queue, raise P_SIGEVENT and wake sleepers
 			 * on p_siglist before the signal is dropped.
 			 */
 			if ((p->p_flag & P_SA) &&
 			     SIGISMEMBER(p->p_sigqueue.sq_signals, sig)) {
 				p->p_flag |= P_SIGEVENT;
 				wakeup(&p->p_siglist);
 			}
 			/* never to be seen again */
 			sigqueue_delete_proc(p, sig);
 			if (sig != SIGCONT)
 				/* easier in psignal */
 				SIGADDSET(ps->ps_sigignore, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 		} else {
 			/* Not ignored: caught unless the action is SIG_DFL. */
 			SIGDELSET(ps->ps_sigignore, sig);
 			if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)
 				SIGDELSET(ps->ps_sigcatch, sig);
 			else
 				SIGADDSET(ps->ps_sigcatch, sig);
 		}
 #ifdef COMPAT_FREEBSD4
 		/* Only a real handler installed with KSA_FREEBSD4 is marked. */
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_FREEBSD4) == 0)
 			SIGDELSET(ps->ps_freebsd4, sig);
 		else
 			SIGADDSET(ps->ps_freebsd4, sig);
 #endif
 #ifdef COMPAT_43
 		/* Only a real handler installed with KSA_OSIGSET is marked. */
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_OSIGSET) == 0)
 			SIGDELSET(ps->ps_osigset, sig);
 		else
 			SIGADDSET(ps->ps_osigset, sig);
 #endif
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 /*
  * MPSAFE
  */
 int
 sigaction(td, uap)
 	struct thread *td;
 	register struct sigaction_args *uap;
 {
 	struct sigaction act, oact;
 	register struct sigaction *actp, *oactp;
 	int error;
 
 	actp = (uap->act != NULL) ? &act : NULL;
 	oactp = (uap->oact != NULL) ? &oact : NULL;
 	if (actp) {
 		error = copyin(uap->act, actp, sizeof(act));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaction(td, uap->sig, actp, oactp, 0);
 	if (oactp && !error)
 		error = copyout(oactp, uap->oact, sizeof(oact));
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 /*
  * FreeBSD 4 flavour of sigaction: same copyin/copyout handling as
  * sigaction(), but kern_sigaction() is called with KSA_FREEBSD4.
  *
  * MPSAFE
  */
 int
 freebsd4_sigaction(struct thread *td, struct freebsd4_sigaction_args *uap)
 {
 	struct sigaction newact, oldact;
 	struct sigaction *newp, *oldp;
 	int error;
 
 	newp = (uap->act == NULL) ? NULL : &newact;
 	oldp = (uap->oact == NULL) ? NULL : &oldact;
 
 	if (newp != NULL) {
 		error = copyin(uap->act, newp, sizeof(newact));
 		if (error)
 			return (error);
 	}
 
 	error = kern_sigaction(td, uap->sig, newp, oldp, KSA_FREEBSD4);
 	if (error == 0 && oldp != NULL)
 		error = copyout(oldp, uap->oact, sizeof(oldact));
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigaction_args {
 	int	signum;
 	struct	osigaction *nsa;
 	struct	osigaction *osa;
 };
 #endif
 /*
  * MPSAFE
  */
 int
 osigaction(td, uap)
 	struct thread *td;
 	register struct osigaction_args *uap;
 {
 	struct osigaction sa;
 	struct sigaction nsa, osa;
 	register struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	nsap = (uap->nsa != NULL) ? &nsa : NULL;
 	osap = (uap->osa != NULL) ? &osa : NULL;
 
 	if (nsap) {
 		error = copyin(uap->nsa, &sa, sizeof(sa));
 		if (error)
 			return (error);
 		nsap->sa_handler = sa.sa_handler;
 		nsap->sa_flags = sa.sa_flags;
 		OSIG2SIG(sa.sa_mask, nsap->sa_mask);
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		sa.sa_handler = osap->sa_handler;
 		sa.sa_flags = osap->sa_flags;
 		SIG2OSIG(osap->sa_mask, sa.sa_mask);
 		error = copyout(&sa, uap->osa, sizeof(sa));
 	}
 	return (error);
 }
 
 #if !defined(__i386__) && !defined(__alpha__)
 /*
  * Shared stub, compiled when neither __i386__ nor __alpha__ is
  * defined, so the same code need not be replicated everywhere:
  * osigreturn simply forwards to nosys().
  */
 int
 osigreturn(struct thread *td, struct osigreturn_args *uap)
 {
 	struct nosys_args *args;
 
 	args = (struct nosys_args *)uap;
 	return (nosys(td, args));
 }
 #endif
 #endif /* COMPAT_43 */
 
 /*
  * Initialize signal state for process 0: mark as ignored every signal
  * whose default property is SA_IGNORE, with the exception of SIGCONT.
  */
 void
 siginit(struct proc *p)
 {
 	struct sigacts *ps;
 	int sig;
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	for (sig = 1; sig <= NSIG; sig++) {
 		if (sig == SIGCONT)
 			continue;
 		if (sigprop(sig) & SA_IGNORE)
 			SIGADDSET(ps->ps_sigignore, sig);
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 }
 
 /*
  * Reset signal state for an exec of process p.  The proc lock must be
  * held by the caller; the sigacts mutex is taken here.
  */
 void
 execsigs(struct proc *p)
 {
 	struct sigacts *ps;
 	struct thread *td;
 	int sig;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	td = FIRST_THREAD_IN_PROC(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 
 	/*
 	 * Every caught signal goes back to SIG_DFL.  Held signals stay
 	 * held through td_sigmask, except that a formerly caught signal
 	 * whose default is to be ignored also has its pending instances
 	 * removed.
 	 */
 	while (!SIGISEMPTY(ps->ps_sigcatch)) {
 		sig = sig_ffs(&ps->ps_sigcatch);
 		SIGDELSET(ps->ps_sigcatch, sig);
 		ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		if ((sigprop(sig) & SA_IGNORE) == 0)
 			continue;
 		if (sig != SIGCONT)
 			SIGADDSET(ps->ps_sigignore, sig);
 		sigqueue_delete_proc(p, sig);
 	}
 
 	/*
 	 * Put the thread back on the user stack: disable and clear the
 	 * alternate signal stack description.
 	 */
 	td->td_pflags &= ~TDP_ALTSTACK;
 	td->td_sigstk.ss_sp = 0;
 	td->td_sigstk.ss_size = 0;
 	td->td_sigstk.ss_flags = SS_DISABLE;
 
 	/*
 	 * Reset the "no zombies if child dies" flag as Solaris does, and
 	 * turn an ignored SIGCHLD back into the default action.
 	 */
 	ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN);
 	if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 		ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL;
 	mtx_unlock(&ps->ps_mtx);
 }
 
 /*
  * kern_sigprocmask()
  *
  *	Manipulate the calling thread's signal mask.  When oset is
  *	non-NULL it receives the mask in effect before any change.  When
  *	set is non-NULL, how selects SIG_BLOCK, SIG_UNBLOCK or
  *	SIG_SETMASK; any other value yields EINVAL.  A non-zero old makes
  *	SIG_SETMASK go through SIGSETLO() instead of replacing the whole
  *	mask.
  */
 int
 kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset,
     int old)
 {
 	struct proc *p = td->td_proc;
 	int error = 0;
 
 	PROC_LOCK(p);
 	if (oset != NULL)
 		*oset = td->td_sigmask;
 
 	if (set != NULL) {
 		if (how == SIG_BLOCK) {
 			SIG_CANTMASK(*set);
 			SIGSETOR(td->td_sigmask, *set);
 		} else if (how == SIG_UNBLOCK) {
 			SIGSETNAND(td->td_sigmask, *set);
 			signotify(td);
 		} else if (how == SIG_SETMASK) {
 			SIG_CANTMASK(*set);
 			if (old)
 				SIGSETLO(td->td_sigmask, *set);
 			else
 				td->td_sigmask = *set;
 			signotify(td);
 		} else {
 			error = EINVAL;
 		}
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * sigprocmask() - MP SAFE
  */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigprocmask_args {
 	int	how;
 	const sigset_t *set;
 	sigset_t *oset;
 };
 #endif
 int
 sigprocmask(td, uap)
 	register struct thread *td;
 	struct sigprocmask_args *uap;
 {
 	sigset_t set, oset;
 	sigset_t *setp, *osetp;
 	int error;
 
 	setp = (uap->set != NULL) ? &set : NULL;
 	osetp = (uap->oset != NULL) ? &oset : NULL;
 	if (setp) {
 		error = copyin(uap->set, setp, sizeof(set));
 		if (error)
 			return (error);
 	}
 	error = kern_sigprocmask(td, uap->how, setp, osetp, 0);
 	if (osetp && !error) {
 		error = copyout(osetp, uap->oset, sizeof(oset));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 /*
  * osigprocmask() - MP SAFE
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigprocmask_args {
 	int	how;
 	osigset_t mask;
 };
 #endif
 int
 osigprocmask(td, uap)
 	register struct thread *td;
 	struct osigprocmask_args *uap;
 {
 	sigset_t set, oset;
 	int error;
 
 	OSIG2SIG(uap->mask, set);
 	error = kern_sigprocmask(td, uap->how, &set, &oset, 1);
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 /*
  * MPSAFE
  */
 int
 sigwait(struct thread *td, struct sigwait_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error) {
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error) {
 		if (error == ERESTART)
 			return (error);
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo));
 	td->td_retval[0] = error;
 	return (0);
 }
 /*
  * sigtimedwait system call: wait for a signal in the given set, with
  * an optional timeout.  On success the signal number is returned in
  * td_retval[0] and, if requested, the siginfo is copied out.
  *
  * MPSAFE
  */
 int
 sigtimedwait(struct thread *td, struct sigtimedwait_args *uap)
 {
 	struct timespec ts, *timeout;
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	/* The timeout is fetched before the signal set. */
 	timeout = NULL;
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts, sizeof(ts));
 		if (error)
 			return (error);
 		timeout = &ts;
 	}
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 		if (error)
 			return (error);
 	}
 	td->td_retval[0] = ksi.ksi_signo;
 	return (0);
 }
 
 /*
  * sigwaitinfo system call: like sigtimedwait() without a timeout.
  * On success the signal number is returned in td_retval[0] and, if
  * requested, the siginfo is copied out.
  *
  * MPSAFE
  */
 int
 sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info) {
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 		if (error)
 			return (error);
 	}
 	td->td_retval[0] = ksi.ksi_signo;
 	return (0);
 }
 
 /*
  * Common backend for sigwait(), sigtimedwait() and sigwaitinfo().
  *
  * td	   waiting thread.
  * waitset signals to wait for (passed by value; SIG_CANTMASK() is
  *	   applied to the local copy).
  * ksi	   filled in with the accepted signal's information on success.
  * timeout optional relative timeout, or NULL to wait indefinitely.
  *
  * Returns 0 with *ksi filled in when a signal from waitset was
  * accepted.  Otherwise returns EINVAL for a timeout whose tv_nsec is
  * out of range, EAGAIN once the deadline has passed, or the error
  * left by msleep() (ERESTART is turned into EINTR when a timeout was
  * given).  The thread's signal mask is restored before returning.
  */
 static int
 kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi,
 	struct timespec *timeout)
 {
 	struct sigacts *ps;
 	sigset_t savedmask;
 	struct proc *p;
 	int error, sig, hz, i, timevalid = 0;
 	struct timespec rts, ets, ts;
 	struct timeval tv;
 
 	p = td->td_proc;
 	error = 0;
 	sig = 0;
 	SIG_CANTMASK(waitset);
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	savedmask = td->td_sigmask;
 	if (timeout) {
 		/*
 		 * Only compute the absolute deadline (ets) here; an invalid
 		 * timeout is reported later, after pending signals have
 		 * been looked at.
 		 */
 		if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) {
 			timevalid = 1;
 			getnanouptime(&rts);
 		 	ets = rts;
 			timespecadd(&ets, timeout);
 		}
 	}
 
 again:
 	/* Scan waitset for a signal pending on the thread or the process. */
 	for (i = 1; i <= _SIG_MAXSIG; ++i) {
 		if (!SIGISMEMBER(waitset, i))
 			continue;
 		if (SIGISMEMBER(td->td_sigqueue.sq_signals, i)) {
 			/*
 			 * Temporarily mask everything but signal i so that
 			 * cursig() can only select this one.
 			 */
 			SIGFILLSET(td->td_sigmask);
 			SIG_CANTMASK(td->td_sigmask);
 			SIGDELSET(td->td_sigmask, i);
 			mtx_lock(&ps->ps_mtx);
 			sig = cursig(td);
 			/* Restart the scan from 1 if cursig() returned 0. */
 			i = 0;
 			mtx_unlock(&ps->ps_mtx);
 		} else if (SIGISMEMBER(p->p_sigqueue.sq_signals, i)) {
 			if (p->p_flag & P_SA) {
 				p->p_flag |= P_SIGEVENT;
 				wakeup(&p->p_siglist);
 			}
 			/* Claim the process-wide signal for this thread. */
 			sigqueue_move(&p->p_sigqueue, &td->td_sigqueue, i);
 			SIGFILLSET(td->td_sigmask);
 			SIG_CANTMASK(td->td_sigmask);
 			SIGDELSET(td->td_sigmask, i);
 			mtx_lock(&ps->ps_mtx);
 			sig = cursig(td);
 			/* Restart the scan from 1 if cursig() returned 0. */
 			i = 0;
 			mtx_unlock(&ps->ps_mtx);
 		}
 		if (sig)
 			goto out;
 	}
 	/* Nothing found: give up if the previous sleep ended in error. */
 	if (error)
 		goto out;
 
 	/*
 	 * POSIX says this must be checked after looking for pending
 	 * signals.
 	 */
 	if (timeout) {
 		if (!timevalid) {
 			error = EINVAL;
 			goto out;
 		}
 		getnanouptime(&rts);
 		if (timespeccmp(&rts, &ets, >=)) {
 			error = EAGAIN;
 			goto out;
 		}
 		/* Sleep for the time remaining until the deadline. */
 		ts = ets;
 		timespecsub(&ts, &rts);
 		TIMESPEC_TO_TIMEVAL(&tv, &ts);
 		hz = tvtohz(&tv);
 	} else
 		hz = 0;
 
 	/* Sleep with the original mask minus the signals being waited for. */
 	td->td_sigmask = savedmask;
 	SIGSETNAND(td->td_sigmask, waitset);
 	signotify(td);
 	/* The wait channel is the address of the local variable ps. */
 	error = msleep(&ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", hz);
 	if (timeout) {
 		if (error == ERESTART) {
 			/* timeout can not be restarted. */
 			error = EINTR;
 		} else if (error == EAGAIN) {
 			/* We recompute the remaining timeout ourselves. */
 			error = 0;
 		}
 	}
 	goto again;
 
 out:
 	if (sig) {
 		sig_t action;
 
 		/* Dequeue the accepted signal's information into *ksi. */
 		ksiginfo_init(ksi);
 		sigqueue_get(&td->td_sigqueue, sig, ksi);
 		ksi->ksi_signo = sig;
 		if (ksi->ksi_code == SI_TIMER)
 			itimer_accept(p, ksi->ksi_timerid, ksi);
 		error = 0;
 		mtx_lock(&ps->ps_mtx);
 		action = ps->ps_sigact[_SIG_IDX(sig)];
 		mtx_unlock(&ps->ps_mtx);
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_PSIG))
 			ktrpsig(sig, action, &td->td_sigmask, 0);
 #endif
 		_STOPEVENT(p, S_SIG, sig);
 
 	}
 	/* Always restore the caller's mask and re-evaluate pending signals. */
 	td->td_sigmask = savedmask;
 	signotify(td);
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigpending_args {
 	sigset_t	*set;
 };
 #endif
 /*
  * sigpending system call: copy out the union of the signals pending
  * on the process queue and on the calling thread's queue.
  *
  * MPSAFE
  */
 int
 sigpending(struct thread *td, struct sigpending_args *uap)
 {
 	struct proc *p;
 	sigset_t both;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	both = p->p_sigqueue.sq_signals;
 	SIGSETOR(both, td->td_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 
 	return (copyout(&both, uap->set, sizeof(sigset_t)));
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigpending_args {
 	int	dummy;
 };
 #endif
 /*
  * Old-style sigpending: the union of the process and thread pending
  * sets is returned in td_retval[0] via SIG2OSIG().
  *
  * MPSAFE
  */
 int
 osigpending(struct thread *td, struct osigpending_args *uap)
 {
 	struct proc *p;
 	sigset_t both;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	both = p->p_sigqueue.sq_signals;
 	SIGSETOR(both, td->td_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 
 	SIG2OSIG(both, td->td_retval[0]);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 /*
  * Generalized interface signal handler, 4.3-compatible.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigvec_args {
 	int	signum;
 	struct	sigvec *nsv;
 	struct	sigvec *osv;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 osigvec(td, uap)
 	struct thread *td;
 	register struct osigvec_args *uap;
 {
 	struct sigvec vec;
 	struct sigaction nsa, osa;
 	register struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 	nsap = (uap->nsv != NULL) ? &nsa : NULL;
 	osap = (uap->osv != NULL) ? &osa : NULL;
 	if (nsap) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		nsap->sa_handler = vec.sv_handler;
 		OSIG2SIG(vec.sv_mask, nsap->sa_mask);
 		nsap->sa_flags = vec.sv_flags;
 		nsap->sa_flags ^= SA_RESTART;	/* opposite of SV_INTERRUPT */
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		vec.sv_handler = osap->sa_handler;
 		SIG2OSIG(osap->sa_mask, vec.sv_mask);
 		vec.sv_flags = osap->sa_flags;
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigblock_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigblock: OR the given mask into the thread's signal mask
  * and return the previous mask in td_retval[0].
  *
  * MPSAFE
  */
 int
 osigblock(struct thread *td, struct osigblock_args *uap)
 {
 	struct proc *p;
 	sigset_t extra;
 
 	p = td->td_proc;
 	OSIG2SIG(uap->mask, extra);
 	SIG_CANTMASK(extra);
 
 	PROC_LOCK(p);
 	/* Report the old mask before modifying it. */
 	SIG2OSIG(td->td_sigmask, td->td_retval[0]);
 	SIGSETOR(td->td_sigmask, extra);
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigsetmask_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigsetmask: install the given mask through SIGSETLO() and
  * return the previous mask in td_retval[0].
  *
  * MPSAFE
  */
 int
 osigsetmask(struct thread *td, struct osigsetmask_args *uap)
 {
 	struct proc *p;
 	sigset_t replacement;
 
 	p = td->td_proc;
 	OSIG2SIG(uap->mask, replacement);
 	SIG_CANTMASK(replacement);
 
 	PROC_LOCK(p);
 	/* Report the old mask before modifying it. */
 	SIG2OSIG(td->td_sigmask, td->td_retval[0]);
 	SIGSETLO(td->td_sigmask, replacement);
 	/* Signals may have become unmasked. */
 	signotify(td);
 	PROC_UNLOCK(p);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 /*
  * Suspend calling thread until signal, providing mask to be set
  * in the meantime. 
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sigsuspend_args {
 	const sigset_t *sigmask;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 sigsuspend(td, uap)
 	struct thread *td;
 	struct sigsuspend_args *uap;
 {
 	sigset_t mask;
 	int error;
 
 	error = copyin(uap->sigmask, &mask, sizeof(mask));
 	if (error)
 		return (error);
 	return (kern_sigsuspend(td, mask));
 }
 
 /*
  * Install mask as the thread's signal mask and sleep until msleep()
  * returns non-zero.  Always returns EINTR.
  */
 int
 kern_sigsuspend(struct thread *td, sigset_t mask)
 {
 	struct proc *p = td->td_proc;
 	int rv;
 
 	SIG_CANTMASK(mask);
 
 	/*
 	 * When returning from sigsuspend, we want the old mask to be
 	 * restored after the signal handler has finished.  Thus, we save
 	 * it here and set TDP_OLDMASK to indicate this.
 	 */
 	PROC_LOCK(p);
 	td->td_oldsigmask = td->td_sigmask;
 	td->td_pflags |= TDP_OLDMASK;
 	td->td_sigmask = mask;
 	signotify(td);
 	do {
 		rv = msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause",
 		    0);
 	} while (rv == 0);
 	PROC_UNLOCK(p);
 
 	/* always return EINTR rather than ERESTART... */
 	return (EINTR);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 /*
  * Compatibility sigsuspend call for old binaries.  Note the nonstandard
  * calling convention: the libc stub passes the mask itself, not a
  * pointer to it, to save a copyin.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigsuspend_args {
 	osigset_t mask;
 };
 #endif
 /*
  * Saves the current mask for later restoration (TDP_OLDMASK), installs
  * the supplied old-style mask through SIGSETLO() and sleeps until
  * msleep() returns non-zero.  Always returns EINTR.
  *
  * MPSAFE
  */
 /* ARGSUSED */
 int
 osigsuspend(struct thread *td, struct osigsuspend_args *uap)
 {
 	struct proc *p = td->td_proc;
 	sigset_t waitmask;
 	int rv;
 
 	OSIG2SIG(uap->mask, waitmask);
 	SIG_CANTMASK(waitmask);
 
 	PROC_LOCK(p);
 	td->td_oldsigmask = td->td_sigmask;
 	td->td_pflags |= TDP_OLDMASK;
 	SIGSETLO(td->td_sigmask, waitmask);
 	signotify(td);
 	do {
 		rv = msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "opause",
 		    0);
 	} while (rv == 0);
 	PROC_UNLOCK(p);
 
 	/* always return EINTR rather than ERESTART... */
 	return (EINTR);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct osigstack_args {
 	struct	sigstack *nss;
 	struct	sigstack *oss;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 osigstack(td, uap)
 	struct thread *td;
 	register struct osigstack_args *uap;
 {
 	struct sigstack nss, oss;
 	int error = 0;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &nss, sizeof(nss));
 		if (error)
 			return (error);
 	}
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (uap->nss != NULL) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK;
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(oss));
 
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaltstack_args {
 	stack_t	*ss;
 	stack_t	*oss;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 sigaltstack(td, uap)
 	struct thread *td;
 	register struct sigaltstack_args *uap;
 {
 	stack_t ss, oss;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &ss, sizeof(ss));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL,
 	    (uap->oss != NULL) ? &oss : NULL);
 	if (error)
 		return (error);
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(stack_t));
 	return (error);
 }
 
 /*
  * Query and/or change the thread's alternate signal stack.
  *
  * oss, when non-NULL, receives the current stack description with
  * ss_flags synthesized from TDP_ALTSTACK and the on-stack state.
  * ss, when non-NULL, is installed unless the thread is currently on
  * the alternate stack (EPERM), ss_flags has bits other than
  * SS_DISABLE (EINVAL), or the stack is smaller than the ABI's
  * sv_minsigstksz (ENOMEM).  Returns 0 otherwise.
  */
 int
 kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss)
 {
 	struct proc *p = td->td_proc;
 	int onstack;
 
 	onstack = sigonstack(cpu_getstack(td));
 
 	if (oss != NULL) {
 		*oss = td->td_sigstk;
 		if (td->td_pflags & TDP_ALTSTACK)
 			oss->ss_flags = (onstack) ? SS_ONSTACK : 0;
 		else
 			oss->ss_flags = SS_DISABLE;
 	}
 
 	if (ss == NULL)
 		return (0);
 	if (onstack)
 		return (EPERM);
 	if ((ss->ss_flags & ~SS_DISABLE) != 0)
 		return (EINVAL);
 
 	if (ss->ss_flags & SS_DISABLE) {
 		/* Disabling only clears the flag; td_sigstk is untouched. */
 		td->td_pflags &= ~TDP_ALTSTACK;
 		return (0);
 	}
 
 	if (ss->ss_size < p->p_sysent->sv_minsigstksz)
 		return (ENOMEM);
 	td->td_sigstk = *ss;
 	td->td_pflags |= TDP_ALTSTACK;
 	return (0);
 }
 
 /*
  * Common code for kill process group/broadcast kill.
  *
  * td	 calling thread; permission checks are made on its behalf and,
  *	 for a broadcast, its own process is skipped.
  * sig	 signal to post; 0 performs the permission checks only.
  * pgid	 target process group (0 means the caller's group); ignored
  *	 when all is set.
  * all	 non-zero to broadcast to every process on the allproc list.
  *
  * Returns 0 if at least one process passed p_cansignal(), otherwise
  * ESRCH (also returned when the process group cannot be found).
  */
 static int
 killpg1(td, sig, pgid, all)
 	register struct thread *td;
 	int sig, pgid, all;
 {
 	register struct proc *p;
 	struct pgrp *pgrp;
 	int nfound = 0;
 
 	if (all) {
 		/*
 		 * broadcast
 		 */
 		sx_slock(&allproc_lock);
 		LIST_FOREACH(p, &allproc, p_list) {
 			PROC_LOCK(p);
 			/* Skip pid <= 1, system processes and the caller. */
 			if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
 			    p == td->td_proc) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			if (p_cansignal(td, p, sig) == 0) {
 				nfound++;
 				if (sig)
 					psignal(p, sig);
 			}
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
 	} else {
 		sx_slock(&proctree_lock);
 		if (pgid == 0) {
 			/*
 			 * zero pgid means send to my process group.
 			 */
 			pgrp = td->td_proc->p_pgrp;
 			PGRP_LOCK(pgrp);
 		} else {
 			/*
 			 * No PGRP_LOCK() here, yet PGRP_UNLOCK() below runs
 			 * for both cases: presumably pgfind() returns the
 			 * group already locked -- TODO confirm.
 			 */
 			pgrp = pgfind(pgid);
 			if (pgrp == NULL) {
 				sx_sunlock(&proctree_lock);
 				return (ESRCH);
 			}
 		}
 		sx_sunlock(&proctree_lock);
 		LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 			PROC_LOCK(p);	      
 			/* Skip pid <= 1 and system processes. */
 			if (p->p_pid <= 1 || p->p_flag & P_SYSTEM) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			if (p_cansignal(td, p, sig) == 0) {
 				nfound++;
 				if (sig)
 					psignal(p, sig);
 			}
 			PROC_UNLOCK(p);
 		}
 		PGRP_UNLOCK(pgrp);
 	}
 	return (nfound ? 0 : ESRCH);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct kill_args {
 	int	pid;
 	int	signum;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 kill(td, uap)
 	register struct thread *td;
 	register struct kill_args *uap;
 {
 	register struct proc *p;
 	int error;
 
 	AUDIT_ARG(signum, uap->signum);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	if (uap->pid > 0) {
 		/* kill single process */
 		if ((p = pfind(uap->pid)) == NULL) {
 			if ((p = zpfind(uap->pid)) == NULL)
 				return (ESRCH);
 		}
 		AUDIT_ARG(process, p);
 		error = p_cansignal(td, p, uap->signum);
 		if (error == 0 && uap->signum)
 			psignal(p, uap->signum);
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	AUDIT_ARG(pid, uap->pid);
 	switch (uap->pid) {
 	case -1:		/* broadcast signal */
 		return (killpg1(td, uap->signum, 0, 1));
 	case 0:			/* signal own process group */
 		return (killpg1(td, uap->signum, 0, 0));
 	default:		/* negative explicit process group */
 		return (killpg1(td, uap->signum, -uap->pid, 0));
 	}
 	/* NOTREACHED */
 }
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct okillpg_args {
 	int	pgid;
 	int	signum;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 okillpg(td, uap)
 	struct thread *td;
 	register struct okillpg_args *uap;
 {
 
 	AUDIT_ARG(signum, uap->signum);
 	AUDIT_ARG(pid, uap->pgid);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	return (killpg1(td, uap->signum, uap->pgid, 0));
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigqueue_args {
 	pid_t pid;
 	int signum;
 	/* union sigval */ void *value;
 };
 #endif
 
 int
 sigqueue(struct thread *td, struct sigqueue_args *uap)
 {
 	ksiginfo_t ksi;
 	struct proc *p;
 	int error;
 
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	/*
 	 * Specification says sigqueue can only send signal to
 	 * single process.
 	 */
 	if (uap->pid <= 0)
 		return (EINVAL);
 
 	if ((p = pfind(uap->pid)) == NULL) {
 		if ((p = zpfind(uap->pid)) == NULL)
 			return (ESRCH);
 	}
 	error = p_cansignal(td, p, uap->signum);
 	if (error == 0 && uap->signum != 0) {
 		ksiginfo_init(&ksi);
 		ksi.ksi_signo = uap->signum;
 		ksi.ksi_code = SI_QUEUE;
 		ksi.ksi_pid = td->td_proc->p_pid;
 		ksi.ksi_uid = td->td_ucred->cr_ruid;
 		ksi.ksi_value.sival_ptr = uap->value;
 		error = tdsignal(p, NULL, ksi.ksi_signo, &ksi);
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Send a signal to the process group identified by pgid.  A pgid of 0
  * or an unknown group is silently ignored.
  */
 void
 gsignal(int pgid, int sig)
 {
 	struct pgrp *pgrp;
 
 	if (pgid == 0)
 		return;
 
 	sx_slock(&proctree_lock);
 	pgrp = pgfind(pgid);
 	sx_sunlock(&proctree_lock);
 	if (pgrp == NULL)
 		return;
 
 	pgsignal(pgrp, sig, 0);
 	PGRP_UNLOCK(pgrp);
 }
 
 /*
  * Send a signal to a process group.  If checktty is 1,
  * limit to members which have a controlling terminal.
  */
 void
 pgsignal(pgrp, sig, checkctty)
 	struct pgrp *pgrp;
 	int sig, checkctty;
 {
 	register struct proc *p;
 
 	if (pgrp) {
 		PGRP_LOCK_ASSERT(pgrp, MA_OWNED);
 		LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 			PROC_LOCK(p);
 			if (checkctty == 0 || p->p_flag & P_CONTROLT)
 				psignal(p, sig);
 			PROC_UNLOCK(p);
 		}
 	}
 }
 
 /*
  * Send a signal caused by a trap to the current thread.
  * If it will be caught immediately, deliver it with correct code.
  * Otherwise, post it normally.
  *
  * td	the thread that took the trap.
  * ksi	describes the signal; ksi_signo must be a valid signal number
  *	and ksi_code is recorded for tracing and in p_code.
  *
  * The proc lock is taken and released here; the function may not
  * return if sigexit() is invoked.
  *
  * MPSAFE
  */
 void
 trapsignal(struct thread *td, ksiginfo_t *ksi)
 {
 	struct sigacts *ps;
 	struct proc *p;
 	int error;
 	int sig;
 	int code;
 
 	p = td->td_proc;
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	KASSERT(_SIG_VALID(sig), ("invalid signal"));
 
 	if (td->td_pflags & TDP_SA) {
 		if (td->td_mailbox == NULL)
 			thread_user_enter(td);
 		PROC_LOCK(p);
 		/* For TDP_SA threads the signal is unmasked unconditionally. */
 		SIGDELSET(td->td_sigmask, sig);
 		mtx_lock_spin(&sched_lock);
 		/*
 		 * Force scheduling an upcall, so UTS has chance to
 		 * process the signal before thread runs again in
 		 * userland.
 		 */
 		if (td->td_upcall)
 			td->td_upcall->ku_flags |= KUF_DOUPCALL;
 		mtx_unlock_spin(&sched_lock);
 	} else {
 		PROC_LOCK(p);
 	}
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	/* Direct delivery: not traced, signal caught and not masked. */
 	if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) &&
 	    !SIGISMEMBER(td->td_sigmask, sig)) {
 		p->p_stats->p_ru.ru_nsignals++;
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_PSIG))
 			ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
 			    &td->td_sigmask, code);
 #endif
 		if (!(td->td_pflags & TDP_SA))
 			/* Let the ABI's sendsig routine deliver the signal. */
 			(*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], 
 				ksi, &td->td_sigmask);
 		else if (td->td_mailbox == NULL) {
 			mtx_unlock(&ps->ps_mtx);
 			/* UTS caused a sync signal */
 			p->p_code = code;	/* XXX for core dump/debugger */
 			p->p_sig = sig;		/* XXX to verify code */
 			sigexit(td, sig);
 		} else {
 			/*
 			 * Both locks are dropped around copyout() of the
 			 * siginfo into the thread mailbox; the signal is
 			 * masked first.
 			 */
 			mtx_unlock(&ps->ps_mtx);
 			SIGADDSET(td->td_sigmask, sig);
 			PROC_UNLOCK(p);
 			error = copyout(&ksi->ksi_info, &td->td_mailbox->tm_syncsig,
 			    sizeof(siginfo_t));
 			PROC_LOCK(p);
 			/* UTS memory corrupted */
 			if (error)
 				sigexit(td, SIGSEGV);
 			mtx_lock(&ps->ps_mtx);
 		}
 		/* Apply the handler's mask; add sig unless it is in ps_signodefer. */
 		SIGSETOR(td->td_sigmask, ps->ps_catchmask[_SIG_IDX(sig)]);
 		if (!SIGISMEMBER(ps->ps_signodefer, sig))
 			SIGADDSET(td->td_sigmask, sig);
 		if (SIGISMEMBER(ps->ps_sigreset, sig)) {
 			/*
 			 * See kern_sigaction() for origin of this code.
 			 */
 			SIGDELSET(ps->ps_sigcatch, sig);
 			if (sig != SIGCONT &&
 			    sigprop(sig) & SA_IGNORE)
 				SIGADDSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
 		mtx_unlock(&ps->ps_mtx);
 	} else {
 		/*
 		 * Avoid a possible infinite loop if the thread
 		 * masking the signal or process is ignoring the
 		 * signal.
 		 */
 		if (kern_forcesigexit &&
 		    (SIGISMEMBER(td->td_sigmask, sig) ||
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) {
 			/* Force the default action for this signal. */
 			SIGDELSET(td->td_sigmask, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 			SIGDELSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
 		mtx_unlock(&ps->ps_mtx);
 		p->p_code = code;	/* XXX for core dump/debugger */
 		p->p_sig = sig;		/* XXX to verify code */
 		/* Post the signal to this thread through the normal path. */
 		tdsignal(p, td, sig, ksi);
 	}
 	PROC_UNLOCK(p);
 }
 
 /*
  * Choose the thread of process p that should take signal sig.
  *
  * The calling thread is preferred when it belongs to p and does not mask
  * the signal.  Otherwise the first thread found that does not mask the
  * signal is used; if every thread masks it, the first thread of the
  * process is returned.  The process lock must be held.  The prop argument
  * is accepted but not consulted here.
  */
 static struct thread *
 sigtd(struct proc *p, int sig, int prop)
 {
 	struct thread *cand, *target;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Fast path: the current thread can handle the signal itself,
 	 * so no context switch to another thread is needed.
 	 */
 	if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig))
 		return (curthread);
 
 	target = NULL;
 	mtx_lock_spin(&sched_lock);
 	FOREACH_THREAD_IN_PROC(p, cand) {
 		if (SIGISMEMBER(cand->td_sigmask, sig))
 			continue;
 		target = cand;
 		break;
 	}
 	/* Everybody masks it: fall back to the first thread. */
 	if (target == NULL)
 		target = FIRST_THREAD_IN_PROC(p);
 	mtx_unlock_spin(&sched_lock);
 
 	return (target);
 }
 
 /*
  * Send the signal to the process.  If the signal has an action, the action
  * is usually performed by the target process rather than the caller; we add
  * the signal to the set of pending signals for the process.
  *
  * Exceptions:
  *   o When a stop signal is sent to a sleeping process that takes the
  *     default action, the process is stopped without awakening it.
  *   o SIGCONT restarts stopped processes (or puts them back to sleep)
  *     regardless of the signal action (eg, blocked or ignored).
  *
  * Other ignored signals are discarded immediately.
  *
  * This is a thin wrapper around tdsignal() that names no target thread and
  * supplies no ksiginfo; the result of tdsignal() is deliberately discarded.
  *
  * MPSAFE
  */
 void
 psignal(struct proc *p, int sig)
 {
 	(void) tdsignal(p, NULL, sig, NULL);
 }
 
 /*
  * Deliver the signal described by a sigevent to process p.
  *
  * The signal number and value are copied from sigev into ksi; ksi_code and
  * the remaining fields must already have been filled in by the caller.
  * For SIGEV_THREAD_ID notification the signal is aimed at the thread named
  * by sigev_notify_thread_id, and ESRCH is returned when thread_find() does
  * not locate it.  Otherwise the result of tdsignal() is returned.  The
  * process lock must be held and ksi must not already be on a queue.
  */
 int
 psignal_event(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi)
 {
 	struct thread *target;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(!KSI_ONQ(ksi), ("psignal_event: ksi on queue"));
 
 	ksi->ksi_signo = sigev->sigev_signo;
 	ksi->ksi_value = sigev->sigev_value;
 
 	/* Process-directed notification: let tdsignal() pick a thread. */
 	if (sigev->sigev_notify != SIGEV_THREAD_ID)
 		return (tdsignal(p, NULL, ksi->ksi_signo, ksi));
 
 	/* Thread-directed notification: the thread has to exist. */
 	target = thread_find(p, sigev->sigev_notify_thread_id);
 	if (target == NULL)
 		return (ESRCH);
 	return (tdsignal(p, target, ksi->ksi_signo, ksi));
 }
 
 /*
  * Post a signal via do_tdsignal() and return its result.
  *
  * For a process with P_SA set, the process-wide pending set is compared
  * before and after the call; if it changed and P_SIGEVENT was not already
  * set, P_SIGEVENT is raised and sleepers on &p->p_siglist are woken.
  *
  * MPSAFE
  */
 int
 tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	sigset_t before;
 	int error;
 
 	/* Snapshot is only taken (and only consulted) for P_SA processes. */
 	if (p->p_flag & P_SA)
 		before = p->p_sigqueue.sq_signals;
 
 	error = do_tdsignal(p, td, sig, ksi);
 
 	if ((p->p_flag & P_SA) && !(p->p_flag & P_SIGEVENT) &&
 	    !SIGSETEQ(before, p->p_sigqueue.sq_signals)) {
 		/* pending set changed */
 		p->p_flag |= P_SIGEVENT;
 		wakeup(&p->p_siglist);
 	}
 	return (error);
 }
 
 /*
  * Worker for tdsignal(): post signal sig to process p, optionally aimed at
  * a specific thread td (td == NULL lets sigtd() choose one) and optionally
  * carrying queued information in ksi.
  *
  * The caller must hold the process lock (asserted below); an invalid signal
  * number panics.  Returns 0 when the signal was discarded or queued, or the
  * non-zero result of sigqueue_add() when queueing failed.  After queueing,
  * the immediate process-wide part of delivery is done here: continuing or
  * stopping the process and knocking the chosen thread out of an
  * interruptible sleep.  The interruption value handed on is EINTR when the
  * signal is a member of ps_sigintr and ERESTART otherwise.
  */
 static int
 do_tdsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	sig_t action;
 	sigqueue_t *sigqueue;
 	int prop;
 	struct sigacts *ps;
+	int intrval;
 	int ret = 0;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (!_SIG_VALID(sig))
 		panic("do_tdsignal(): invalid signal");
 
 	KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("do_tdsignal: ksi on queue"));
 
 	/*
 	 * IEEE Std 1003.1-2001: return success when killing a zombie.
 	 */
 	if (p->p_state == PRS_ZOMBIE) {
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 
 	ps = p->p_sigacts;
 	KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig);
 	prop = sigprop(sig);
 
 	/*
 	 * If the signal is blocked and not destined for this thread, then
 	 * assign it to the process so that we can find it later in the first
 	 * thread that unblocks it.  Otherwise, assign it to this thread now.
 	 */
 	if (td == NULL) {
 		td = sigtd(p, sig, prop);
 		if (SIGISMEMBER(td->td_sigmask, sig))
 			sigqueue = &p->p_sigqueue;
 		else
 			sigqueue = &td->td_sigqueue;
 	} else {
 		KASSERT(td->td_proc == p, ("invalid thread"));
 		sigqueue = &td->td_sigqueue;
 	}
 
 	/*
 	 * If the signal is being ignored,
 	 * or process is exiting or thread is exiting,
 	 * then we forget about it immediately.
 	 * (Note: we don't set SIGCONT in ps_sigignore,
 	 * and if it is set to SIG_IGN,
 	 * action will be SIG_DFL here.)
 	 */
 	mtx_lock(&ps->ps_mtx);
 	if (SIGISMEMBER(ps->ps_sigignore, sig) ||
 	    (p->p_flag & P_WEXIT)) {
 		mtx_unlock(&ps->ps_mtx);
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 	/* Classify the disposition: held (masked), caught, or default. */
 	if (SIGISMEMBER(td->td_sigmask, sig))
 		action = SIG_HOLD;
 	else if (SIGISMEMBER(ps->ps_sigcatch, sig))
 		action = SIG_CATCH;
 	else
 		action = SIG_DFL;
+	if (SIGISMEMBER(ps->ps_sigintr, sig))
+		intrval = EINTR;
+	else
+		intrval = ERESTART;
 	mtx_unlock(&ps->ps_mtx);
 
 	/*
 	 * A continue signal cancels pending stop signals; a stop signal
 	 * cancels a pending SIGCONT.
 	 */
 	if (prop & SA_CONT)
 		sigqueue_delete_stopmask_proc(p);
 	else if (prop & SA_STOP) {
 		/*
 		 * If sending a tty stop signal to a member of an orphaned
 		 * process group, discard the signal here if the action
 		 * is default; don't stop the process below if sleeping,
 		 * and don't clear any pending SIGCONT.
 		 */
 		if ((prop & SA_TTYSTOP) &&
 		    (p->p_pgrp->pg_jobc == 0) &&
 		    (action == SIG_DFL)) {
 			if (ksi && (ksi->ksi_flags & KSI_INS))
 				ksiginfo_tryfree(ksi);
 			return (ret);
 		}
 		sigqueue_delete_proc(p, SIGCONT);
 		if (p->p_flag & P_CONTINUED) {
 			p->p_flag &= ~P_CONTINUED;
 			PROC_LOCK(p->p_pptr);
 			sigqueue_take(p->p_ksi);
 			PROC_UNLOCK(p->p_pptr);
 		}
 	}
 
 	/* Queue the signal; a queueing failure is reported to the caller. */
 	ret = sigqueue_add(sigqueue, sig, ksi);
 	if (ret != 0)
 		return (ret);
 	signotify(td);
 	/*
 	 * Defer further processing for signals which are held,
 	 * except that stopped processes must be continued by SIGCONT.
 	 */
 	if (action == SIG_HOLD &&
 	    !((prop & SA_CONT) && (p->p_flag & P_STOPPED_SIG)))
 		return (ret);
 	/*
 	 * SIGKILL: Remove procfs STOPEVENTs.
 	 */
 	if (sig == SIGKILL) {
 		/* from procfs_ioctl.c: PIOCBIC */
 		p->p_stops = 0;
 		/* from procfs_ioctl.c: PIOCCONT */
 		p->p_step = 0;
 		wakeup(&p->p_step);
 	}
 	/*
 	 * Some signals have a process-wide effect and a per-thread
 	 * component.  Most processing occurs when the process next
 	 * tries to cross the user boundary, however there are some
 	 * times when processing needs to be done immediately, such as
 	 * waking up threads so that they can cross the user boundary.
 	 * We try to do the per-process part here.
 	 */
 	if (P_SHOULDSTOP(p)) {
 		/*
 		 * The process is in stopped mode. All the threads should be
 		 * either winding down or already on the suspended queue.
 		 */
 		if (p->p_flag & P_TRACED) {
 			/*
 			 * The traced process is already stopped,
 			 * so no further action is necessary.
 			 * No signal can restart us.
 			 */
 			goto out;
 		}
 
 		if (sig == SIGKILL) {
 			/*
 			 * SIGKILL sets process running.
 			 * It will die elsewhere.
 			 * All threads must be restarted.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			goto runfast;
 		}
 
 		if (prop & SA_CONT) {
 			/*
 			 * If SIGCONT is default (or ignored), we continue the
 			 * process but don't leave the signal in sigqueue as
 			 * it has no further action.  If SIGCONT is held, we
 			 * continue the process and leave the signal in
 			 * sigqueue.  If the process catches SIGCONT, let it
 			 * handle the signal itself.  If it isn't waiting on
 			 * an event, it goes back to run state.
 			 * Otherwise, process goes back to sleep state.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			if (p->p_numthreads == p->p_suspcount) {
 				p->p_flag |= P_CONTINUED;
 				p->p_xstat = SIGCONT;
 				PROC_LOCK(p->p_pptr);
 				childproc_continued(p);
 				PROC_UNLOCK(p->p_pptr);
 			}
 			if (action == SIG_DFL) {
 				sigqueue_delete(sigqueue, sig);
 			} else if (action == SIG_CATCH) {
 				/*
 				 * The process wants to catch it so it needs
 				 * to run at least one thread, but which one?
 				 * It would seem that the answer would be to
 				 * run an upcall in the next KSE to run, and
 				 * deliver the signal that way. In a NON KSE
 				 * process, we need to make sure that the
 				 * single thread is runnable asap.
 				 * XXXKSE for now however, make them all run.
 				 */
 				goto runfast;
 			}
 			/*
 			 * The signal is not ignored or caught.
 			 */
 			mtx_lock_spin(&sched_lock);
 			thread_unsuspend(p);
 			mtx_unlock_spin(&sched_lock);
 			goto out;
 		}
 
 		if (prop & SA_STOP) {
 			/*
 			 * Already stopped, don't need to stop again
 			 * (If we did the shell could get confused).
 			 * Just make sure the signal STOP bit set.
 			 */
 			p->p_flag |= P_STOPPED_SIG;
 			sigqueue_delete(sigqueue, sig);
 			goto out;
 		}
 
 		/*
 		 * All other kinds of signals:
 		 * If a thread is sleeping interruptibly, simulate a
 		 * wakeup so that when it is continued it will be made
 		 * runnable and can look at the signal.  However, don't make
 		 * the PROCESS runnable, leave it stopped.
 		 * It may run a bit until it hits a thread_suspend_check().
 		 */
 		mtx_lock_spin(&sched_lock);
 		if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR))
-			sleepq_abort(td);
+			sleepq_abort(td, intrval);
 		mtx_unlock_spin(&sched_lock);
 		goto out;
 		/*
 		 * Mutexes are short lived. Threads waiting on them will
 		 * hit thread_suspend_check() soon.
 		 */
 	} else if (p->p_state == PRS_NORMAL) {
 		if (p->p_flag & P_TRACED || action == SIG_CATCH) {
 			mtx_lock_spin(&sched_lock);
-			tdsigwakeup(td, sig, action);
+			tdsigwakeup(td, sig, action, intrval);
 			mtx_unlock_spin(&sched_lock);
 			goto out;
 		}
 
 		MPASS(action == SIG_DFL);
 
 		if (prop & SA_STOP) {
 			if (p->p_flag & P_PPWAIT)
 				goto out;
 			p->p_flag |= P_STOPPED_SIG;
 			p->p_xstat = sig;
 			mtx_lock_spin(&sched_lock);
 			sig_suspend_threads(td, p, 1);
 			if (p->p_numthreads == p->p_suspcount) {
 				/*
 				 * only thread sending signal to another
 				 * process can reach here, if thread is sending
 				 * signal to its process, because thread does
 				 * not suspend itself here, p_numthreads
 				 * should never be equal to p_suspcount.
 				 */
 				thread_stopped(p);
 				mtx_unlock_spin(&sched_lock);
 				sigqueue_delete_proc(p, p->p_xstat);
 			} else
 				mtx_unlock_spin(&sched_lock);
 			goto out;
 		} 
 		else
 			goto runfast;
 		/* NOTREACHED */
 	} else {
 		/* Not in "NORMAL" state. discard the signal. */
 		sigqueue_delete(sigqueue, sig);
 		goto out;
 	}
 
 	/*
 	 * The process is not stopped so we need to apply the signal to all the
 	 * running threads.
 	 */
 
 runfast:
 	mtx_lock_spin(&sched_lock);
-	tdsigwakeup(td, sig, action);
+	tdsigwakeup(td, sig, action, intrval);
 	thread_unsuspend(p);
 	mtx_unlock_spin(&sched_lock);
 out:
 	/* If we jump here, sched_lock should not be owned. */
 	mtx_assert(&sched_lock, MA_NOTOWNED);
 	return (ret);
 }
 
 /*
  * The force of a signal has been directed against a single
  * thread.  We need to see what we can do about knocking it
  * out of any sleep it may be in etc.
  *
  * td is the target thread, sig the signal, action its disposition as
  * classified by the caller, and intrval the value passed on to
  * sleepq_abort() when an interruptible sleep is aborted.  Both the process
  * lock and sched_lock must be held on entry (asserted below); sched_lock is
  * dropped and re-taken in the default-action SIGCONT case.
  */
 static void
-tdsigwakeup(struct thread *td, int sig, sig_t action)
+tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
 {
 	struct proc *p = td->td_proc;
 	register int prop;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 	prop = sigprop(sig);
 
 	/*
 	 * Bring the priority of a thread up if we want it to get
 	 * killed in this lifetime.
 	 */
 	if (action == SIG_DFL && (prop & SA_KILL)) {
 		if (p->p_nice > 0)
 			sched_nice(td->td_proc, 0);
 		if (td->td_priority > PUSER)
 			sched_prio(td, PUSER);
 	}
 
 	if (TD_ON_SLEEPQ(td)) {
 		/*
 		 * If thread is sleeping uninterruptibly
 		 * we can't interrupt the sleep... the signal will
 		 * be noticed when the process returns through
 		 * trap() or syscall().
 		 */
 		if ((td->td_flags & TDF_SINTR) == 0)
 			return;
 		/*
 		 * If SIGCONT is default (or ignored) and process is
 		 * asleep, we are finished; the process should not
 		 * be awakened.
 		 */
 		if ((prop & SA_CONT) && action == SIG_DFL) {
 			/* sched_lock is released around the queue updates. */
 			mtx_unlock_spin(&sched_lock);
 			sigqueue_delete(&p->p_sigqueue, sig);
 			/*
 			 * It may be on either list in this state.
 			 * Remove from both for now.
 			 */
 			sigqueue_delete(&td->td_sigqueue, sig);
 			mtx_lock_spin(&sched_lock);
 			return;
 		}
 
 		/*
 		 * Give low priority threads a better chance to run.
 		 */
 		if (td->td_priority > PUSER)
 			sched_prio(td, PUSER);
 
-		sleepq_abort(td);
+		sleepq_abort(td, intrval);
 	} else {
 		/*
 		 * Other states do nothing with the signal immediately,
 		 * other than kicking ourselves if we are running.
 		 * It will either never be noticed, or noticed very soon.
 		 */
 #ifdef SMP
 		if (TD_IS_RUNNING(td) && td != curthread)
 			forward_signal(td);
 #endif
 	}
 }
 
 /*
  * Walk every thread of process p in response to a stop request.
  *
  * Threads that are sleeping (or swapped) interruptibly and not yet
  * suspended are suspended on the spot.  Every other thread gets
  * TDF_ASTPENDING set -- except td itself when 'sending' is zero -- and, on
  * SMP kernels, a running thread other than td is poked with
  * forward_signal().  The process lock and sched_lock must both be held.
  */
 static void
 sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 {
 	struct thread *other;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	FOREACH_THREAD_IN_PROC(p, other) {
 		/* Interruptible sleepers can be parked immediately. */
 		if ((TD_IS_SLEEPING(other) || TD_IS_SWAPPED(other)) &&
 		    (other->td_flags & TDF_SINTR) &&
 		    !TD_IS_SUSPENDED(other)) {
 			thread_suspend_one(other);
 			continue;
 		}
 
 		/* Everyone else is asked to notice at the next AST. */
 		if (sending || td != other)
 			other->td_flags |= TDF_ASTPENDING;
 #ifdef SMP
 		if (TD_IS_RUNNING(other) && other != td)
 			forward_signal(other);
 #endif
 	}
 }
 
 /*
  * Stop thread td of a traced process on behalf of signal sig.
  *
  * The signal is recorded in td_xsig and TDF_XSIG is set; the thread then
  * suspends itself repeatedly for as long as the process is traced and
  * TDF_XSIG remains set.  Returns sig immediately (after clearing TDF_XSIG)
  * when the process has P_SINGLE_EXIT set at the top of the loop, otherwise
  * returns the value found in td_xsig once the loop ends.  The process lock
  * must be held on entry; it is dropped across the context switch and
  * re-acquired before returning.
  */
 int
 ptracestop(struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 	    &p->p_mtx.mtx_object, "Stopping for traced signal");
 
 	mtx_lock_spin(&sched_lock);
 	td->td_flags |= TDF_XSIG;
 	mtx_unlock_spin(&sched_lock);
 	td->td_xsig = sig;
 	while ((p->p_flag & P_TRACED) && (td->td_flags & TDF_XSIG)) {
 		if (p->p_flag & P_SINGLE_EXIT) {
 			mtx_lock_spin(&sched_lock);
 			td->td_flags &= ~TDF_XSIG;
 			mtx_unlock_spin(&sched_lock);
 			return (sig);
 		}
 		/*
 		 * Just make wait() to work, the last stopped thread
 		 * will win.
 		 */
 		p->p_xstat = sig;
 		p->p_xthread = td;
 		p->p_flag |= (P_STOPPED_SIG|P_STOPPED_TRACE);
 		mtx_lock_spin(&sched_lock);
 		sig_suspend_threads(td, p, 0);
 stopme:
 		/* Entered with sched_lock held, both here and via the goto. */
 		thread_stopped(p);
 		thread_suspend_one(td);
 		PROC_UNLOCK(p);
 		DROP_GIANT();
 		mi_switch(SW_VOL, NULL);
 		mtx_unlock_spin(&sched_lock);
 		PICKUP_GIANT();
 		PROC_LOCK(p);
 		if (!(p->p_flag & P_TRACED))
 			break;
 		/* Still debugger-suspended: go back to sleep unless exiting. */
 		if (td->td_flags & TDF_DBSUSPEND) {
 			if (p->p_flag & P_SINGLE_EXIT)
 				break;
 			mtx_lock_spin(&sched_lock);
 			goto stopme;
 		}
 	}
 	return (td->td_xsig);
 }
 
 /*
  * If the current process has received a signal (should be caught or cause
  * termination, should interrupt current syscall), return the signal number.
  * Stop signals with default action are processed immediately, then cleared;
  * they aren't returned.  This is checked after each entry to the system for
  * a syscall or trap (though this can usually be done without calling issignal
  * by checking the pending signal masks in cursig.) The normal call
  * sequence is
  *
  *	while (sig = cursig(curthread))
  *		postsig(sig);
  *
  * Returns 0 when the thread has no deliverable signal pending.  Both the
  * process lock and ps_mtx must be held on entry (asserted below); ps_mtx
  * is dropped temporarily around stopevent(), ptracestop() and the
  * default-action stop, and the process lock is dropped across the context
  * switch in the stop case.
  */
 static int
 issignal(td)
 	struct thread *td;
 {
 	struct proc *p;
 	struct sigacts *ps;
 	sigset_t sigpending;
 	int sig, prop, newsig;
 
 	p = td->td_proc;
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	for (;;) {
 		int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
 
 		/* Pending for this thread, minus whatever it masks. */
 		sigpending = td->td_sigqueue.sq_signals;
 		SIGSETNAND(sigpending, td->td_sigmask);
 
 		if (p->p_flag & P_PPWAIT)
 			SIG_STOPSIGMASK(sigpending);
 		if (SIGISEMPTY(sigpending))	/* no signal to send */
 			return (0);
 		sig = sig_ffs(&sigpending);
 
 		if (p->p_stops & S_SIG) {
 			mtx_unlock(&ps->ps_mtx);
 			stopevent(p, S_SIG, sig);
 			mtx_lock(&ps->ps_mtx);
 		}
 
 		/*
 		 * We should see pending but ignored signals
 		 * only if P_TRACED was on when they were posted.
 		 */
 		if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) {
 			sigqueue_delete(&td->td_sigqueue, sig);
 			if (td->td_pflags & TDP_SA)
 				SIGADDSET(td->td_sigmask, sig);
 			continue;
 		}
 		if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
 			/*
 			 * If traced, always stop.
 			 */
 			mtx_unlock(&ps->ps_mtx);
 			newsig = ptracestop(td, sig);
 			mtx_lock(&ps->ps_mtx);
 
 			if (td->td_pflags & TDP_SA)
 				SIGADDSET(td->td_sigmask, sig);
 
 			if (sig != newsig) {
 				ksiginfo_t ksi;
 				/*
 				 * clear old signal.
 				 * XXX shrug off debugger, it causes siginfo to
 				 * be thrown away.
 				 */
 				sigqueue_get(&td->td_sigqueue, sig, &ksi);
 
 				/*
 				 * If parent wants us to take the signal,
 				 * then it will leave it in p->p_xstat;
 				 * otherwise we just look for signals again.
 			 	*/
 				if (newsig == 0)
 					continue;
 				sig = newsig;
 
 				/*
 				 * Put the new signal into td_sigqueue. If the
 				 * signal is being masked, look for other signals.
 				 */
 				SIGADDSET(td->td_sigqueue.sq_signals, sig);
 				if (td->td_pflags & TDP_SA)
 					SIGDELSET(td->td_sigmask, sig);
 				if (SIGISMEMBER(td->td_sigmask, sig))
 					continue;
 				signotify(td);
 			}
 
 			/*
 			 * If the traced bit got turned off, go back up
 			 * to the top to rescan signals.  This ensures
 			 * that p_sig* and p_sigact are consistent.
 			 */
 			if ((p->p_flag & P_TRACED) == 0)
 				continue;
 		}
 
 		prop = sigprop(sig);
 
 		/*
 		 * Decide whether the signal should be returned.
 		 * Return the signal's number, or fall through
 		 * to clear it from the pending mask.
 		 */
 		switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) {
 
 		case (intptr_t)SIG_DFL:
 			/*
 			 * Don't take default actions on system processes.
 			 */
 			if (p->p_pid <= 1) {
 #ifdef DIAGNOSTIC
 				/*
 				 * Are you sure you want to ignore SIGSEGV
 				 * in init? XXX
 				 */
 				printf("Process (pid %lu) got signal %d\n",
 					(u_long)p->p_pid, sig);
 #endif
 				break;		/* == ignore */
 			}
 			/*
 			 * If there is a pending stop signal to process
 			 * with default action, stop here,
 			 * then clear the signal.  However,
 			 * if process is member of an orphaned
 			 * process group, ignore tty stop signals.
 			 */
 			if (prop & SA_STOP) {
 				if (p->p_flag & P_TRACED ||
 		    		    (p->p_pgrp->pg_jobc == 0 &&
 				     prop & SA_TTYSTOP))
 					break;	/* == ignore */
 				mtx_unlock(&ps->ps_mtx);
 				WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 				    &p->p_mtx.mtx_object, "Catching SIGSTOP");
 				p->p_flag |= P_STOPPED_SIG;
 				p->p_xstat = sig;
 				mtx_lock_spin(&sched_lock);
 				sig_suspend_threads(td, p, 0);
 				thread_stopped(p);
 				thread_suspend_one(td);
 				PROC_UNLOCK(p);
 				DROP_GIANT();
 				mi_switch(SW_INVOL, NULL);
 				mtx_unlock_spin(&sched_lock);
 				PICKUP_GIANT();
 				PROC_LOCK(p);
 				mtx_lock(&ps->ps_mtx);
 				break;
 			} else if (prop & SA_IGNORE) {
 				/*
 				 * Except for SIGCONT, shouldn't get here.
 				 * Default action is to ignore; drop it.
 				 */
 				break;		/* == ignore */
 			} else
 				return (sig);
 			/*NOTREACHED*/
 
 		case (intptr_t)SIG_IGN:
 			/*
 			 * Masking above should prevent us ever trying
 			 * to take action on an ignored signal other
 			 * than SIGCONT, unless process is traced.
 			 */
 			if ((prop & SA_CONT) == 0 &&
 			    (p->p_flag & P_TRACED) == 0)
 				printf("issignal\n");
 			break;		/* == ignore */
 
 		default:
 			/*
 			 * This signal has an action, let
 			 * postsig() process it.
 			 */
 			return (sig);
 		}
 		sigqueue_delete(&td->td_sigqueue, sig);		/* take the signal! */
 	}
 	/* NOTREACHED */
 }
 
 /*
  * Report a signal-induced stop of process p to its parent.
  *
  * The notification is sent only when P_STOPPED_SIG is set and the number
  * of suspended threads -- counting the caller as one more when it belongs
  * to p -- equals the number of threads in the process.  In that case
  * P_WAITED is cleared and childproc_stopped() is called with CLD_TRAPPED
  * for a traced process or CLD_STOPPED otherwise.  The process lock and
  * sched_lock must be held; sched_lock is released around the notification
  * and re-acquired before returning.
  *
  * MPSAFE
  */
 void
 thread_stopped(struct proc *p)
 {
 	int stopped;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	stopped = p->p_suspcount;
 	if (curthread->td_proc == p)
 		stopped++;
 
 	/* Nothing to report until the whole process has come to rest. */
 	if ((p->p_flag & P_STOPPED_SIG) == 0 || stopped != p->p_numthreads)
 		return;
 
 	mtx_unlock_spin(&sched_lock);
 	p->p_flag &= ~P_WAITED;
 	PROC_LOCK(p->p_pptr);
 	if (p->p_flag & P_TRACED)
 		childproc_stopped(p, CLD_TRAPPED);
 	else
 		childproc_stopped(p, CLD_STOPPED);
 	PROC_UNLOCK(p->p_pptr);
 	mtx_lock_spin(&sched_lock);
 }
  
 /*
  * Take the action for the specified signal
  * from the current set of pending signals.
  *
  * Operates on curthread.  The process lock and ps_mtx must be held on
  * entry (asserted below).  The signal's queued information is pulled from
  * the thread's signal queue.  With a default action (non-TDP_SA thread),
  * or SIGKILL on a TDP_SA thread, the process exits through sigexit();
  * otherwise the signal mask is updated for the handler and the signal is
  * handed to thread_signal_add() (TDP_SA) or the ABI's sv_sendsig hook.
  */
 void
 postsig(sig)
 	register int sig;
 {
 	struct thread *td = curthread;
 	register struct proc *p = td->td_proc;
 	struct sigacts *ps;
 	sig_t action;
 	ksiginfo_t ksi;
 	sigset_t returnmask;
 	int code;
 
 	KASSERT(sig != 0, ("postsig"));
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	ksiginfo_init(&ksi);
 	sigqueue_get(&td->td_sigqueue, sig, &ksi);
 	ksi.ksi_signo = sig;
 	if (ksi.ksi_code == SI_TIMER)
 		itimer_accept(p, ksi.ksi_timerid, &ksi);
 	action = ps->ps_sigact[_SIG_IDX(sig)];
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_PSIG))
 		ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ?
 		    &td->td_oldsigmask : &td->td_sigmask, 0);
 #endif
 	if (p->p_stops & S_SIG) {
 		mtx_unlock(&ps->ps_mtx);
 		stopevent(p, S_SIG, sig);
 		mtx_lock(&ps->ps_mtx);
 	}
 
 	if (!(td->td_pflags & TDP_SA) && action == SIG_DFL) {
 		/*
 		 * Default action, where the default is to kill
 		 * the process.  (Other cases were ignored above.)
 		 */
 		mtx_unlock(&ps->ps_mtx);
 		sigexit(td, sig);
 		/* NOTREACHED */
 	} else {
 		if (td->td_pflags & TDP_SA) {
 			if (sig == SIGKILL) {
 				mtx_unlock(&ps->ps_mtx);
 				sigexit(td, sig);
 			}
 		}
 
 		/*
 		 * If we get here, the signal must be caught.
 		 */
 		KASSERT(action != SIG_IGN && !SIGISMEMBER(td->td_sigmask, sig),
 		    ("postsig action"));
 		/*
 		 * Set the new mask value and also defer further
 		 * occurrences of this signal.
 		 *
 		 * Special case: user has done a sigsuspend.  Here the
 		 * current mask is not of interest, but rather the
 		 * mask from before the sigsuspend is what we want
 		 * restored after the signal processing is completed.
 		 */
 		if (td->td_pflags & TDP_OLDMASK) {
 			returnmask = td->td_oldsigmask;
 			td->td_pflags &= ~TDP_OLDMASK;
 		} else
 			returnmask = td->td_sigmask;
 
 		SIGSETOR(td->td_sigmask, ps->ps_catchmask[_SIG_IDX(sig)]);
 		if (!SIGISMEMBER(ps->ps_signodefer, sig))
 			SIGADDSET(td->td_sigmask, sig);
 
 		if (SIGISMEMBER(ps->ps_sigreset, sig)) {
 			/*
 			 * See kern_sigaction() for origin of this code.
 			 */
 			SIGDELSET(ps->ps_sigcatch, sig);
 			if (sig != SIGCONT &&
 			    sigprop(sig) & SA_IGNORE)
 				SIGADDSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
 		p->p_stats->p_ru.ru_nsignals++;
 		/*
 		 * Consume the saved p_sig/p_code pair if it belongs to this
 		 * signal.  NOTE(review): 'code' is assigned here but is not
 		 * read again in this function.
 		 */
 		if (p->p_sig != sig) {
 			code = 0;
 		} else {
 			code = p->p_code;
 			p->p_code = 0;
 			p->p_sig = 0;
 		}
 		if (td->td_pflags & TDP_SA)
 			thread_signal_add(curthread, &ksi);
 		else
 			(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
 	}
 }
 
 /*
  * Kill process p for the stated reason.
  *
  * Records a KTR trace entry, logs the pid, command name, uid (-1 when the
  * process has no credential) and the 'why' string at LOG_ERR, then posts
  * SIGKILL to the process.  The process lock must be held by the caller.
  */
 void
 killproc(p, why)
 	struct proc *p;
 	char *why;
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)",
 		p, p->p_pid, p->p_comm);
 	log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm,
 		p->p_ucred ? p->p_ucred->cr_uid : -1, why);
 	psignal(p, SIGKILL);
 }
 
 /*
  * Force the current process to exit with the specified signal, dumping core
  * if appropriate.  We bypass the normal tests for masked and caught signals,
  * allowing unrecoverable failures to terminate the process without changing
  * signal state.  Mark the accounting record with the signal termination.
  * If dumping core, save the signal number for the debugger.  Calls exit and
  * does not return.
  *
  * The process lock must be held on entry.  It is released either by
  * coredump() (core-dumping path) or explicitly below before exit1() is
  * called.
  *
  * MPSAFE
  */
 void
 sigexit(td, sig)
 	struct thread *td;
 	int sig;
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_acflag |= AXSIG;
 	/*
 	 * We must be single-threading to generate a core dump.  This
 	 * ensures that the registers in the core file are up-to-date.
 	 * Also, the ELF dump handler assumes that the thread list doesn't
 	 * change out from under it.
 	 *
 	 * XXX If another thread attempts to single-thread before us
 	 *     (e.g. via fork()), we won't get a dump at all.
 	 */
 	if ((sigprop(sig) & SA_CORE) && (thread_single(SINGLE_NO_EXIT) == 0)) {
 		p->p_sig = sig;
 		/*
 		 * Log signals which would cause core dumps
 		 * (Log as LOG_INFO to appease those who don't want
 		 * these messages.)
 		 * XXX : Todo, as well as euid, write out ruid too
 		 * Note that coredump() drops proc lock.
 		 */
 		/* A successful dump is flagged in the exit status. */
 		if (coredump(td) == 0)
 			sig |= WCOREFLAG;
 		if (kern_logsigexit)
 			log(LOG_INFO,
 			    "pid %d (%s), uid %d: exited on signal %d%s\n",
 			    p->p_pid, p->p_comm,
 			    td->td_ucred ? td->td_ucred->cr_uid : -1,
 			    sig &~ WCOREFLAG,
 			    sig & WCOREFLAG ? " (core dumped)" : "");
 	} else
 		PROC_UNLOCK(p);
 	exit1(td, W_EXITCODE(0, sig));
 	/* NOTREACHED */
 }
 
 /*
  * Send queued SIGCHLD to parent when child process's state
  * is changed.
  */
 static void
 sigparent(struct proc *p, int reason, int status)
 {
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED);
 
 	if (p->p_ksi != NULL) {
 		p->p_ksi->ksi_signo  = SIGCHLD;
 		p->p_ksi->ksi_code   = reason;
 		p->p_ksi->ksi_status = status;
 		p->p_ksi->ksi_pid    = p->p_pid;
 		p->p_ksi->ksi_uid    = p->p_ucred->cr_ruid;
 		if (KSI_ONQ(p->p_ksi))
 			return;
 	}
 	tdsignal(p->p_pptr, NULL, SIGCHLD, p->p_ksi);
 }
 
 /*
  * Tell the parent of p about a job-control state change of the child.
  *
  * The parent is flagged with P_STATCHILD and anything sleeping on the
  * parent's proc address is woken.  SIGCHLD is additionally sent through
  * sigparent() unless the parent has PS_NOCLDSTOP set.  Both the child's
  * and the parent's process locks must be held.
  */
 static void
 childproc_jobstate(struct proc *p, int reason, int status)
 {
 	struct sigacts *parent_acts;
 	int want_sigchld;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED);
 
 	/*
 	 * The wakeup is what reliably rouses a parent sleeping in
 	 * kern_wait(); SIGCHLD alone cannot guarantee that, since the
 	 * parent may have the signal masked.
 	 */
 	p->p_pptr->p_flag |= P_STATCHILD;
 	wakeup(p->p_pptr);
 
 	/* Sample the parent's policy under its sigacts mutex. */
 	parent_acts = p->p_pptr->p_sigacts;
 	mtx_lock(&parent_acts->ps_mtx);
 	want_sigchld = ((parent_acts->ps_flag & PS_NOCLDSTOP) == 0);
 	mtx_unlock(&parent_acts->ps_mtx);
 
 	if (want_sigchld)
 		sigparent(p, reason, status);
 }
 
 /*
  * Notify the parent that child p has stopped.  'reason' is passed through
  * to childproc_jobstate() and the status reported is the child's p_xstat.
  */
 void
 childproc_stopped(struct proc *p, int reason)
 {
 	childproc_jobstate(p, reason, p->p_xstat);
 }
 
 /*
  * Notify the parent that child p has been continued: reports
  * CLD_CONTINUED with a status of SIGCONT via childproc_jobstate().
  */
 void
 childproc_continued(struct proc *p)
 {
 	childproc_jobstate(p, CLD_CONTINUED, SIGCONT);
 }
 
 /*
  * Send SIGCHLD to the parent for an exiting child.
  *
  * The si_code-style reason is derived from the child's exit status in
  * p_xstat: CLD_DUMPED when a core was written, CLD_KILLED when terminated
  * by a signal, CLD_EXITED otherwise.
  */
 void
 childproc_exited(struct proc *p)
 {
 	int status = p->p_xstat; /* convert to int */
 	int reason;
 
 	if (WCOREDUMP(status))
 		reason = CLD_DUMPED;
 	else if (WIFSIGNALED(status))
 		reason = CLD_KILLED;
 	else
 		reason = CLD_EXITED;
 
 	/*
 	 * XXX no wakeup(p->p_pptr) here on purpose; that part of the
 	 * work is done in exit1().
 	 */
 	sigparent(p, reason, status);
 }
 
 /*
  * Format string used to build core file names; defaults to "%N.core".
  * It is exported read-write as the kern.corefile sysctl and is read by
  * expand_name().
  */
 static char corefilename[MAXPATHLEN] = {"%N.core"};
 SYSCTL_STRING(_kern, OID_AUTO, corefile, CTLFLAG_RW, corefilename,
 	      sizeof(corefilename), "process corefile name format string");
 
 /*
  * expand_name(name, uid, pid)
  * Expand the name described in corefilename, using name, uid, and pid.
  * corefilename is a printf-like string, with three format specifiers:
  *	%N	name of process ("name")
  *	%P	process id (pid)
  *	%U	user id (uid)
  * "%%" produces a literal '%'.  An unknown specifier, or a lone '%' at the
  * end of the format, is logged and expands to nothing.
  * For example, "%N.core" is the default; they can be disabled completely
  * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
  * This is controlled by the sysctl variable kern.corefile (see above).
  *
  * Returns a NUL-terminated string allocated from M_TEMP that the caller
  * must free, or NULL when the allocation fails or the expanded name
  * (including its terminator) does not fit in MAXPATHLEN bytes.
  */
 
 static char *
 expand_name(name, uid, pid)
 	const char *name;
 	uid_t uid;
 	pid_t pid;
 {
 	const char *format, *appendstr;
 	char *temp;
 	char buf[11];		/* Buffer for pid/uid -- max 4B */
 	size_t i, l, n;
 
 	format = corefilename;
 	temp = malloc(MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO);
 	if (temp == NULL)
 		return (NULL);
 	/*
 	 * Never write the last byte of the zeroed buffer, so the result is
 	 * always NUL-terminated.
 	 */
 	for (i = 0, n = 0; n < MAXPATHLEN - 1 && format[i]; i++) {
 		switch (format[i]) {
 		case '%':	/* Format character */
 			i++;
 			switch (format[i]) {
 			case '%':
 				appendstr = "%";
 				break;
 			case 'N':	/* process name */
 				appendstr = name;
 				break;
 			case 'P':	/* process id */
 				snprintf(buf, sizeof(buf), "%u", pid);
 				appendstr = buf;
 				break;
 			case 'U':	/* user id */
 				snprintf(buf, sizeof(buf), "%u", uid);
 				appendstr = buf;
 				break;
 			case '\0':
 				/*
 				 * Lone '%' at the end of the format.  Step
 				 * back so the loop increment lands on the
 				 * terminator instead of running past it.
 				 */
 				appendstr = "";
 				log(LOG_ERR,
 				    "Trailing format character in `%s'\n",
 				    format);
 				i--;
 				break;
 			default:
 				appendstr = "";
 				log(LOG_ERR,
 				    "Unknown format character %c in `%s'\n",
 				    format[i], format);
 			}
 			l = strlen(appendstr);
 			if ((n + l) >= MAXPATHLEN)
 				goto toolong;
 			memcpy(temp + n, appendstr, l);
 			n += l;
 			break;
 		default:
 			temp[n++] = format[i];
 		}
 	}
 	/* Leftover format means the output buffer filled up first. */
 	if (format[i] != '\0')
 		goto toolong;
 	return (temp);
 toolong:
 	log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too long\n",
 	    (long)pid, name, (u_long)uid);
 	free(temp, M_TEMP);
 	return (NULL);
 }
 
 /*
  * Dump a process' core.  The main routine does some
  * policy checking, and creates the name of the coredump;
  * then it passes on a vnode and a size limit to the process-specific
  * coredump routine if there is one; if there _is not_ one, it returns
  * ENOSYS; otherwise it returns the error from the process-specific routine.
  *
  * The process lock must be held on entry and is always released before
  * returning.  Giant is acquired for the file system work and is released
  * on every return path.  Returns 0 on success or an errno value: EFAULT
  * when dumping is disabled by policy or the target is not a singly linked
  * regular file, EFBIG when the core size limit is zero, EINVAL when the
  * core file name cannot be built, or the error from the file operations.
  */
 
 static int
 coredump(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	register struct vnode *vp;
 	register struct ucred *cred = td->td_ucred;
 	struct flock lf;
 	struct nameidata nd;
 	struct vattr vattr;
 	int error, error1, flags, locked;
 	struct mount *mp;
 	char *name;			/* name of corefile */
 	off_t limit;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
 	_STOPEVENT(p, S_CORE, 0);
 
 	if (((sugid_coredump == 0) && p->p_flag & P_SUGID) || do_coredump == 0) {
 		PROC_UNLOCK(p);
 		return (EFAULT);
 	}
 	
 	/*
 	 * Note that the bulk of limit checking is done after
 	 * the corefile is created.  The exception is if the limit
 	 * for corefiles is 0, in which case we don't bother
 	 * creating the corefile at all.  This layout means that
 	 * a corefile is truncated instead of not being created,
 	 * if it is larger than the limit.
 	 */
 	limit = (off_t)lim_cur(p, RLIMIT_CORE);
 	PROC_UNLOCK(p);
 	if (limit == 0)
 		return (EFBIG);
 
 	mtx_lock(&Giant);
 restart:
 	/* Giant is held from here on, including when we loop back. */
 	name = expand_name(p->p_comm, td->td_ucred->cr_uid, p->p_pid);
 	if (name == NULL) {
 		mtx_unlock(&Giant);
 		return (EINVAL);
 	}
 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); /* XXXKSE */
 	flags = O_CREAT | FWRITE | O_NOFOLLOW;
 	error = vn_open(&nd, &flags, S_IRUSR | S_IWUSR, -1);
 	free(name, M_TEMP);
 	if (error) {
 		mtx_unlock(&Giant);		
 		return (error);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 
 	/* Don't dump to non-regular files or files with links. */
 	if (vp->v_type != VREG ||
 	    VOP_GETATTR(vp, &vattr, cred, td) || vattr.va_nlink != 1) {
 		VOP_UNLOCK(vp, 0, td);
 		error = EFAULT;
 		goto out;
 	}
 
 	VOP_UNLOCK(vp, 0, td);
 	lf.l_whence = SEEK_SET;
 	lf.l_start = 0;
 	lf.l_len = 0;
 	lf.l_type = F_WRLCK;
 	locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
 
 	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 		/*
 		 * Writes are not possible right now: undo the lock and the
 		 * open, wait until writing is allowed, then start over.
 		 * The vnode is already closed on the error returns below,
 		 * so they must not go through "out"; they only need to
 		 * drop Giant.
 		 */
 		lf.l_type = F_UNLCK;
 		if (locked)
 			VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 		if ((error = vn_close(vp, FWRITE, cred, td)) != 0) {
 			mtx_unlock(&Giant);
 			return (error);
 		}
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) {
 			mtx_unlock(&Giant);
 			return (error);
 		}
 		goto restart;
 	}
 
 	/* Truncate the file (and optionally mark it nodump) before writing. */
 	VATTR_NULL(&vattr);
 	vattr.va_size = 0;
 	if (set_core_nodump_flag)
 		vattr.va_flags = UF_NODUMP;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 	VOP_LEASE(vp, td, cred, LEASE_WRITE);
 	VOP_SETATTR(vp, &vattr, cred, td);
 	VOP_UNLOCK(vp, 0, td);
 	PROC_LOCK(p);
 	p->p_acflag |= ACORE;
 	PROC_UNLOCK(p);
 
 	error = p->p_sysent->sv_coredump ?
 	  p->p_sysent->sv_coredump(td, vp, limit) :
 	  ENOSYS;
 
 	if (locked) {
 		lf.l_type = F_UNLCK;
 		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 	}
 	vn_finished_write(mp);
 out:
 	/* The first error wins; a close failure is reported only otherwise. */
 	error1 = vn_close(vp, FWRITE, cred, td);
 	mtx_unlock(&Giant);
 	if (error == 0)
 		error = error1;
 	return (error);
 }
 
 /*
  * Nonexistent system call-- signal process (may want to handle it).
  * Flag error in case process won't see signal immediately (blocked or ignored).
  *
  * Posts SIGSYS to the calling thread's process under the process lock and
  * always returns ENOSYS.  The args parameter is not examined.
  */
 #ifndef _SYS_SYSPROTO_H_
 /* Placeholder argument structure, used when sysproto.h is not included. */
 struct nosys_args {
 	int	dummy;
 };
 #endif
 /*
  * MPSAFE
  */
 /* ARGSUSED */
 int
 nosys(td, args)
 	struct thread *td;
 	struct nosys_args *args;
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK(p);
 	psignal(p, SIGSYS);
 	PROC_UNLOCK(p);
 	return (ENOSYS);
 }
 
 /*
  * Deliver a SIGIO or SIGURG style signal to the owner recorded in *sigiop,
  * checking permission against the credentials stored in the sigio rather
  * than those of the current process.
  *
  * A positive sio_pgid names a single process; a negative one names a
  * process group, every member of which is signalled if permitted and,
  * when checkctty is non-zero, only if it has P_CONTROLT set.  Nothing
  * happens when *sigiop is NULL or sio_pgid is zero.
  */
 void
 pgsigio(sigiop, sig, checkctty)
 	struct sigio **sigiop;
 	int sig, checkctty;
 {
 	struct sigio *sio;
 	struct proc *member;
 
 	SIGIO_LOCK();
 	sio = *sigiop;
 	if (sio != NULL) {
 		if (sio->sio_pgid > 0) {
 			/* Owner is a single process. */
 			PROC_LOCK(sio->sio_proc);
 			if (CANSIGIO(sio->sio_ucred, sio->sio_proc->p_ucred))
 				psignal(sio->sio_proc, sig);
 			PROC_UNLOCK(sio->sio_proc);
 		} else if (sio->sio_pgid < 0) {
 			/* Owner is a process group: visit each member. */
 			PGRP_LOCK(sio->sio_pgrp);
 			LIST_FOREACH(member, &sio->sio_pgrp->pg_members,
 			    p_pglist) {
 				PROC_LOCK(member);
 				if (CANSIGIO(sio->sio_ucred, member->p_ucred) &&
 				    (checkctty == 0 ||
 				    (member->p_flag & P_CONTROLT)))
 					psignal(member, sig);
 				PROC_UNLOCK(member);
 			}
 			PGRP_UNLOCK(sio->sio_pgrp);
 		}
 	}
 	SIGIO_UNLOCK();
 }
 
 /*
  * Attach a signal knote to the current process: remember the process in
  * the knote, force EV_CLEAR on, and add the knote to the process's knote
  * list.  Always returns 0.
  */
 static int
 filt_sigattach(struct knote *kn)
 {
 	struct proc *p = curproc;
 
 	kn->kn_ptr.p_proc = p;
 	kn->kn_flags |= EV_CLEAR;		/* automatically set */
 
 	knlist_add(&p->p_klist, kn, 0);
 
 	return (0);
 }
 
 /*
  * Detach a signal knote: remove it from the knote list of the process
  * recorded in the knote.
  */
 static void
 filt_sigdetach(struct knote *kn)
 {
 	struct proc *p = kn->kn_ptr.p_proc;
 
 	knlist_remove(&p->p_klist, kn, 0);
 }
 
 /*
  * Event filter for signal knotes.
  *
  * Signal knotes live on the same list as process knotes, so signal hints
  * are tagged with NOTE_SIGNAL to tell the two apart.  (A dedicated list
  * for signal knotes would avoid the tagging, but is probably not worth
  * the trouble.)  When a tagged hint names the signal this knote is
  * watching, the knote's counter is bumped.  Returns non-zero while the
  * counter is non-zero.
  */
 static int
 filt_signal(struct knote *kn, long hint)
 {
 	long signo;
 
 	if ((hint & NOTE_SIGNAL) != 0) {
 		/* Strip the tag to recover the signal number. */
 		signo = hint & ~NOTE_SIGNAL;
 		if (kn->kn_id == signo)
 			kn->kn_data++;
 	}
 	return (kn->kn_data != 0);
 }
 
 /*
  * Allocate a zero-filled sigacts structure (sleeping for memory if
  * necessary), give it a reference count of one and initialize its mutex.
  */
 struct sigacts *
 sigacts_alloc(void)
 {
 	struct sigacts *ps;
 
 	ps = malloc(sizeof(struct sigacts), M_SUBPROC, M_WAITOK | M_ZERO);
 	ps->ps_refcnt = 1;
 	mtx_init(&ps->ps_mtx, "sigacts", NULL, MTX_DEF);
 	return (ps);
 }
 
 /*
  * Drop one reference on a sigacts structure.  When the last reference
  * goes away the mutex is destroyed and the structure is freed; otherwise
  * the mutex is simply released.
  */
 void
 sigacts_free(struct sigacts *ps)
 {
 
 	mtx_lock(&ps->ps_mtx);
 	ps->ps_refcnt--;
 	if (ps->ps_refcnt != 0) {
 		/* Other holders remain. */
 		mtx_unlock(&ps->ps_mtx);
 		return;
 	}
 	mtx_destroy(&ps->ps_mtx);
 	free(ps, M_SUBPROC);
 }
 
 /*
  * Take an additional reference on a sigacts structure (under its mutex)
  * and return the same pointer.
  */
 struct sigacts *
 sigacts_hold(struct sigacts *ps)
 {
 	mtx_lock(&ps->ps_mtx);
 	ps->ps_refcnt++;
 	mtx_unlock(&ps->ps_mtx);
 	return (ps);
 }
 
 /*
  * Copy the signal state of src into dest.  Only the bytes that precede
  * the ps_refcnt member are copied, so dest keeps its own reference count
  * (and whatever members follow it).  dest must not be shared (asserted);
  * src's mutex is held during the copy, dest's is not taken.
  */
 void
 sigacts_copy(struct sigacts *dest, struct sigacts *src)
 {
 
 	KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest"));
 	mtx_lock(&src->ps_mtx);
 	bcopy(src, dest, offsetof(struct sigacts, ps_refcnt));
 	mtx_unlock(&src->ps_mtx);
 }
 
 /*
  * Return non-zero when the sigacts structure has more than one reference.
  * The count is sampled under the structure's mutex, so the answer may be
  * stale by the time the caller acts on it.
  */
 int
 sigacts_shared(struct sigacts *ps)
 {
 	int shared;
 
 	mtx_lock(&ps->ps_mtx);
 	shared = ps->ps_refcnt > 1;
 	mtx_unlock(&ps->ps_mtx);
 	return (shared);
 }
Index: head/sys/kern/kern_synch.c
===================================================================
--- head/sys/kern/kern_synch.c	(revision 155740)
+++ head/sys/kern/kern_synch.c	(revision 155741)
@@ -1,573 +1,567 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 #include <machine/cpu.h>
 
 static void synch_setup(void *dummy);
 SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL)
 
 int	hogticks;
 int	lbolt;
 
 static struct callout loadav_callout;
 static struct callout lbolt_callout;
 
 struct loadavg averunnable =
 	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */
 /*
  * Constants for averages over 1, 5, and 15 minutes
  * when sampling at 5 second intervals.
  */
 static fixpt_t cexp[3] = {
 	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
 	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
 	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
 };
 
 /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
 static int      fscale __unused = FSCALE;
 SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");
 
 static void	loadav(void *arg);
 static void	lboltcb(void *arg);
 
 /*
  * Set the default value of hogticks and initialize the sleep queues.
  */
 void
 sleepinit(void)
 {
 
 	hogticks = (hz / 10) * 2;	/* Default only. */
 	init_sleepqueues();
 }
 
 /*
  * General sleep call.  Suspends the current process until a wakeup is
  * performed on the specified identifier.  The process will then be made
  * runnable with the specified priority.  Sleeps at most timo/hz seconds
  * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
  * before and after sleeping, else signals are not checked.  Returns 0 if
  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
  * signal needs to be delivered, ERESTART is returned if the current system
  * call should be restarted if possible, and EINTR is returned if the system
  * call should be interrupted by the signal (return EINTR).
  *
  * The mutex argument is exited before the caller is suspended, and
  * entered before msleep returns.  If priority includes the PDROP
  * flag the mutex is not entered before returning.
  */
 int
 msleep(ident, mtx, priority, wmesg, timo)
 	void *ident;
 	struct mtx *mtx;
 	int priority, timo;
 	const char *wmesg;
 {
 	struct thread *td;
 	struct proc *p;
-	int catch, rval, sig, flags;
+	int catch, rval, flags;
 	WITNESS_SAVE_DECL(mtx);
 
 	td = curthread;
 	p = td->td_proc;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0);
 #endif
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, mtx == NULL ? NULL :
 	    &mtx->mtx_object, "Sleeping on \"%s\"", wmesg);
 	KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL,
 	    ("sleeping without a mutex"));
 	KASSERT(p != NULL, ("msleep1"));
 	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
 
 	if (cold) {
 		/*
 		 * During autoconfiguration, just return;
 		 * don't run any other threads or panic below,
 		 * in case this is the idle thread and already asleep.
 		 * XXX: this used to do "s = splhigh(); splx(safepri);
 		 * splx(s);" to give interrupts a chance, but there is
 		 * no way to give interrupts a chance now.
 		 */
 		if (mtx != NULL && priority & PDROP)
 			mtx_unlock(mtx);
 		return (0);
 	}
 	/* PCATCH in the priority argument requests an interruptible sleep. */
 	catch = priority & PCATCH;
 	rval = 0;
 
 	/*
 	 * If we are already on a sleep queue, then remove us from that
 	 * sleep queue first.  We have to do this to handle recursive
 	 * sleeps.
 	 */
 	if (TD_ON_SLEEPQ(td))
 		sleepq_remove(td, td->td_wchan);
 
 	sleepq_lock(ident);
 	if (catch) {
 		/*
 		 * Don't bother sleeping if we are exiting and not the exiting
 		 * thread or if our thread is marked as interrupted.
 		 */
 		mtx_lock_spin(&sched_lock);
 		rval = thread_sleep_check(td);
 		mtx_unlock_spin(&sched_lock);
 		if (rval != 0) {
 			/* Bail out early, still honouring PDROP. */
 			sleepq_release(ident);
 			if (mtx != NULL && priority & PDROP)
 				mtx_unlock(mtx);
 			return (rval);
 		}
 	}
 	CTR5(KTR_PROC, "msleep: thread %p (pid %ld, %s) on %s (%p)",
 	    (void *)td, (long)p->p_pid, p->p_comm, wmesg, ident);
 
 	DROP_GIANT();
 	if (mtx != NULL) {
 		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
 		WITNESS_SAVE(&mtx->mtx_object, mtx);
 		mtx_unlock(mtx);
 	}
 
 	/*
 	 * We put ourselves on the sleep queue and start our timeout
 	 * before calling thread_suspend_check, as we could stop there,
 	 * and a wakeup or a SIGCONT (or both) could occur while we were
 	 * stopped without resuming us.  Thus, we must be ready for sleep
 	 * when cursig() is called.  If the wakeup happens while we're
 	 * stopped, then td will no longer be on a sleep queue upon
 	 * return from cursig().
 	 */
 	flags = SLEEPQ_MSLEEP;
 	if (catch)
 		flags |= SLEEPQ_INTERRUPTIBLE;
 	sleepq_add(ident, mtx, wmesg, flags);
 	if (timo)
 		sleepq_set_timeout(ident, timo);
-	if (catch) {
-		sig = sleepq_catch_signals(ident);
-	} else
-		sig = 0;
 
 	/*
 	 * Adjust this thread's priority.
 	 */
 	mtx_lock_spin(&sched_lock);
 	sched_prio(td, priority & PRIMASK);
 	mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * Pick the sleepqueue wait primitive that matches the
 	 * timeout/catch combination requested by the caller.
 	 */
 	if (timo && catch)
-		rval = sleepq_timedwait_sig(ident, sig != 0);
+		rval = sleepq_timedwait_sig(ident);
 	else if (timo)
 		rval = sleepq_timedwait(ident);
 	else if (catch)
 		rval = sleepq_wait_sig(ident);
 	else {
 		sleepq_wait(ident);
 		rval = 0;
 	}
-	if (rval == 0 && catch)
-		rval = sleepq_calc_signal_retval(sig);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0);
 #endif
 	PICKUP_GIANT();
 	/* Reacquire the caller's mutex unless PDROP asked us not to. */
 	if (mtx != NULL && !(priority & PDROP)) {
 		mtx_lock(mtx);
 		WITNESS_RESTORE(&mtx->mtx_object, mtx);
 	}
 	return (rval);
 }
 
 /*
  * Sleep on ident while holding a spin mutex.  The mutex is released with
  * mtx_unlock_spin() before sleeping and reacquired with mtx_lock_spin()
  * before returning.  Returns 0 when there is no timeout, otherwise the
  * value returned by sleepq_timedwait().  During cold boot it returns 0
  * immediately without sleeping.
  */
 int
 msleep_spin(ident, mtx, wmesg, timo)
 	void *ident;
 	struct mtx *mtx;
 	const char *wmesg;
 	int timo;
 {
 	struct thread *td;
 	struct proc *p;
 	int rval;
 	WITNESS_SAVE_DECL(mtx);
 
 	td = curthread;
 	p = td->td_proc;
 	KASSERT(mtx != NULL, ("sleeping without a mutex"));
 	KASSERT(p != NULL, ("msleep1"));
 	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
 
 	if (cold) {
 		/*
 		 * During autoconfiguration, just return;
 		 * don't run any other threads or panic below,
 		 * in case this is the idle thread and already asleep.
 		 * XXX: this used to do "s = splhigh(); splx(safepri);
 		 * splx(s);" to give interrupts a chance, but there is
 		 * no way to give interrupts a chance now.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(ident);
 	CTR5(KTR_PROC, "msleep_spin: thread %p (pid %ld, %s) on %s (%p)",
 	    (void *)td, (long)p->p_pid, p->p_comm, wmesg, ident);
 
 	/* Release Giant and the caller's spin mutex before sleeping. */
 	DROP_GIANT();
 	mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
 	WITNESS_SAVE(&mtx->mtx_object, mtx);
 	mtx_unlock_spin(mtx);
 
 	/*
 	 * We put ourselves on the sleep queue and start our timeout.
 	 */
 	sleepq_add(ident, mtx, wmesg, SLEEPQ_MSLEEP);
 	if (timo)
 		sleepq_set_timeout(ident, timo);
 
 	/*
 	 * Can't call ktrace with any spin locks held so it can lock the
 	 * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
 	 * any spin lock.  Thus, we have to drop the sleepq spin lock while
 	 * we handle those requests.  This is safe since we have placed our
 	 * thread on the sleep queue already.
 	 */
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW)) {
 		sleepq_release(ident);
 		ktrcsw(1, 0);
 		sleepq_lock(ident);
 	}
 #endif
 #ifdef WITNESS
 	sleepq_release(ident);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
 	    wmesg);
 	sleepq_lock(ident);
 #endif
 	/* This variant never catches signals; only the timeout differs. */
 	if (timo)
 		rval = sleepq_timedwait(ident);
 	else {
 		sleepq_wait(ident);
 		rval = 0;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0);
 #endif
 	PICKUP_GIANT();
 	mtx_lock_spin(mtx);
 	WITNESS_RESTORE(&mtx->mtx_object, mtx);
 	return (rval);
 }
 
 /*
  * Make all threads sleeping on the specified identifier runnable.
  */
 void
 wakeup(void *ident)
 {
 
 	/*
 	 * Lock the sleep queue for ident and broadcast to its
 	 * SLEEPQ_MSLEEP sleepers.
 	 * NOTE(review): there is no sleepq_release() here; presumably
 	 * sleepq_broadcast() drops the lock -- confirm.
 	 */
 	sleepq_lock(ident);
 	sleepq_broadcast(ident, SLEEPQ_MSLEEP, -1);
 }
 
 /*
  * Make a thread sleeping on the specified identifier runnable.
  * May wake more than one thread if a target thread is currently
  * swapped out.
  */
 void
 wakeup_one(void *ident)
 {
 
 	/*
 	 * Lock the sleep queue for ident and signal its SLEEPQ_MSLEEP
 	 * sleepers via sleepq_signal().
 	 * NOTE(review): there is no sleepq_release() here; presumably
 	 * sleepq_signal() drops the lock -- confirm.
 	 */
 	sleepq_lock(ident);
 	sleepq_signal(ident, SLEEPQ_MSLEEP, -1);
 }
 
 /*
  * The machine independent parts of context switching.
  */
 void
 mi_switch(int flags, struct thread *newtd)
 {
 	uint64_t new_switchtime;
 	struct thread *td;
 	struct proc *p;
 
 	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
 	td = curthread;			/* XXX */
 	p = td->td_proc;		/* XXX */
 	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
 #ifdef INVARIANTS
 	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
 		mtx_assert(&Giant, MA_NOTOWNED);
 #endif
 	KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 &&
 	    (td->td_owepreempt) && (flags & SW_INVOL) != 0 &&
 	    newtd == NULL) || panicstr,
 	    ("mi_switch: switch in a critical section"));
 	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
 	    ("mi_switch: switch must be voluntary or involuntary"));
 	KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself"));
 
 	/* Count the switch as voluntary or involuntary in the rusage. */
 	if (flags & SW_VOL)
 		p->p_stats->p_ru.ru_nvcsw++;
 	else
 		p->p_stats->p_ru.ru_nivcsw++;
 
 	/*
 	 * Compute the amount of time during which the current
 	 * process was running, and add that to its total so far.
 	 * The per-thread tick counters are folded into the process
 	 * totals and reset to zero.
 	 */
 	new_switchtime = cpu_ticks();
 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
 	p->p_rux.rux_uticks += td->td_uticks;
 	td->td_uticks = 0;
 	p->p_rux.rux_iticks += td->td_iticks;
 	td->td_iticks = 0;
 	p->p_rux.rux_sticks += td->td_sticks;
 	td->td_sticks = 0;
 
 	td->td_generation++;	/* bump preempt-detect counter */
 
 	/*
 	 * Don't perform context switches from the debugger.
 	 */
 	if (kdb_active) {
 		mtx_unlock_spin(&sched_lock);
 		kdb_backtrace();
 		kdb_reenter();
 		panic("%s: did not reenter debugger", __func__);
 	}
 
 	/*
 	 * Check if the process exceeds its cpu resource allocation.  If
 	 * it reaches the max, arrange to kill the process in ast().
 	 */
 	if (p->p_cpulimit != RLIM_INFINITY &&
 	    p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
 		p->p_sflag |= PS_XCPU;
 		td->td_flags |= TDF_ASTPENDING;
 	}
 
 	/*
 	 * Finish up stats for outgoing thread.
 	 */
 	cnt.v_swtch++;
 	PCPU_SET(switchtime, new_switchtime);
 	PCPU_SET(switchticks, ticks);
 	CTR4(KTR_PROC, "mi_switch: old thread %p (kse %p, pid %ld, %s)",
 	    (void *)td, td->td_sched, (long)p->p_pid, p->p_comm);
 	/* For a voluntary switch in a P_SA process, thread_switchout() may pick newtd. */
 	if ((flags & SW_VOL) && (td->td_proc->p_flag & P_SA))
 		newtd = thread_switchout(td, flags, newtd);
 #if (KTR_COMPILE & KTR_SCHED) != 0
 	if (td == PCPU_GET(idlethread))
 		CTR3(KTR_SCHED, "mi_switch: %p(%s) prio %d idle",
 		    td, td->td_proc->p_comm, td->td_priority);
 	else if (newtd != NULL)
 		CTR5(KTR_SCHED,
 		    "mi_switch: %p(%s) prio %d preempted by %p(%s)",
 		    td, td->td_proc->p_comm, td->td_priority, newtd,
 		    newtd->td_proc->p_comm);
 	else
 		CTR6(KTR_SCHED,
 		    "mi_switch: %p(%s) prio %d inhibit %d wmesg %s lock %s",
 		    td, td->td_proc->p_comm, td->td_priority,
 		    td->td_inhibitors, td->td_wmesg, td->td_lockname);
 #endif
 	/* Hand the CPU over; the traces below run after sched_switch() returns. */
 	sched_switch(td, newtd, flags);
 	CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d",
 	    td, td->td_proc->p_comm, td->td_priority);
 
 	CTR4(KTR_PROC, "mi_switch: new thread %p (kse %p, pid %ld, %s)",
 	    (void *)td, td->td_sched, (long)p->p_pid, p->p_comm);
 
 	/*
 	 * If the last thread was exiting, finish cleaning it up.
 	 * Note that td is reused here to hold the per-CPU deadthread.
 	 */
 	if ((td = PCPU_GET(deadthread))) {
 		PCPU_SET(deadthread, NULL);
 		thread_stash(td);
 	}
 }
 
 /*
  * Change process state to be runnable,
  * placing it on the run queue if it is in memory,
  * and awakening the swapper if it isn't in memory.
  */
 void
 setrunnable(struct thread *td)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* A zombie process must never be made runnable. */
 	if (p->p_state == PRS_ZOMBIE)
 		panic("setrunnable(1)");
 
 	switch (td->td_state) {
 	case TDS_RUNNING:
 	case TDS_RUNQ:
 		/* Already running or queued to run; nothing to do. */
 		return;
 	case TDS_INHIBITED:
 		/*
 		 * If we are only inhibited because we are swapped out
 		 * then arrange to swap in this process.  Otherwise just
 		 * return.
 		 */
 		if (td->td_inhibitors != TDI_SWAPPED)
 			return;
 		break;
 	case TDS_CAN_RUN:
 		break;
 	default:
 		printf("state is 0x%x", td->td_state);
 		panic("setrunnable(2)");
 	}
 
 	/* In memory: hand the thread straight to the scheduler. */
 	if ((p->p_sflag & PS_INMEM) != 0) {
 		sched_wakeup(td);
 		return;
 	}
 
 	/* Swapped out: request a swap-in unless one is already under way. */
 	if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
 		p->p_sflag |= PS_SWAPINREQ;
 		/*
 		 * due to a LOR between sched_lock and
 		 * the sleepqueue chain locks, use
 		 * lower level scheduling functions.
 		 */
 		kick_proc0();
 	}
 }
 
 /*
  * Compute a tenex style load average of a quantity on
  * 1, 5 and 15 minute intervals.
  * XXXKSE   Needs complete rewrite when correct info is available.
  * Completely Bogus.. only works with 1:1 (but compiles ok now :-)
  */
 static void
 loadav(void *arg)
 {
 	struct loadavg *avg;
 	int delay, idx, nrun;
 
 	avg = &averunnable;
 	nrun = sched_load();
 
 	/* Decay each of the three averages and mix in the current load. */
 	for (idx = 0; idx < 3; idx++)
 		avg->ldavg[idx] = (cexp[idx] * avg->ldavg[idx] +
 		    nrun * FSCALE * (FSCALE - cexp[idx])) >> FSHIFT;
 
 	/*
 	 * Schedule the next update to occur after 5 seconds, but add a
 	 * random variation to avoid synchronisation with processes that
 	 * run at regular intervals.
 	 */
 	delay = hz * 4 + (int)(random() % (hz * 2 + 1));
 	callout_reset(&loadav_callout, delay, loadav, NULL);
 }
 
 /*
  * Callout handler: wake up everything sleeping on &lbolt, then re-arm
  * the callout to fire again in hz ticks.
  */
 static void
 lboltcb(void *arg)
 {
 	wakeup(&lbolt);
 	callout_reset(&lbolt_callout, hz, lboltcb, NULL);
 }
 
 /* ARGSUSED */
 static void
 synch_setup(void *dummy)
 {
 
 	/* Both periodic callouts are initialized as MP-safe. */
 	callout_init(&loadav_callout, CALLOUT_MPSAFE);
 	callout_init(&lbolt_callout, CALLOUT_MPSAFE);
 
 	/*
 	 * Kick off timeout driven events by calling first time; each
 	 * handler re-arms its own callout.
 	 */
 	loadav(NULL);
 	lboltcb(NULL);
 }
 
 /*
  * General purpose yield system call
  */
 int
 yield(struct thread *td, struct yield_args *uap)
 {
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 
 	/*
 	 * Drop the caller to PRI_MAX_TIMESHARE and voluntarily switch
 	 * away, all under sched_lock.
 	 */
 	mtx_lock_spin(&sched_lock);
 	sched_prio(td, PRI_MAX_TIMESHARE);
 	mi_switch(SW_VOL, NULL);
 	mtx_unlock_spin(&sched_lock);
 
 	/* The system call always succeeds and returns 0 to userland. */
 	td->td_retval[0] = 0;
 	return (0);
 }
Index: head/sys/kern/kern_thread.c
===================================================================
--- head/sys/kern/kern_thread.c	(revision 155740)
+++ head/sys/kern/kern_thread.c	(revision 155741)
@@ -1,1062 +1,1062 @@
 /*-
  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
  *  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/turnstile.h>
 #include <sys/ktr.h>
 #include <sys/umtx.h>
 
 #include <security/audit/audit.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 /*
  * KSEGRP related storage.
  */
 static uma_zone_t ksegrp_zone;
 static uma_zone_t thread_zone;
 
 /* DEBUG ONLY */
 SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
 static int thread_debug = 0;
 SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
 	&thread_debug, 0, "thread debug");
 
 int max_threads_per_proc = 1500;
 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
 	&max_threads_per_proc, 0, "Limit on threads per proc");
 
 int max_groups_per_proc = 1500;
 SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
 	&max_groups_per_proc, 0, "Limit on thread groups per proc");
 
 int max_threads_hits;
 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
 	&max_threads_hits, 0, "");
 
 int virtual_cpu;
 
 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
 TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
 struct mtx kse_zombie_lock;
 MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
 
 static int
 sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
 {
 	int error, new_val;
 	int def_val;
 
 	def_val = mp_ncpus;
 	if (virtual_cpu == 0)
 		new_val = def_val;
 	else
 		new_val = virtual_cpu;
 	error = sysctl_handle_int(oidp, &new_val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (new_val < 0)
 		return (EINVAL);
 	virtual_cpu = new_val;
 	return (0);
 }
 
 /* DEBUG ONLY */
 SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
 	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
 	"debug virtual cpus");
 
 struct mtx tid_lock;
 static struct unrhdr *tid_unrhdr;
 
 /*
  * Prepare a thread for use.
  */
 static int
 thread_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct thread *newtd;
 
 	newtd = mem;
 	newtd->td_tid = alloc_unr(tid_unrhdr);
 	newtd->td_oncpu = NOCPU;
 	newtd->td_state = TDS_INACTIVE;
 
 	/*
 	 * td_critnest starts out as 1: the thread is not running yet and
 	 * is therefore implicitly waiting to be on the receiving end of a
 	 * context switch.  A context switch must occur inside a critical
 	 * section and includes hand-off of the sched_lock.  After the
 	 * first switch to a newly created thread it releases sched_lock
 	 * for the first time and td_critnest drops to 0 for the first
 	 * time.  When the thread later switches away from itself it goes
 	 * back into a critical section and hands the sched lock to the
 	 * next thread.
 	 */
 	newtd->td_critnest = 1;
 
 #ifdef AUDIT
 	audit_thread_alloc(newtd);
 #endif
 	return (0);
 }
 
 /*
  * Reclaim a thread after use.
  */
 static void
 thread_dtor(void *mem, int size, void *arg)
 {
 	struct thread *oldtd;
 
 	oldtd = mem;
 
 #ifdef INVARIANTS
 	/* Only an inactive thread is in a safe state to free. */
 	switch (oldtd->td_state) {
 	case TDS_INACTIVE:
 		break;
 	case TDS_INHIBITED:
 	case TDS_RUNNING:
 	case TDS_CAN_RUN:
 	case TDS_RUNQ:
 		/*
 		 * A thread in any of these states is currently active
 		 * and must never be unlinked.
 		 */
 		panic("bad state for thread unlinking");
 		/* NOTREACHED */
 	default:
 		panic("bad thread state");
 		/* NOTREACHED */
 	}
 #endif
 #ifdef AUDIT
 	audit_thread_free(oldtd);
 #endif
 	/* Return the thread id to the allocator. */
 	free_unr(tid_unrhdr, oldtd->td_tid);
 	sched_newthread(oldtd);
 }
 
 /*
  * Initialize type-stable parts of a thread (when newly created).
  */
 static int
 thread_init(void *mem, int size, int flags)
 {
 	struct thread *newtd;
 
 	newtd = mem;
 
 	vm_thread_new(newtd, 0);
 	cpu_thread_setup(newtd);
 
 	/* Per-thread sleep queue, turnstile and umtx queue. */
 	newtd->td_sleepqueue = sleepq_alloc();
 	newtd->td_turnstile = turnstile_alloc();
 	newtd->td_umtxq = umtxq_alloc();
 
 	/* The scheduler's private data sits right after the thread. */
 	newtd->td_sched = (struct td_sched *)(newtd + 1);
 	sched_newthread(newtd);
 	return (0);
 }
 
 /*
  * Tear down type-stable parts of a thread (just before being discarded).
  */
 static void
 thread_fini(void *mem, int size)
 {
 	struct thread *oldtd;
 
 	oldtd = mem;
 
 	/* Free the turnstile, sleep queue and umtx queue held by the thread. */
 	turnstile_free(oldtd->td_turnstile);
 	sleepq_free(oldtd->td_sleepqueue);
 	umtxq_free(oldtd->td_umtxq);
 
 	vm_thread_dispose(oldtd);
 }
 
 /*
  * Initialize type-stable parts of a ksegrp (when newly created).
  */
 static int
 ksegrp_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct ksegrp *newkg;
 
 	newkg = mem;
 
 	/* Zero the whole allocation, not just the ksegrp itself. */
 	bzero(newkg, size);
 
 	/* The scheduler's private data sits right after the ksegrp. */
 	newkg->kg_sched = (struct kg_sched *)(newkg + 1);
 	return (0);
 }
 
 /*
  * Initialize a ksegrp's lists and counters and link it onto the head of
  * process p's ksegrp list.
  */
 void
 ksegrp_link(struct ksegrp *kg, struct proc *p)
 {
 
 	kg->kg_proc = p;
 
 	TAILQ_INIT(&kg->kg_threads);
 	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
 	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structure in ksegrp */
 
 	/*
 	 * the following counters are in the -zero- section
 	 * and may not need clearing
 	 */
 	kg->kg_numupcalls = 0;
 	kg->kg_numthreads = 0;
 
 	/* link it in now that it's consistent */
 	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
 	p->p_numksegrps++;
 }
 
 /*
  * Called from:
  *   thread-exit()
  */
 void
 ksegrp_unlink(struct ksegrp *kg)
 {
 	struct proc *owner;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
 	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));
 
 	/* Take the group off its process's list. */
 	owner = kg->kg_proc;
 	TAILQ_REMOVE(&owner->p_ksegrps, kg, kg_ksegrp);
 	owner->p_numksegrps--;
 
 	/* Clear the process-scope group pointer if it named this group. */
 	if (owner->p_procscopegrp == kg)
 		owner->p_procscopegrp = NULL;
 }
 
 /*
  * For a newly created process,
  * link up all the structures and its initial threads etc.
  * called from:
  * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
  * proc_dtor() (should go away)
  * proc_init()
  */
 void
 proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td)
 {
 
 	/* Start with no ksegrps and no threads accounted to the process. */
 	p->p_numksegrps = 0;
 	p->p_numthreads = 0;
 
 	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
 	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
 
 	sigqueue_init(&p->p_sigqueue, p);
 	p->p_ksi = ksiginfo_alloc(1);
 	/* XXX p_ksi may be null if ksiginfo zone is not ready */
 	if (p->p_ksi != NULL)
 		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
 	LIST_INIT(&p->p_mqnotifier);
 
 	/* Attach the initial ksegrp, then the initial thread to it. */
 	ksegrp_link(kg, p);
 	thread_link(td, kg);
 }
 
 /*
  * Initialize global thread allocation resources.
  */
 void
 threadinit(void)
 {
 
 	/* Thread ids come from the range PID_MAX + 1 .. INT_MAX. */
 	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
 	tid_unrhdr = new_unrhdr(PID_MAX + 1, INT_MAX, &tid_lock);
 
 	/* Zone for threads, with ctor/dtor and init/fini hooks. */
 	thread_zone = uma_zcreate("THREAD",
 	    sched_sizeof_thread(),
 	    thread_ctor, thread_dtor,
 	    thread_init, thread_fini,
 	    UMA_ALIGN_CACHE, 0);
 
 	/* Zone for ksegrps; only a constructor is needed. */
 	ksegrp_zone = uma_zcreate("KSEGRP",
 	    sched_sizeof_ksegrp(),
 	    ksegrp_ctor, NULL,
 	    NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 
 	kseinit();	/* set up kse specific stuff  e.g. upcall zone*/
 }
 
 /*
  * Stash an embarrassingly extra thread into the zombie thread queue.
  */
 void
 thread_stash(struct thread *td)
 {
 	/* The zombie list is protected by the kse_zombie_lock spin mutex. */
 	mtx_lock_spin(&kse_zombie_lock);
 	/* The thread is linked through its td_runq entry. */
 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
 	mtx_unlock_spin(&kse_zombie_lock);
 }
 
 /*
  * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
  */
 void
 ksegrp_stash(struct ksegrp *kg)
 {
 	/* The zombie list is protected by the kse_zombie_lock spin mutex. */
 	mtx_lock_spin(&kse_zombie_lock);
 	/* The ksegrp is linked through its kg_ksegrp entry. */
 	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
 	mtx_unlock_spin(&kse_zombie_lock);
 }
 
 /*
  * Reap zombie kse resource.
  */
 void
 thread_reap(void)
 {
 	struct thread *td_first, *td_next;
 	struct ksegrp *kg_first, * kg_next;
 
 	/*
 	 * Don't even bother to lock if none at this instant,
 	 * we really don't care about the next instant..
 	 */
 	if ((!TAILQ_EMPTY(&zombie_threads))
 	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
 		mtx_lock_spin(&kse_zombie_lock);
 		td_first = TAILQ_FIRST(&zombie_threads);
 		kg_first = TAILQ_FIRST(&zombie_ksegrps);
 		if (td_first)
 			TAILQ_INIT(&zombie_threads);
 		if (kg_first)
 			TAILQ_INIT(&zombie_ksegrps);
 		mtx_unlock_spin(&kse_zombie_lock);
 		while (td_first) {
 			td_next = TAILQ_NEXT(td_first, td_runq);
 			if (td_first->td_ucred)
 				crfree(td_first->td_ucred);
 			thread_free(td_first);
 			td_first = td_next;
 		}
 		while (kg_first) {
 			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
 			ksegrp_free(kg_first);
 			kg_first = kg_next;
 		}
 		/*
 		 * there will always be a thread on the list if one of these
 		 * is there.
 		 */
 		kse_GC();
 	}
 }
 
 /*
  * Allocate a ksegrp.
  */
 struct ksegrp *
 ksegrp_alloc(void)
 {
 	struct ksegrp *newkg;
 
 	/* Allocated from ksegrp_zone with M_WAITOK. */
 	newkg = uma_zalloc(ksegrp_zone, M_WAITOK);
 	return (newkg);
 }
 
 /*
  * Allocate a thread.
  */
 struct thread *
 thread_alloc(void)
 {
 	struct thread *newtd;
 
 	/* Reap any zombies first, then allocate from thread_zone. */
 	thread_reap();
 	newtd = uma_zalloc(thread_zone, M_WAITOK);
 	return (newtd);
 }
 
 /*
  * Deallocate a ksegrp.
  */
 void
 ksegrp_free(struct ksegrp *td)
 {
 	/* Despite its name, 'td' is the ksegrp being returned to ksegrp_zone. */
 	uma_zfree(ksegrp_zone, td);
 }
 
 /*
  * Deallocate a thread.
  */
 void
 thread_free(struct thread *td)
 {
 
 	/* Run the machine-dependent cleanup, then return td to thread_zone. */
 	cpu_thread_clean(td);
 	uma_zfree(thread_zone, td);
 }
 
 /*
  * Discard the current thread and exit from its context.
  * Always called with scheduler locked.
  *
  * Because we can't free a thread while we're operating under its context,
  * push the current thread into our CPU's deadthread holder. This means
  * we needn't worry about someone else grabbing our context before we
  * do a cpu_throw().  This may not be needed now as we are under schedlock.
  * Maybe we can just do a thread_stash() as thr_exit1 does.
  */
 /*  XXX
  * libthr expects its thread exit to return for the last
  * thread, meaning that the program is back to non-threaded
  * mode I guess. Because we do this (cpu_throw) unconditionally
  * here, they have their own version of it. (thr_exit1()) 
  * that doesn't do it all if this was the last thread.
  * It is also called from thread_suspend_check().
  * Of course in the end, they end up coming here through exit1
  * anyhow..  After fixing 'thr' to play by the rules we should be able 
  * to merge these two functions together.
  *
  * called from:
  * exit1()
  * kse_exit()
  * thr_exit()
  * thread_user_enter()
  * thread_userret()
  * thread_suspend_check()
  */
 void
 thread_exit(void)
 {
 	struct thread *td;
 	struct proc *p;
 	struct ksegrp	*kg;
 
 	td = curthread;
 	kg = td->td_ksegrp;
 	p = td->td_proc;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * NOTE(review): p has already been handed to PROC_LOCK_ASSERT()
 	 * above, which presumably dereferences it, so the NULL check
 	 * below may come too late to be useful -- confirm.
 	 */
 	KASSERT(p != NULL, ("thread exiting without a process"));
 	KASSERT(kg != NULL, ("thread exiting without a kse group"));
 	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
 	    (long)p->p_pid, p->p_comm);
 	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
 
 #ifdef AUDIT
 	AUDIT_SYSCALL_EXIT(0, td);
 #endif
 
 	if (td->td_standin != NULL) {
 		/*
 		 * Note that we don't need to free the cred here as it
 		 * is done in thread_reap().
 		 */
 		thread_stash(td->td_standin);
 		td->td_standin = NULL;
 	}
 
 	/*
 	 * drop FPU & debug register state storage, or any other
 	 * architecture specific resources that
 	 * would not be on a new untouched process.
 	 */
 	cpu_thread_exit(td);	/* XXXSMP */
 
 	/*
 	 * The thread is exiting. scheduler can release its stuff
 	 * and collect stats etc.
 	 */
 	sched_thread_exit(td);
 
 	/*
 	 * The last thread is left attached to the process
 	 * So that the whole bundle gets recycled. Skip
 	 * all this stuff if we never had threads.
 	 * EXIT clears all sign of other threads when
 	 * it goes to single threading, so the last thread always
 	 * takes the short path.
 	 */
 	if (p->p_flag & P_HADTHREADS) {
 		if (p->p_numthreads > 1) {
 			thread_unlink(td);
 
 			/* XXX first arg not used in 4BSD or ULE */
 			sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
 
 			/*
 			 * The test below is NOT true if we are the
 			 * sole exiting thread. P_STOPPED_SNGL is unset
 			 * in exit1() after it is the only survivor.
 			 */
 			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 				if (p->p_numthreads == p->p_suspcount) {
 					thread_unsuspend_one(p->p_singlethread);
 				}
 			}
 
 			/*
 			 * Because each upcall structure has an owner thread,
 			 * owner thread exits only when process is in exiting
 			 * state, so upcall to userland is no longer needed,
 			 * deleting upcall structure is safe here.
 			 * So when all threads in a group is exited, all upcalls
 			 * in the group should be automatically freed.
 			 *  XXXKSE This is a KSE thing and should be exported
 			 * there somehow.
 			 */
 			upcall_remove(td);
 
 			/*
 			 * If the thread we unlinked above was the last one,
 			 * then this ksegrp should go away too.
 			 */
 			if (kg->kg_numthreads == 0) {
 				/*
 				 * let the scheduler know about this in case
 				 * it needs to recover stats or resources.
 				 * Theoretically we could let
 				 * sched_exit_ksegrp()  do the equivalent of
 				 * setting the concurrency to 0
 				 * but don't do it yet to avoid changing
 				 * the existing scheduler code until we
 				 * are ready.
 				 * We supply a random other ksegrp
 				 * as the recipient of any built up
 				 * cpu usage etc. (If the scheduler wants it).
 				 * XXXKSE
 				 * This is probably not fair so think of
 				 * a better answer.
 				 */
 				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
 				sched_set_concurrency(kg, 0); /* XXX TEMP */
 				ksegrp_unlink(kg);
 				ksegrp_stash(kg);
 			}
 			PROC_UNLOCK(p);
 			td->td_ksegrp	= NULL;
 			/* Park ourselves in this CPU's deadthread slot. */
 			PCPU_SET(deadthread, td);
 		} else {
 			/*
 			 * The last thread is exiting.. but not through exit()
 			 * what should we do?
 			 * Theoretically this can't happen
 			 * exit1() - clears threading flags before coming here
 			 * kse_exit() - treats last thread specially
 			 * thr_exit() - treats last thread specially
 			 * thread_user_enter() - only if more exist
 			 * thread_userret() - only if more exist
 			 * thread_suspend_check() - only if more exist
 			 */
 			panic ("thread_exit: Last thread exiting on its own");
 		}
 	} else {
 		/*
 		 * non threaded process comes here.
 		 * This includes an EX threaded process that is coming
 		 * here via exit1(). (exit1 dethreads the proc first).
 		 */
 		PROC_UNLOCK(p);
 	}
 	td->td_state = TDS_INACTIVE;
 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
 	/* cpu_throw() is not expected to return; the panic below guards that. */
 	cpu_throw(td, choosethread());
 	panic("I'm a teapot!");
 	/* NOTREACHED */
 }
 
 /*
  * Do any thread specific cleanups that may be needed in wait()
  * called with Giant, proc and schedlock not held.
  *
  * The process must be down to exactly one thread and one ksegrp (both
  * are asserted).  For that thread this releases the spare "standin"
  * thread, if any, together with the standin's credential reference,
  * calls cpu_thread_clean(), and drops the thread's own credential
  * reference.  thread_reap() is called last.
  */
 void
 thread_wait(struct proc *p)
 {
 	struct thread *td;
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
 	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (td->td_standin != NULL) {
 			/* Drop the standin's ucred before freeing the standin. */
 			if (td->td_standin->td_ucred != NULL) {
 				crfree(td->td_standin->td_ucred);
 				td->td_standin->td_ucred = NULL;
 			}
 			thread_free(td->td_standin);
 			td->td_standin = NULL;
 		}
 		cpu_thread_clean(td);
 		crfree(td->td_ucred);
 	}
 	thread_reap();	/* check for zombie threads etc. */
 }
 
 /*
  * Link a thread to a process.
  * set up anything that needs to be initialized for it to
  * be used by the process.
  *
  * The owning process is taken from kg->kg_proc.  The thread is marked
  * TDS_INACTIVE with td_flags and td_kflags cleared, its contested list,
  * signal queue and sleep callout are initialized, and it is inserted at
  * the head of both the process and the ksegrp thread lists, bumping
  * both thread counts.
  *
  * Note that we do not link to the proc's ucred here.
  * The thread is linked as if running but no KSE assigned.
  * Called from:
  *  proc_linkup()
  *  thread_schedule_upcall()
  *  thr_create()
  */
 void
 thread_link(struct thread *td, struct ksegrp *kg)
 {
 	struct proc *p;
 
 	p = kg->kg_proc;
 	/* Reset the per-thread state and back pointers. */
 	td->td_state    = TDS_INACTIVE;
 	td->td_proc     = p;
 	td->td_ksegrp   = kg;
 	td->td_flags    = 0;
 	td->td_kflags	= 0;
 
 	LIST_INIT(&td->td_contested);
 	sigqueue_init(&td->td_sigqueue, p);
 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
 	/* Make the thread visible on the process and ksegrp lists. */
 	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
 	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
 	p->p_numthreads++;
 	kg->kg_numthreads++;
 }
 
 /*
  * Convert a process with one thread to an unthreaded process.
  *
  * Asserts that only one thread remains, removes the thread's upcall,
  * clears P_SA and P_HADTHREADS on the process, clears the thread's
  * mailbox pointer and its TDP_SA / TDP_CAN_UNBIND flags, stashes any
  * standin thread, and sets the ksegrp concurrency to 1.
  *
  * Called from:
  *  thread_single(exit)  (called from execve and exit)
  *  kse_exit()		XXX may need cleaning up wrt KSE stuff
  */
 void
 thread_unthread(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 
 	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
 	upcall_remove(td);
 	p->p_flag &= ~(P_SA|P_HADTHREADS);
 	td->td_mailbox = NULL;
 	td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND);
 	/* The spare thread is handed to thread_stash(), not freed here. */
 	if (td->td_standin != NULL) {
 		thread_stash(td->td_standin);
 		td->td_standin = NULL;
 	}
 	sched_set_concurrency(td->td_ksegrp, 1);
 }
 
 /*
  * Remove a thread from its process and ksegrp thread lists and
  * decrement both thread counts.  sched_lock must be held (asserted).
  * The thread's td_proc and td_ksegrp pointers are left intact.
  *
  * Called from:
  *  thread_exit()
  */
 void
 thread_unlink(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct ksegrp *kg = td->td_ksegrp;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	TAILQ_REMOVE(&p->p_threads, td, td_plist);
 	p->p_numthreads--;
 	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
 	kg->kg_numthreads--;
 	/* could clear a few other things here */
 	/* Must  NOT clear links to proc and ksegrp! */
 }
 
 /*
  * Enforce single-threading.
  *
  * Returns 1 if the caller must abort (another thread is waiting to
  * exit the process or similar). Process is locked!
  * Returns 0 when you are successfully the only thread running.
  * A process has successfully single threaded in the suspend mode when
  * there are no threads in user mode. Threads in the kernel must be
  * allowed to continue until they get to the user boundary. They may even
  * copy out their return values and data before suspending. They may however be
  * accelerated in reaching the user boundary as we will wake up
  * any sleeping threads that are interruptible. (PCATCH).
  *
  * 'mode' selects how the other threads are counted as "dealt with":
  *  SINGLE_EXIT      - every other thread must be gone (p_numthreads == 1).
  *  SINGLE_BOUNDARY  - other threads are counted via p_boundary_count.
  *  anything else    - other threads are counted via p_suspcount.
  * The proc lock must be held and Giant must not be held (both asserted).
  * A process without P_HADTHREADS returns 0 immediately.
  */
 int
 thread_single(int mode)
 {
 	struct thread *td;
 	struct thread *td2;
 	struct proc *p;
 	int remaining;
 
 	td = curthread;
 	p = td->td_proc;
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT((td != NULL), ("curthread is NULL"));
 
 	if ((p->p_flag & P_HADTHREADS) == 0)
 		return (0);
 
 	/* Is someone already single threading? */
 	if (p->p_singlethread != NULL && p->p_singlethread != td)
 		return (1);
 
 	/* Record the requested mode in the process flags. */
 	if (mode == SINGLE_EXIT) {
 		p->p_flag |= P_SINGLE_EXIT;
 		p->p_flag &= ~P_SINGLE_BOUNDARY;
 	} else {
 		p->p_flag &= ~P_SINGLE_EXIT;
 		if (mode == SINGLE_BOUNDARY)
 			p->p_flag |= P_SINGLE_BOUNDARY;
 		else
 			p->p_flag &= ~P_SINGLE_BOUNDARY;
 	}
 	p->p_flag |= P_STOPPED_SINGLE;
 	mtx_lock_spin(&sched_lock);
 	p->p_singlethread = td;
 	/* Count the threads that still have to be dealt with (incl. us). */
 	if (mode == SINGLE_EXIT)
 		remaining = p->p_numthreads;
 	else if (mode == SINGLE_BOUNDARY)
 		remaining = p->p_numthreads - p->p_boundary_count;
 	else
 		remaining = p->p_numthreads - p->p_suspcount;
 	while (remaining != 1) {
 		/* Prod every other thread according to the mode. */
 		FOREACH_THREAD_IN_PROC(p, td2) {
 			if (td2 == td)
 				continue;
 			td2->td_flags |= TDF_ASTPENDING;
 			if (TD_IS_INHIBITED(td2)) {
 				switch (mode) {
 				case SINGLE_EXIT:
 					/*
 					 * NOTE(review): this tests and clears
 					 * TDF_DBSUSPEND on td (the caller),
 					 * not on td2 -- confirm intent.
 					 */
 					if (td->td_flags & TDF_DBSUSPEND)
 						td->td_flags &= ~TDF_DBSUSPEND;
 					if (TD_IS_SUSPENDED(td2))
 						thread_unsuspend_one(td2);
 					if (TD_ON_SLEEPQ(td2) &&
 					    (td2->td_flags & TDF_SINTR))
-						sleepq_abort(td2);
+						sleepq_abort(td2, EINTR);
 					break;
 				case SINGLE_BOUNDARY:
 					if (TD_IS_SUSPENDED(td2) &&
 					    !(td2->td_flags & TDF_BOUNDARY))
 						thread_unsuspend_one(td2);
 					if (TD_ON_SLEEPQ(td2) &&
 					    (td2->td_flags & TDF_SINTR))
-						sleepq_abort(td2);
+						sleepq_abort(td2, ERESTART);
 					break;
 				default:	
 					if (TD_IS_SUSPENDED(td2))
 						continue;
 					/*
 					 * maybe other inhibited states too?
 					 */
 					if ((td2->td_flags & TDF_SINTR) &&
 					    (td2->td_inhibitors &
 					    (TDI_SLEEPING | TDI_SWAPPED)))
 						thread_suspend_one(td2);
 					break;
 				}
 			}
 #ifdef SMP
 			else if (TD_IS_RUNNING(td2) && td != td2) {
 				forward_signal(td2);
 			}
 #endif
 		}
 		if (mode == SINGLE_EXIT)
 			remaining = p->p_numthreads;
 		else if (mode == SINGLE_BOUNDARY)
 			remaining = p->p_numthreads - p->p_boundary_count;
 		else
 			remaining = p->p_numthreads - p->p_suspcount;
 
 		/*
 		 * Maybe we suspended some threads.. was it enough?
 		 */
 		if (remaining == 1)
 			break;
 
 		/*
 		 * Wake us up when everyone else has suspended.
 		 * In the mean time we suspend as well.
 		 */
 		thread_suspend_one(td);
 		PROC_UNLOCK(p);
 		mi_switch(SW_VOL, NULL);
 		/* Reacquire in proc lock -> sched_lock order before recounting. */
 		mtx_unlock_spin(&sched_lock);
 		PROC_LOCK(p);
 		mtx_lock_spin(&sched_lock);
 		if (mode == SINGLE_EXIT)
 			remaining = p->p_numthreads;
 		else if (mode == SINGLE_BOUNDARY)
 			remaining = p->p_numthreads - p->p_boundary_count;
 		else
 			remaining = p->p_numthreads - p->p_suspcount;
 	}
 	if (mode == SINGLE_EXIT) {
 		/*
 		 * We have gotten rid of all the other threads and we
 		 * are about to either exit or exec. In either case,
 		 * we try our utmost  to revert to being a non-threaded
 		 * process.
 		 */
 		p->p_singlethread = NULL;
 		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
 		thread_unthread(td);
 	}
 	mtx_unlock_spin(&sched_lock);
 	return (0);
 }
 
 /*
  * Called in from locations that can safely check to see
  * whether we have to suspend or at least throttle for a
  * single-thread event (e.g. fork).
  *
  * Such locations include userret().
  * If the "return_instead" argument is non zero, the thread must be able to
  * accept 0 (caller may continue), or nonzero (caller must abort) as a result.
  *
  * The 'return_instead' argument tells the function if it may do a
  * thread_exit() or suspend, or whether the caller must abort and back
  * out instead.
  *
  * If the thread that set the single_threading request has set the
  * P_SINGLE_EXIT bit in the process flags then this call will never return
  * if 'return_instead' is false, but will exit.
  *
  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
  *---------------+--------------------+---------------------
  *       0       | returns 0          |   returns 0 or nonzero
  *               | when ST ends       |   immediately
  *---------------+--------------------+---------------------
  *       1       | thread exits       |   returns nonzero
  *               |                    |  immediately
  * 0 = thread_exit() or suspension ok,
  * other = return error instead of stopping the thread.
  *
  * While a full suspension is under effect, even a single threading
  * thread would be suspended if it made this call (but it shouldn't).
  * This call should only be made from places where
  * thread_exit() would be safe as that may be the outcome unless
  * return_instead is set.
  *
  * The proc lock must be held and Giant must not be held (both asserted).
  */
 int
 thread_suspend_check(int return_instead)
 {
 	struct thread *td;
 	struct proc *p;
 
 	td = curthread;
 	p = td->td_proc;
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/* Loop for as long as a stop or a debugger suspend is requested. */
 	while (P_SHOULDSTOP(p) ||
 	      ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			KASSERT(p->p_singlethread != NULL,
 			    ("singlethread not set"));
 			/*
 			 * The only suspension in action is a
 			 * single-threading. Single threader need not stop.
 			 * XXX Should be safe to access unlocked
 			 * as it can only be set to be true by us.
 			 */
 			if (p->p_singlethread == td)
 				return (0);	/* Exempt from stopping. */
 		}
 		/* Process is exiting and the caller asked to back out. */
 		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
-			return (1);
+			return (EINTR);
 
 		/* Should we goto user boundary if we didn't come from there? */
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
-			return (1);
+			return (ERESTART);
 
 		/* If thread will exit, flush its pending signals */
 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
 			sigqueue_flush(&td->td_sigqueue);
 
 		mtx_lock_spin(&sched_lock);
 		thread_stopped(p);
 		/*
 		 * If the process is waiting for us to exit,
 		 * this thread should just suicide.
 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
 		 */
 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
 			thread_exit();
 
 		/*
 		 * When a thread suspends, it just
 		 * moves to the processes's suspend queue
 		 * and stays there.
 		 */
 		thread_suspend_one(td);
 		/* Only callers that may really stop count as "at the boundary". */
 		if (return_instead == 0) {
 			p->p_boundary_count++;
 			td->td_flags |= TDF_BOUNDARY;
 		}
 		/* If everyone is now suspended, let the single-threader run. */
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			if (p->p_numthreads == p->p_suspcount) 
 				thread_unsuspend_one(p->p_singlethread);
 		}
 		PROC_UNLOCK(p);
 		mi_switch(SW_INVOL, NULL);
 		if (return_instead == 0) {
 			p->p_boundary_count--;
 			td->td_flags &= ~TDF_BOUNDARY;
 		}
 		mtx_unlock_spin(&sched_lock);
 		PROC_LOCK(p);
 	}
 	return (0);
 }
 
 /*
  * Mark a thread suspended and append it to its process's p_suspended
  * queue, incrementing p_suspcount.  Both sched_lock and the proc lock
  * must be held, and the thread must not already be suspended (all
  * asserted).
  */
 void
 thread_suspend_one(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 	p->p_suspcount++;
 	TD_SET_SUSPENDED(td);
 	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
 }
 
 /*
  * Undo thread_suspend_one(): remove the thread from p_suspended, clear
  * its suspended state, decrement p_suspcount and hand the thread to
  * setrunnable().  Both sched_lock and the proc lock must be held
  * (asserted).
  */
 void
 thread_unsuspend_one(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
 	TD_CLR_SUSPENDED(td);
 	p->p_suspcount--;
 	setrunnable(td);
 }
 
 /*
  * Allow all threads blocked by single threading to continue running.
  *
  * If the process is no longer required to stop, every thread on
  * p_suspended is released.  Otherwise, if only a single-threading
  * request is in effect and every thread is suspended, just the
  * single-threading thread is released.  Both sched_lock and the proc
  * lock must be held (asserted).
  */
 void
 thread_unsuspend(struct proc *p)
 {
 	struct thread *td;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if (!P_SHOULDSTOP(p)) {
 		/* thread_unsuspend_one() dequeues td, so the list drains. */
 		while ((td = TAILQ_FIRST(&p->p_suspended))) {
 			thread_unsuspend_one(td);
 		}
 	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
 	    (p->p_numthreads == p->p_suspcount)) {
 		/*
 		 * Stopping everything also did the job for the single
 		 * threading request. Now we've downgraded to single-threaded,
 		 * let it continue.
 		 */
 		thread_unsuspend_one(p->p_singlethread);
 	}
 }
 
 /*
  * End the single threading mode..
  *
  * Clears the single-threading process flags, p_singlethread and
  * p_procscopegrp for the current thread's process, then releases the
  * suspended threads unless a stop is still pending.  The proc lock
  * must be held (asserted); sched_lock is taken and dropped internally.
  */
 void
 thread_single_end(void)
 {
 	struct thread *td;
 	struct proc *p;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
 	mtx_lock_spin(&sched_lock);
 	p->p_singlethread = NULL;
 	p->p_procscopegrp = NULL;
 	/*
 	 * If there are other threads they may now run,
 	 * unless of course there is a blanket 'stop order'
 	 * on the process. The single threader must be allowed
 	 * to continue however as this is a bad place to stop.
 	 */
 	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
 		/* td is reused here as the iteration variable. */
 		while ((td = TAILQ_FIRST(&p->p_suspended))) {
 			thread_unsuspend_one(td);
 		}
 	}
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Called before going into an interruptible sleep to see if we have been
  * interrupted or requested to exit.
  *
  * Only processes with P_HADTHREADS set are examined.  For a thread that
  * is not the single-threading thread, returns EINTR when P_SINGLE_EXIT
  * is set or ERESTART when P_SINGLE_BOUNDARY is set.  Otherwise returns
  * td_intrval when TDF_INTERRUPT is set on the thread, and 0 if none of
  * these apply.  sched_lock must be held (asserted).
  */
 int
 thread_sleep_check(struct thread *td)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (p->p_flag & P_HADTHREADS) {
 		if (p->p_singlethread != td) {
 			if (p->p_flag & P_SINGLE_EXIT)
 				return (EINTR);
 			if (p->p_flag & P_SINGLE_BOUNDARY)
 				return (ERESTART);
 		}
 		if (td->td_flags & TDF_INTERRUPT)
 			return (td->td_intrval);
 	}
 	return (0);
 }
 
 /*
  * Search the threads of process 'p' for one whose td_tid equals 'tid'.
  * The proc lock must be held (asserted); sched_lock is held only for
  * the duration of the walk.  Returns the matching thread, or whatever
  * FOREACH_THREAD_IN_PROC leaves in the iterator once the list is
  * exhausted (presumably NULL -- confirm against the macro).
  */
 struct thread *
 thread_find(struct proc *p, lwpid_t tid)
 {
 	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_lock_spin(&sched_lock);
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (td->td_tid == tid)
 			break;
 	}
 	mtx_unlock_spin(&sched_lock);
 	return (td);
 }
Index: head/sys/kern/subr_sleepqueue.c
===================================================================
--- head/sys/kern/subr_sleepqueue.c	(revision 155740)
+++ head/sys/kern/subr_sleepqueue.c	(revision 155741)
@@ -1,908 +1,903 @@
 /*-
  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Implementation of sleep queues used to hold queue of threads blocked on
  * a wait channel.  Sleep queues different from turnstiles in that wait
  * channels are not owned by anyone, so there is no priority propagation.
  * Sleep queues can also provide a timeout and can also be interrupted by
  * signals.  That said, there are several similarities between the turnstile
  * and sleep queue implementations.  (Note: turnstiles were implemented
  * first.)  For example, both use a hash table of the same size where each
  * bucket is referred to as a "chain" that contains both a spin lock and
  * a linked list of queues.  An individual queue is located by using a hash
  * to pick a chain, locking the chain, and then walking the chain searching
  * for the queue.  This means that a wait channel object does not need to
  * embed it's queue head just as locks do not embed their turnstile queue
  * head.  Threads also carry around a sleep queue that they lend to the
  * wait channel when blocking.  Just as in turnstiles, the queue includes
  * a free list of the sleep queues of other threads blocked on the same
  * wait channel in the case of multiple waiters.
  *
  * Some additional functionality provided by sleep queues include the
  * ability to set a timeout.  The timeout is managed using a per-thread
  * callout that resumes a thread if it is asleep.  A thread may also
  * catch signals while it is asleep (aka an interruptible sleep).  The
  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
  * sleep queues also provide some extra assertions.  One is not allowed to
  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
  * must consistently use the same lock to synchronize with a wait channel,
  * though this check is currently only a warning for sleep/wakeup due to
  * pre-existing abuse of that API.  The same lock must also be held when
  * awakening threads, though that is currently only enforced for condition
  * variables.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_sleepqueue_profiling.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 /*
  * Constants for the hash table of sleep queue chains.  These constants are
  * the same ones that 4BSD (and possibly earlier versions of BSD) used.
  * Basically, we ignore the lower 8 bits of the address since most wait
  * channel pointers are aligned and only look at the next 7 bits for the
  * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
  *
  * SC_HASH() maps a wait channel address to a chain index and
  * SC_LOOKUP() yields a pointer to the corresponding sleepq_chains[] slot.
  */
 #define	SC_TABLESIZE	128			/* Must be power of 2. */
 #define	SC_MASK		(SC_TABLESIZE - 1)
 #define	SC_SHIFT	8
 #define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
 #define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
 
 /*
  * There are two different lists of sleep queues.  Both lists are connected
  * via the sq_hash entries.  The first list is the sleep queue chain list
  * that a sleep queue is on when it is attached to a wait channel.  The
  * second list is the free list hung off of a sleep queue that is attached
  * to a wait channel.
  *
  * Each sleep queue also contains the wait channel it is attached to, the
  * list of threads blocked on that wait channel, flags specific to the
  * wait channel, and the lock used to synchronize with a wait channel.
  * The flags are used to catch mismatches between the various consumers
  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  * The lock pointer is only used when invariants are enabled for various
  * debugging checks.
  *
  * Note that sq_type and sq_lock only exist when INVARIANTS is defined.
  *
  * Locking key:
  *  c - sleep queue chain lock
  */
 struct sleepqueue {
 	TAILQ_HEAD(, thread) sq_blocked;	/* (c) Blocked threads. */
 	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
 	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
 	void	*sq_wchan;			/* (c) Wait channel. */
 #ifdef INVARIANTS
 	int	sq_type;			/* (c) Queue type. */
 	struct mtx *sq_lock;			/* (c) Associated lock. */
 #endif
 };
 
 /*
  * One bucket of the sleep queue hash table: the list of sleep queues
  * hashing to this bucket plus the spin lock that protects them.  The
  * depth counters are only present when SLEEPQUEUE_PROFILING is defined.
  */
 struct sleepqueue_chain {
 	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
 	struct mtx sc_lock;			/* Spin lock for this chain. */
 #ifdef SLEEPQUEUE_PROFILING
 	u_int	sc_depth;			/* Length of sc_queues. */
 	u_int	sc_max_depth;			/* Max length of sc_queues. */
 #endif
 };
 
 /*
  * Optional profiling: the deepest chain seen so far, exported read-only
  * under the debug.sleepq sysctl tree.  Per-chain nodes are added below
  * debug.sleepq.chains by init_sleepqueues().
  */
 #ifdef SLEEPQUEUE_PROFILING
 u_int sleepq_max_depth;
 SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
 SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
     "sleepq chain stats");
 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
     0, "maxmimum depth achieved of a single chain");
 #endif
 /* The hash table of chains, indexed by SC_HASH(wchan). */
 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
 
 /* Malloc type used by sleepq_alloc() and sleepq_free(). */
 static MALLOC_DEFINE(M_SLEEPQUEUE, "sleepqueue", "sleep queues");
 
 /*
  * Prototypes for non-exported routines.
  * All of these are file-local helpers of the sleepq_*wait*() entry points.
  */
 static int	sleepq_check_timeout(void);
 static void	sleepq_switch(void *wchan);
 static void	sleepq_timeout(void *arg);
 static void	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri);
 
 /*
  * Early initialization of sleep queues that is called from the sleepinit()
  * SYSINIT.
  *
  * Initializes the queue list and spin lock of every chain in
  * sleepq_chains[] and allocates the sleep queue for thread0.  With
  * SLEEPQUEUE_PROFILING it also registers a sysctl node per chain, named
  * by the chain index, holding "depth" and "max_depth".
  */
 void
 init_sleepqueues(void)
 {
 #ifdef SLEEPQUEUE_PROFILING
 	struct sysctl_oid *chain_oid;
 	char chain_name[10];
 #endif
 	int i;
 
 	for (i = 0; i < SC_TABLESIZE; i++) {
 		LIST_INIT(&sleepq_chains[i].sc_queues);
 		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
 		    MTX_SPIN);
 #ifdef SLEEPQUEUE_PROFILING
 		/* The node name is simply the decimal chain index. */
 		snprintf(chain_name, sizeof(chain_name), "%d", i);
 		chain_oid = SYSCTL_ADD_NODE(NULL, 
 		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
 		    chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 		    "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 		    "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
 		    NULL);
 #endif
 	}
 	thread0.td_sleepqueue = sleepq_alloc();
 }
 
 /*
  * Malloc and initialize a new sleep queue for a new thread.
  *
  * The allocation uses M_WAITOK | M_ZERO, so the structure starts out
  * zeroed; the blocked-thread queue and the free list are then
  * initialized as empty.  Returns the new queue.
  */
 struct sleepqueue *
 sleepq_alloc(void)
 {
 	struct sleepqueue *sq;
 
 	sq = malloc(sizeof(struct sleepqueue), M_SLEEPQUEUE, M_WAITOK | M_ZERO);
 	TAILQ_INIT(&sq->sq_blocked);
 	LIST_INIT(&sq->sq_free);
 	return (sq);
 }
 
 /*
  * Free a sleep queue when a thread is destroyed.
  *
  * The queue must be non-NULL and must have no blocked threads (both
  * checked with MPASS).
  */
 void
 sleepq_free(struct sleepqueue *sq)
 {
 
 	MPASS(sq != NULL);
 	MPASS(TAILQ_EMPTY(&sq->sq_blocked));
 	free(sq, M_SLEEPQUEUE);
 }
 
 /*
  * Lock the sleep queue chain associated with the specified wait channel.
  * The chain is selected with SC_LOOKUP() and its spin lock is acquired;
  * sleepq_release() is the matching unlock.
  */
 void
 sleepq_lock(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 
 	sc = SC_LOOKUP(wchan);
 	mtx_lock_spin(&sc->sc_lock);
 }
 
 /*
  * Look up the sleep queue associated with a given wait channel in the hash
  * table.  The associated sleep queue chain must already be locked by the
  * caller (asserted); this routine does not take the lock itself.  If no
  * queue is found in the table, NULL is returned.  The wait channel must
  * not be NULL (asserted).
  */
 struct sleepqueue *
 sleepq_lookup(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	/* Several wait channels may hash to one chain; match on sq_wchan. */
 	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 		if (sq->sq_wchan == wchan)
 			return (sq);
 	return (NULL);
 }
 
 /*
  * Unlock the sleep queue chain associated with a given wait channel.
  * This is the counterpart of sleepq_lock().
  */
 void
 sleepq_release(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 
 	sc = SC_LOOKUP(wchan);
 	mtx_unlock_spin(&sc->sc_lock);
 }
 
 /*
  * Places the current thread on the sleep queue for the specified wait
  * channel.  If INVARIANTS is enabled, then it associates the passed in
  * lock with the sleepq to make sure it is held when that sleep queue is
  * woken up.
  *
  * The chain lock for 'wchan' must be held (asserted) and is still held
  * on return.  The thread gives up its own td_sleepqueue: it either
  * becomes the queue for the wait channel or is parked on the existing
  * queue's free list.  'wmesg' is recorded in td_wmesg; 'flags' carries
  * the queue type (SLEEPQ_TYPE bits) and optionally SLEEPQ_INTERRUPTIBLE.
  */
 void
 sleepq_add(void *wchan, struct mtx *lock, const char *wmesg, int flags)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	MPASS(td->td_sleepqueue != NULL);
 	MPASS(wchan != NULL);
 
 	/* If this thread is not allowed to sleep, die a horrible death. */
 	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
 	    ("Trying sleep, but thread marked as sleeping prohibited"));
 
 	/* Look up the sleep queue associated with the wait channel 'wchan'. */
 	sq = sleepq_lookup(wchan);
 
 	/*
 	 * If the wait channel does not already have a sleep queue, use
 	 * this thread's sleep queue.  Otherwise, insert the current thread
 	 * into the sleep queue already in use by this wait channel.
 	 */
 	if (sq == NULL) {
 #ifdef SLEEPQUEUE_PROFILING
 		/* A new queue joins the chain: update the depth statistics. */
 		sc->sc_depth++;
 		if (sc->sc_depth > sc->sc_max_depth) {
 			sc->sc_max_depth = sc->sc_depth;
 			if (sc->sc_max_depth > sleepq_max_depth)
 				sleepq_max_depth = sc->sc_max_depth;
 		}
 #endif
 		sq = td->td_sleepqueue;
 		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
 		KASSERT(TAILQ_EMPTY(&sq->sq_blocked),
 		    ("thread's sleep queue has a non-empty queue"));
 		KASSERT(LIST_EMPTY(&sq->sq_free),
 		    ("thread's sleep queue has a non-empty free list"));
 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
 		sq->sq_wchan = wchan;
 #ifdef INVARIANTS
 		sq->sq_lock = lock;
 		sq->sq_type = flags & SLEEPQ_TYPE;
 #endif
 	} else {
 		MPASS(wchan == sq->sq_wchan);
 		MPASS(lock == sq->sq_lock);
 		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
 		/* Donate our spare queue to the existing queue's free list. */
 		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
 	}
 	TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq);
 	td->td_sleepqueue = NULL;
 	/* The remaining per-thread fields are updated under sched_lock. */
 	mtx_lock_spin(&sched_lock);
 	td->td_wchan = wchan;
 	td->td_wmesg = wmesg;
-	if (flags & SLEEPQ_INTERRUPTIBLE)
+	if (flags & SLEEPQ_INTERRUPTIBLE) {
 		td->td_flags |= TDF_SINTR;
+		td->td_flags &= ~TDF_SLEEPABORT;
+	}
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Sets a timeout that will remove the current thread from the specified
  * sleep queue after timo ticks if the thread has not already been awakened.
  *
  * The chain lock for 'wchan' must be held (asserted) and the thread must
  * already be on a sleep queue, i.e. sleepq_add() has run (checked with
  * MPASS).  The timeout is armed on the thread's td_slpcallout with
  * sleepq_timeout() as the handler.
  */
 void
 sleepq_set_timeout(void *wchan, int timo)
 {
 	struct sleepqueue_chain *sc;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	MPASS(TD_ON_SLEEPQ(td));
 	MPASS(td->td_sleepqueue == NULL);
 	MPASS(wchan != NULL);
 	callout_reset(&td->td_slpcallout, timo, sleepq_timeout, td);
 }
 
 /*
  * Marks the pending sleep of the current thread as interruptible and
  * makes an initial check for pending signals before putting a thread
- * to sleep.
+ * to sleep. Return with sleep queue and scheduler lock held.
  */
-int
+static int
 sleepq_catch_signals(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 	struct thread *td;
 	struct proc *p;
-	int sig;
+	struct sigacts *ps;
+	int sig, ret;
 
 	td = curthread;
-	p = td->td_proc;
+	p = curproc;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
-	MPASS(td->td_sleepqueue == NULL);
 	MPASS(wchan != NULL);
 	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
-	    (void *)td, (long)p->p_pid, p->p_comm);
+		(void *)td, (long)p->p_pid, p->p_comm);
 
-	/* Mark thread as being in an interruptible sleep. */
 	MPASS(td->td_flags & TDF_SINTR);
-	MPASS(TD_ON_SLEEPQ(td));
-	sleepq_release(wchan);
+	mtx_unlock_spin(&sc->sc_lock);
 
 	/* See if there are any pending signals for this thread. */
 	PROC_LOCK(p);
-	mtx_lock(&p->p_sigacts->ps_mtx);
+	ps = p->p_sigacts;
+	mtx_lock(&ps->ps_mtx);
 	sig = cursig(td);
-	mtx_unlock(&p->p_sigacts->ps_mtx);
-	if (sig == 0 && thread_suspend_check(1))
-		sig = SIGSTOP;
-	PROC_UNLOCK(p);
+	if (sig == 0) {
+		mtx_unlock(&ps->ps_mtx);
+		ret = thread_suspend_check(1);
+		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
+	} else {
+		if (SIGISMEMBER(ps->ps_sigintr, sig))
+			ret = EINTR;
+		else
+			ret = ERESTART;
+		mtx_unlock(&ps->ps_mtx);
+	}
 
-	/*
-	 * If there were pending signals and this thread is still on
-	 * the sleep queue, remove it from the sleep queue.  If the
-	 * thread was removed from the sleep queue while we were blocked
-	 * above, then clear TDF_SINTR before returning.
-	 */
-	sleepq_lock(wchan);
-	sq = sleepq_lookup(wchan);
-	mtx_lock_spin(&sched_lock);
-	if (TD_ON_SLEEPQ(td) && sig != 0)
-		sleepq_resume_thread(sq, td, -1);
-	else if (!TD_ON_SLEEPQ(td) && sig == 0)
+	if (ret) {
+		PROC_UNLOCK(p);
+		/*
+		 * If there were pending signals and this thread is still on
+		 * the sleep queue, remove it from the sleep queue.
+		 */
+		mtx_lock_spin(&sc->sc_lock);
+		sq = sleepq_lookup(wchan);
+		mtx_lock_spin(&sched_lock);
+		if (TD_ON_SLEEPQ(td))
+			sleepq_resume_thread(sq, td, -1);
 		td->td_flags &= ~TDF_SINTR;
-	mtx_unlock_spin(&sched_lock);
-	return (sig);
+	} else {
+		mtx_lock_spin(&sc->sc_lock);
+		mtx_lock_spin(&sched_lock);
+		PROC_UNLOCK(p);
+	}
 	/*
 	 * Either way the chain lock and sched_lock have been reacquired
 	 * by the time the result is handed back to the caller.
 	 */
+	return (ret);
 }
 
 /*
  * Switches to another thread if we are still asleep on a sleep queue and
  * drop the lock on the sleep queue chain.  Returns with sched_lock held.
  *
  * The chain lock for 'wchan' must be held on entry (asserted) and is
  * released on both paths.  A non-NULL td_sleepqueue means the thread
  * has already been taken off the sleep queue, so no switch is done.
  */
 static void
 sleepq_switch(void *wchan)
 {
 	struct sleepqueue_chain *sc;
 	struct thread *td;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
+	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* 
 	 * If we have a sleep queue, then we've already been woken up, so
 	 * just return.
 	 */
 	if (td->td_sleepqueue != NULL) {
 		MPASS(!TD_ON_SLEEPQ(td));
 		mtx_unlock_spin(&sc->sc_lock);
-		mtx_lock_spin(&sched_lock);
 		return;
 	}
 
 	/*
 	 * Otherwise, actually go to sleep.
 	 */
-	mtx_lock_spin(&sched_lock);
 	mtx_unlock_spin(&sc->sc_lock);
-
 	sched_sleep(td);
 	TD_SET_SLEEPING(td);
 	mi_switch(SW_VOL, NULL);
 	/* Execution resumes here once the thread has been made runnable. */
 	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
 	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 }
 
 /*
  * Check to see if we timed out.
  *
  * sched_lock must be held (asserted).  Returns EWOULDBLOCK when
  * TDF_TIMEOUT was set on the current thread (the flag is cleared),
  * and 0 otherwise.  In the latter case a pending TDF_TIMOFAIL is
  * cleared, or else the sleep callout is stopped.
  */
 static int
 sleepq_check_timeout(void)
 {
 	struct thread *td;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	td = curthread;
 
 	/*
 	 * If TDF_TIMEOUT is set, we timed out.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		td->td_flags &= ~TDF_TIMEOUT;
 		return (EWOULDBLOCK);
 	}
 
 	/*
 	 * If TDF_TIMOFAIL is set, the timeout ran after we had
 	 * already been woken up.
 	 */
 	if (td->td_flags & TDF_TIMOFAIL)
 		td->td_flags &= ~TDF_TIMOFAIL;
 
 	/*
 	 * If callout_stop() fails, then the timeout is running on
 	 * another CPU, so synchronize with it to avoid having it
 	 * accidentally wake up a subsequent sleep.
 	 */
 	else if (callout_stop(&td->td_slpcallout) == 0) {
 		/* Go back to sleep until the in-flight handler has run. */
 		td->td_flags |= TDF_TIMEOUT;
 		TD_SET_SLEEPING(td);
 		mi_switch(SW_INVOL, NULL);
 	}
 	return (0);
 }
 
 /*
  * Check to see if we were awoken by a signal.
  *
  * sched_lock must be held (asserted).  The result is td_intrval when
  * the thread was interrupted, and 0 otherwise.
  */
 static int
 sleepq_check_signals(void)
 {
 	struct thread *td;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	td = curthread;
 
-	/*
-	 * If TDF_SINTR is clear, then we were awakened while executing
-	 * sleepq_catch_signals().
-	 */
-	if (!(td->td_flags & TDF_SINTR))
-		return (0);
-
 	/* We are no longer in an interruptible sleep. */
-	td->td_flags &= ~TDF_SINTR;
+	if (td->td_flags & TDF_SINTR)
+		td->td_flags &= ~TDF_SINTR;
 
+	if (td->td_flags & TDF_SLEEPABORT) {
+		td->td_flags &= ~TDF_SLEEPABORT;
+		return (td->td_intrval);
+	}
+
 	/* TDF_INTERRUPT is only tested here; it is not cleared. */
 	if (td->td_flags & TDF_INTERRUPT)
 		return (td->td_intrval);
+
 	return (0);
 }
 
 /*
- * If we were in an interruptible sleep and we weren't interrupted and
- * didn't timeout, check to see if there are any pending signals and
- * which return value we should use if so.  The return value from an
- * earlier call to sleepq_catch_signals() should be passed in as the
- * argument.
- */
-int
-sleepq_calc_signal_retval(int sig)
-{
-	struct thread *td;
-	struct proc *p;
-	int rval;
-
-	td = curthread;
-	p = td->td_proc;
-	PROC_LOCK(p);
-	mtx_lock(&p->p_sigacts->ps_mtx);
-	/* XXX: Should we always be calling cursig()? */
-	if (sig == 0)
-		sig = cursig(td);
-	if (sig != 0) {
-		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
-			rval = EINTR;
-		else
-			rval = ERESTART;
-	} else
-		rval = 0;
-	mtx_unlock(&p->p_sigacts->ps_mtx);
-	PROC_UNLOCK(p);
-	return (rval);
-}
-
-/*
  * Block the current thread until it is awakened from its sleep queue.
  */
 void
 sleepq_wait(void *wchan)
 {
 
 	MPASS(!(curthread->td_flags & TDF_SINTR));
+	mtx_lock_spin(&sched_lock);
 	sleepq_switch(wchan);
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Block the current thread until it is awakened from its sleep queue
  * or it is interrupted by a signal.
  */
 int
 sleepq_wait_sig(void *wchan)
 {
+	int rcatch;
 	int rval;
 
-	sleepq_switch(wchan);
+	rcatch = sleepq_catch_signals(wchan);
+	if (rcatch == 0)
+		sleepq_switch(wchan);
+	else
+		sleepq_release(wchan);
 	rval = sleepq_check_signals();
 	mtx_unlock_spin(&sched_lock); 
+	if (rcatch)
+		return (rcatch);
 	return (rval);
 }
 
 /*
  * Block the current thread until it is awakened from its sleep queue
  * or it times out while waiting.
  */
 int
 sleepq_timedwait(void *wchan)
 {
 	int rval;
 
 	MPASS(!(curthread->td_flags & TDF_SINTR));
+	mtx_lock_spin(&sched_lock);
 	sleepq_switch(wchan);
 	rval = sleepq_check_timeout();
 	mtx_unlock_spin(&sched_lock);
 	return (rval);
 }
 
 /*
  * Block the current thread until it is awakened from its sleep queue,
  * it is interrupted by a signal, or it times out waiting to be awakened.
  */
 int
-sleepq_timedwait_sig(void *wchan, int signal_caught)
+sleepq_timedwait_sig(void *wchan)
 {
-	int rvalt, rvals;
+	int rcatch, rvalt, rvals;
 
-	sleepq_switch(wchan);
+	rcatch = sleepq_catch_signals(wchan);
+	if (rcatch == 0)
+		sleepq_switch(wchan);
+	else
+		sleepq_release(wchan);
 	rvalt = sleepq_check_timeout();
 	rvals = sleepq_check_signals();
 	mtx_unlock_spin(&sched_lock);
-	if (signal_caught || rvalt == 0)
+	if (rcatch)
+		return (rcatch);
+	if (rvals)
 		return (rvals);
-	else
-		return (rvalt);
+	return (rvalt);
 }
 
 /*
  * Removes a thread from a sleep queue and makes it
  * runnable.
  */
 static void
 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
 {
 	struct sleepqueue_chain *sc;
 
 	MPASS(td != NULL);
 	MPASS(sq->sq_wchan != NULL);
 	MPASS(td->td_wchan == sq->sq_wchan);
 	sc = SC_LOOKUP(sq->sq_wchan);
 	mtx_assert(&sc->sc_lock, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* Remove the thread from the queue. */
 	TAILQ_REMOVE(&sq->sq_blocked, td, td_slpq);
 
 	/*
 	 * Get a sleep queue for this thread.  If this is the last waiter,
 	 * use the queue itself and take it out of the chain, otherwise,
 	 * remove a queue from the free list.
 	 */
 	if (LIST_EMPTY(&sq->sq_free)) {
 		td->td_sleepqueue = sq;
 #ifdef INVARIANTS
 		sq->sq_wchan = NULL;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
 		sc->sc_depth--;
 #endif
 	} else
 		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
 	LIST_REMOVE(td->td_sleepqueue, sq_hash);
 
 	td->td_wmesg = NULL;
 	td->td_wchan = NULL;
 
 	/*
 	 * Note that thread td might not be sleeping if it is running
 	 * sleepq_catch_signals() on another CPU or is blocked on
 	 * its proc lock to check signals.  It doesn't hurt to clear
 	 * the sleeping flag if it isn't set though, so we just always
 	 * do it.  However, we can't assert that it is set.
 	 */
 	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, td->td_proc->p_comm);
 	TD_CLR_SLEEPING(td);
 
 	/* Adjust priority if requested. */
 	MPASS(pri == -1 || (pri >= PRI_MIN && pri <= PRI_MAX));
 	if (pri != -1 && td->td_priority > pri)
 		sched_prio(td, pri);
 	setrunnable(td);
 }
 
 /*
  * Find the highest priority thread sleeping on a wait channel and resume it.
  */
 void
 sleepq_signal(void *wchan, int flags, int pri)
 {
 	struct sleepqueue *sq;
 	struct thread *td, *besttd;
 
 	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	sq = sleepq_lookup(wchan);
 	if (sq == NULL) {
 		sleepq_release(wchan);
 		return;
 	}
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
 	/*
 	 * Find the highest priority thread on the queue.  If there is a
 	 * tie, use the thread that first appears in the queue as it has
 	 * been sleeping the longest since threads are always added to
 	 * the tail of sleep queues.
 	 */
 	besttd = NULL;
 	TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) {
 		if (besttd == NULL || td->td_priority < besttd->td_priority)
 			besttd = td;
 	}
 	MPASS(besttd != NULL);
 	mtx_lock_spin(&sched_lock);
 	sleepq_resume_thread(sq, besttd, pri);
 	mtx_unlock_spin(&sched_lock);
 	sleepq_release(wchan);
 }
 
 /*
  * Resume all threads sleeping on a specified wait channel.
  */
 void
 sleepq_broadcast(void *wchan, int flags, int pri)
 {
 	struct sleepqueue *sq;
 
 	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	sq = sleepq_lookup(wchan);
 	if (sq == NULL) {
 		sleepq_release(wchan);
 		return;
 	}
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
 	/* Resume all blocked threads on the sleep queue. */
 	mtx_lock_spin(&sched_lock);
 	while (!TAILQ_EMPTY(&sq->sq_blocked))
 		sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked), pri);
 	mtx_unlock_spin(&sched_lock);
 	sleepq_release(wchan);
 }
 
 /*
  * Time sleeping threads out.  When the timeout expires, the thread is
  * removed from the sleep queue and made runnable if it is still asleep.
  */
 static void
 sleepq_timeout(void *arg)
 {
 	struct sleepqueue *sq;
 	struct thread *td;
 	void *wchan;
 
 	td = arg;
 	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 
 	/*
 	 * First, see if the thread is asleep and get the wait channel if
 	 * it is.
 	 */
 	mtx_lock_spin(&sched_lock);
 	if (TD_ON_SLEEPQ(td)) {
 		wchan = td->td_wchan;
 		mtx_unlock_spin(&sched_lock);
 		sleepq_lock(wchan);
 		sq = sleepq_lookup(wchan);
 		mtx_lock_spin(&sched_lock);
 	} else {
 		wchan = NULL;
 		sq = NULL;
 	}
 
 	/*
 	 * At this point, if the thread is still on the sleep queue,
 	 * we have that sleep queue locked as it cannot migrate sleep
 	 * queues while we dropped sched_lock.  If it had resumed and
 	 * was on another CPU while the lock was dropped, it would have
 	 * seen that TDF_TIMEOUT and TDF_TIMOFAIL are clear and the
 	 * call to callout_stop() to stop this routine would have failed
 	 * meaning that it would have already set TDF_TIMEOUT to
 	 * synchronize with this function.
 	 */
 	if (TD_ON_SLEEPQ(td)) {
 		MPASS(td->td_wchan == wchan);
 		MPASS(sq != NULL);
 		td->td_flags |= TDF_TIMEOUT;
 		sleepq_resume_thread(sq, td, -1);
 		mtx_unlock_spin(&sched_lock);
 		sleepq_release(wchan);
 		return;
 	} else if (wchan != NULL)
 		sleepq_release(wchan);
 
 	/*
 	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
 	 * then the other thread has already yielded to us, so clear
 	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
 	 * we know that the other thread is not on a sleep queue, but it
 	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
 	 * to let it know that the timeout has already run and doesn't
 	 * need to be canceled.
 	 */
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_IS_SLEEPING(td));
 		td->td_flags &= ~TDF_TIMEOUT;
 		TD_CLR_SLEEPING(td);
 		setrunnable(td);
 	} else
 		td->td_flags |= TDF_TIMOFAIL;
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Resumes a specific thread from the sleep queue associated with a specific
  * wait channel if it is on that queue.
  */
 void
 sleepq_remove(struct thread *td, void *wchan)
 {
 	struct sleepqueue *sq;
 
 	/*
 	 * Look up the sleep queue for this wait channel, then re-check
 	 * that the thread is asleep on that channel, if it is not, then
 	 * bail.
 	 */
 	MPASS(wchan != NULL);
 	sleepq_lock(wchan);
 	sq = sleepq_lookup(wchan);
 	mtx_lock_spin(&sched_lock);
 	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 		mtx_unlock_spin(&sched_lock);
 		sleepq_release(wchan);
 		return;
 	}
 	MPASS(sq != NULL);
 
 	/* Thread is asleep on sleep queue sq, so wake it up. */
 	sleepq_resume_thread(sq, td, -1);
 	sleepq_release(wchan);
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Abort a thread as if an interrupt had occurred.  Only abort
  * interruptible waits (unfortunately it isn't safe to abort others).
  *
  * XXX: What in the world does the comment below mean?
  * Also, whatever the signal code does...
  */
 void
-sleepq_abort(struct thread *td)
+sleepq_abort(struct thread *td, int intrval)
 {
 	void *wchan;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	MPASS(TD_ON_SLEEPQ(td));
 	MPASS(td->td_flags & TDF_SINTR);
+	MPASS(intrval == EINTR || intrval == ERESTART);
 
 	/*
 	 * If the TDF_TIMEOUT flag is set, just leave. A
 	 * timeout is scheduled anyhow.
 	 */
 	if (td->td_flags & TDF_TIMEOUT)
 		return;
 
 	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 	wchan = td->td_wchan;
+	if (wchan != NULL) {
+		td->td_intrval = intrval;
+		td->td_flags |= TDF_SLEEPABORT;
+	}
 	mtx_unlock_spin(&sched_lock);
 	sleepq_remove(td, wchan);
 	mtx_lock_spin(&sched_lock);
 }
 
 #ifdef DDB
 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
 #ifdef INVARIANTS
 	struct lock_object *lock;
 #endif
 	struct thread *td;
 	void *wchan;
 	int i;
 
 	if (!have_addr)
 		return;
 
 	/*
 	 * First, see if there is an active sleep queue for the wait channel
 	 * indicated by the address.
 	 */
 	wchan = (void *)addr;
 	sc = SC_LOOKUP(wchan);
 	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 		if (sq->sq_wchan == wchan)
 			goto found;
 
 	/*
 	 * Second, see if there is an active sleep queue at the address
 	 * indicated.
 	 */
 	for (i = 0; i < SC_TABLESIZE; i++)
 		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 			if (sq == (struct sleepqueue *)addr)
 				goto found;
 		}
 
 	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 	return;
 found:
 	db_printf("Wait channel: %p\n", sq->sq_wchan);
 #ifdef INVARIANTS
 	db_printf("Queue type: %d\n", sq->sq_type);
 	if (sq->sq_lock) {
 		lock = &sq->sq_lock->mtx_object;
 		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
 	}
 #endif
 	db_printf("Blocked threads:\n");
 	if (TAILQ_EMPTY(&sq->sq_blocked))
 		db_printf("\tempty\n");
 	else
 		TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) {
 			db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 			    td->td_tid, td->td_proc->p_pid,
 			    td->td_proc->p_comm);
 		}	
 }
 #endif
Index: head/sys/sys/proc.h
===================================================================
--- head/sys/sys/proc.h	(revision 155740)
+++ head/sys/sys/proc.h	(revision 155741)
@@ -1,971 +1,971 @@
 /*-
  * Copyright (c) 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)proc.h	8.15 (Berkeley) 5/19/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_PROC_H_
 #define	_SYS_PROC_H_
 
 #include <sys/callout.h>		/* For struct callout. */
 #include <sys/event.h>			/* For struct klist. */
 #ifndef _KERNEL
 #include <sys/filedesc.h>
 #endif
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/priority.h>
 #include <sys/rtprio.h>			/* XXX. */
 #include <sys/runq.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #ifndef _KERNEL
 #include <sys/time.h>			/* For structs itimerval, timeval. */
 #else
 #include <sys/pcpu.h>
 #endif
 #include <sys/ucontext.h>
 #include <sys/ucred.h>
 #include <machine/proc.h>		/* Machine-dependent proc substruct. */
 
 /*
  * One structure allocated per session.
  *
  * List of locks
  * (m)		locked by s_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct session {
 	int		s_count;	/* (m) Ref cnt; pgrps in session. */
 	struct proc	*s_leader;	/* (m + e) Session leader. */
 	struct vnode	*s_ttyvp;	/* (m) Vnode of controlling tty. */
 	struct tty	*s_ttyp;	/* (m) Controlling tty. */
 	pid_t		s_sid;		/* (c) Session ID. */
 					/* (m) Setlogin() name: */
 	char		s_login[roundup(MAXLOGNAME, sizeof(long))];
 	struct mtx	s_mtx;		/* Mutex to protect members. */
 };
 
 /*
  * One structure allocated per process group.
  *
  * List of locks
  * (m)		locked by pg_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct pgrp {
 	LIST_ENTRY(pgrp) pg_hash;	/* (e) Hash chain. */
 	LIST_HEAD(, proc) pg_members;	/* (m + e) Pointer to pgrp members. */
 	struct session	*pg_session;	/* (c) Pointer to session. */
 	struct sigiolst	pg_sigiolst;	/* (m) List of sigio sources. */
 	pid_t		pg_id;		/* (c) Process group id. */
 	int		pg_jobc;	/* (m) Job control process count. */
 	struct mtx	pg_mtx;		/* Mutex to protect members */
 };
 
 /*
  * pargs, used to hold a copy of the command line, if it had a sane length.
  */
 struct pargs {
 	u_int	ar_ref;		/* Reference count. */
 	u_int	ar_length;	/* Length. */
 	u_char	ar_args[1];	/* Arguments. */
 };
 
 /*-
  * Description of a process.
  *
  * This structure contains the information needed to manage a thread of
  * control, known in UN*X as a process; it has references to substructures
  * containing descriptions of things that the process uses, but may share
  * with related processes.  The process structure and the substructures
  * are always addressable except for those marked "(CPU)" below,
  * which might be addressable only on a processor on which the process
  * is running.
  *
  * Below is a key of locks used to protect each member of struct proc.  The
  * lock is indicated by a reference to a specific character in parens in the
  * associated comment.
  *      * - not yet protected
  *      a - only touched by curproc or parent during fork/wait
  *      b - created at fork, never changes
  *		(exception aiods switch vmspaces, but they are also
  *		marked 'P_SYSTEM' so hopefully it will be left alone)
  *      c - locked by proc mtx
  *      d - locked by allproc_lock lock
  *      e - locked by proctree_lock lock
  *      f - session mtx
  *      g - process group mtx
  *      h - callout_lock mtx
  *      i - by curproc or the master session mtx
  *      j - locked by sched_lock mtx
  *      k - only accessed by curthread
  *	k*- only accessed by curthread and from an interrupt
  *      l - the attaching proc or attaching proc parent
  *      m - Giant
  *      n - not locked, lazy
  *      o - ktrace lock
  *      p - select lock (sellock)
  *      q - td_contested lock
  *      r - p_peers lock
  *      x - created at fork, only changes during single threading in exec
  *      z - zombie threads/ksegroup lock
  *
  * If the locking key specifies two identifiers (for example, p_pptr) then
  * either lock is sufficient for read access, but both locks must be held
  * for write access.
  */
 struct auditinfo;
 struct kaudit_record;
 struct kg_sched;
 struct nlminfo;
 struct kaioinfo;
 struct p_sched;
 struct sleepqueue;
 struct td_sched;
 struct trapframe;
 struct turnstile;
 struct mqueue_notifier;
 
 /*
  * Here we define the three structures used for process information.
  *
  * The first is the thread. It might be thought of as a "Kernel
  * Schedulable Entity Context".
  * This structure contains all the information as to where a thread of
  * execution is now, or was when it was suspended, why it was suspended,
  * and anything else that will be needed to restart it when it is
  * rescheduled. Always associated with a KSE when running, but can be
  * reassigned to an equivalent KSE when being restarted for
  * load balancing. Each of these is associated with a kernel stack
  * and a pcb.
  *
  * It is important to remember that a particular thread structure may only
  * exist as long as the system call or kernel entrance (e.g. by pagefault)
  * which it is currently executing. It should therefore NEVER be referenced
  * by pointers in long lived structures that live longer than a single
  * request. If several threads complete their work at the same time,
  * they will all rewind their stacks to the user boundary, report their
  * completion state, and all but one will be freed. That last one will
  * be kept to provide a kernel stack and pcb for the NEXT syscall or kernel
  * entrance (basically to save freeing and then re-allocating it).  The existing
  * thread keeps a cached spare thread available to allow it to quickly
  * get one when it needs a new one. There is also a system
  * cache of free threads. Threads have priority and partake in priority
  * inheritance schemes.
  */
 struct thread;
 
 /*
  * The KSEGRP is allocated resources across a number of CPUs.
  * (Including a number of CPUxQUANTA. It parcels these QUANTA up among
  * its threads, each of which should be running in a different CPU.
  * BASE priority and total available quanta are properties of a KSEGRP.
  * Multiple KSEGRPs in a single process compete against each other
  * for total quanta in the same way that a forked child competes against
  * it's parent process.
  */
 struct ksegrp;
 
 /*
  * A process is the owner of all system resources allocated to a task
  * except CPU quanta.
  * All KSEGs under one process see, and have the same access to, these
  * resources (e.g. files, memory, sockets, credential, kqueues).
  * A process may compete for CPU cycles on the same basis as a
  * forked process cluster by spawning several KSEGRPs.
  */
 struct proc;
 
 /***************
  * In pictures:
  With a single run queue used by all processors:
 
  RUNQ: --->KSE---KSE--...               SLEEPQ:[]---THREAD---THREAD---THREAD
 	     \      \                          []---THREAD
       KSEG---THREAD--THREAD--THREAD            []
 					       []---THREAD---THREAD
 
   (processors run THREADs from the KSEG until they are exhausted or
   the KSEG exhausts its quantum)
 
 With PER-CPU run queues:
 KSEs on the separate run queues directly
 They would be given priorities calculated from the KSEG.
 
  *
  *****************/
 
 /*
  * Kernel runnable context (thread).
  * This is what is put to sleep and reactivated.
  * The first KSE available in the correct group will run this thread.
  * If several are available, use the one on the same CPU as last time.
  * When waiting to be run, threads are hung off the KSEGRP in priority order.
  * With N runnable and queued KSEs in the KSEGRP, the first N threads
  * are linked to them. Other threads are not yet assigned.
  */
 struct thread {
 	struct proc	*td_proc;	/* (*) Associated process. */
 	struct ksegrp	*td_ksegrp;	/* (*) Associated KSEG. */
 	TAILQ_ENTRY(thread) td_plist;	/* (*) All threads in this proc. */
 	TAILQ_ENTRY(thread) td_kglist;	/* (*) All threads in this ksegrp. */
 
 	/* The two queues below should someday be merged. */
 	TAILQ_ENTRY(thread) td_slpq;	/* (j) Sleep queue. */
 	TAILQ_ENTRY(thread) td_lockq;	/* (j) Lock queue. */
 	TAILQ_ENTRY(thread) td_runq;	/* (j/z) Run queue(s). XXXKSE */
 
 	TAILQ_HEAD(, selinfo) td_selq;	/* (p) List of selinfos. */
 	struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
 	struct turnstile *td_turnstile;	/* (k) Associated turnstile. */
 	struct umtx_q   *td_umtxq;	/* (c?) Link for when we're blocked. */
 	lwpid_t		td_tid;		/* (b) Thread ID. */
 	sigqueue_t	td_sigqueue;	/* (c) Sigs arrived, not delivered. */
 #define	td_siglist	td_sigqueue.sq_signals
 
 /* Cleared during fork1() or thread_schedule_upcall(). */
 #define	td_startzero td_flags
 	int		td_flags;	/* (j) TDF_* flags. */
 	int		td_inhibitors;	/* (j) Why can not run. */
 	int		td_pflags;	/* (k) Private thread (TDP_*) flags. */
 	int		td_dupfd;	/* (k) Ret value from fdopen. XXX */
 	void		*td_wchan;	/* (j) Sleep address. */
 	const char	*td_wmesg;	/* (j) Reason for sleep. */
 	u_char		td_lastcpu;	/* (j) Last cpu we were on. */
 	u_char		td_oncpu;	/* (j) Which cpu we are on. */
 	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
 	short		td_locks;	/* (k) DEBUG: lockmgr count of locks. */
 	u_char		td_tsqueue;	/* (j) Turnstile queue blocked on. */
 	struct turnstile *td_blocked;	/* (j) Lock thread is blocked on. */
 	const char	*td_lockname;	/* (j) Name of lock blocked on. */
 	LIST_HEAD(, turnstile) td_contested;	/* (q) Contested locks. */
 	struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
 	int		td_intr_nesting_level; /* (k) Interrupt recursion. */
 	int		td_pinned;	/* (k) Temporary cpu pin count. */
 	struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */
 	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
 	struct thread	*td_standin;	/* (k + a) Use this for an upcall. */
 	struct kse_upcall *td_upcall;	/* (k + j) Upcall structure. */
 	u_int		td_pticks;	/* (k) Statclock hits for profiling */
 	u_int		td_sticks;	/* (k) Statclock hits in system mode. */
 	u_int		td_iticks;	/* (k) Statclock hits in intr mode. */
 	u_int		td_uticks;	/* (k) Statclock hits in user mode. */
 	u_int		td_uuticks;	/* (k) Statclock hits (usr), for UTS. */
 	u_int		td_usticks;	/* (k) Statclock hits (sys), for UTS. */
 	int		td_intrval;	/* (j) Return value of TDF_INTERRUPT. */
 	sigset_t	td_oldsigmask;	/* (k) Saved mask from pre sigpause. */
 	sigset_t	td_sigmask;	/* (c) Current signal mask. */
 	volatile u_int	td_generation;	/* (k) For detection of preemption */
 	stack_t		td_sigstk;	/* (k) Stack ptr and on-stack flag. */
 	int		td_kflags;	/* (c) Flags for KSE threading. */
 	int		td_xsig;	/* (c) Signal for ptrace */
 	u_long		td_profil_addr;	/* (k) Temporary addr until AST. */
 	u_int		td_profil_ticks; /* (k) Temporary ticks until AST. */
 	char		td_name[MAXCOMLEN + 1];	/* (*) Thread name. */
 #define	td_endzero td_base_pri
 
 /* Copied during fork1() or thread_sched_upcall(). */
 #define	td_startcopy td_endzero
 	u_char		td_base_pri;	/* (j) Thread base kernel priority. */
 	u_char		td_priority;	/* (j) Thread active priority. */
 #define	td_endcopy td_pcb
 
 /*
  * Fields that must be manually set in fork1() or thread_sched_upcall()
  * or already have been set in the allocator, constructor, etc.
  */
 	struct pcb	*td_pcb;	/* (k) Kernel VA of pcb and kstack. */
 	enum {
 		TDS_INACTIVE = 0x0,
 		TDS_INHIBITED,
 		TDS_CAN_RUN,
 		TDS_RUNQ,
 		TDS_RUNNING
 	} td_state;
 	register_t	td_retval[2];	/* (k) Syscall aux returns. */
 	struct callout	td_slpcallout;	/* (h) Callout for sleep. */
 	struct trapframe *td_frame;	/* (k) */
 	struct vm_object *td_kstack_obj;/* (a) Kstack object. */
 	vm_offset_t	td_kstack;	/* (a) Kernel VA of kstack. */
 	int		td_kstack_pages; /* (a) Size of the kstack. */
 	struct vm_object *td_altkstack_obj;/* (a) Alternate kstack object. */
 	vm_offset_t	td_altkstack;	/* (a) Kernel VA of alternate kstack. */
 	int		td_altkstack_pages; /* (a) Size of alternate kstack. */
 	volatile u_int	td_critnest;	/* (k*) Critical section nest level. */
 	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
 	struct td_sched	*td_sched;	/* (*) Scheduler-specific data. */
 	struct kaudit_record	*td_ar;	/* (k) Active audit record, if any. */
 };
 
 /*
  * Flags kept in td_flags:
  * To change these you MUST have the scheduler lock.
  */
 #define	TDF_BORROWING	0x00000001 /* Thread is borrowing pri from another. */
 #define	TDF_INPANIC	0x00000002 /* Caused a panic, let it drive crashdump. */
 #define	TDF_SINTR	0x00000008 /* Sleep is interruptible. */
 #define	TDF_TIMEOUT	0x00000010 /* Timing out during sleep. */
 #define	TDF_IDLETD	0x00000020 /* This is a per-CPU idle thread. */
 #define	TDF_SELECT	0x00000040 /* Selecting; wakeup/waiting danger. */
-#define	TDF_UNUSED7	0x00000080 /* --available -- */
+#define	TDF_SLEEPABORT	0x00000080 /* sleepq_abort was called. */
 #define	TDF_TSNOBLOCK	0x00000100 /* Don't block on a turnstile due to race. */
 #define	TDF_UNUSED9	0x00000200 /* --available -- */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_ASTPENDING	0x00000800 /* Thread has some asynchronous events. */
 #define	TDF_TIMOFAIL	0x00001000 /* Timeout from sleep after we were awake. */
 #define	TDF_INTERRUPT	0x00002000 /* Thread is marked as interrupted. */
 #define	TDF_UNUSED14	0x00004000 /* --available -- */
 #define	TDF_UNUSED15	0x00008000 /* --available -- */
 #define	TDF_NEEDRESCHED	0x00010000 /* Thread needs to yield. */
 #define	TDF_NEEDSIGCHK	0x00020000 /* Thread may need signal delivery. */
 #define	TDF_XSIG	0x00040000 /* Thread is exchanging signal under trace */
 #define	TDF_UMTXQ	0x00080000 /* Thread is sleeping on a umtx. */
 #define	TDF_THRWAKEUP	0x00100000 /* Libthr thread must not suspend itself. */
 #define	TDF_DBSUSPEND	0x00200000 /* Thread is suspended by debugger */
 #define	TDF_UNUSED22	0x00400000 /* --available -- */
 #define	TDF_UNUSED23	0x00800000 /* --available -- */
 #define	TDF_SCHED0	0x01000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED1	0x02000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED2	0x04000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED3	0x08000000 /* Reserved for scheduler private use */
 
 /*
  * "Private" flags kept in td_pflags:
  * These are only accessed by curthread and thus need no locking.
  */
 #define	TDP_OLDMASK	0x00000001 /* Need to restore mask after suspend. */
 #define	TDP_INKTR	0x00000002 /* Thread is currently in KTR code. */
 #define	TDP_INKTRACE	0x00000004 /* Thread is currently in KTRACE code. */
 #define	TDP_UPCALLING	0x00000008 /* This thread is doing an upcall. */
 #define	TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
 #define	TDP_ALTSTACK	0x00000020 /* Have alternate signal stack. */
 #define	TDP_DEADLKTREAT	0x00000040 /* Lock aquisition - deadlock treatment. */
 #define	TDP_SA		0x00000080 /* A scheduler activation based thread. */
 #define	TDP_NOSLEEPING	0x00000100 /* Thread is not allowed to sleep on a sq. */
 #define	TDP_OWEUPC	0x00000200 /* Call addupc() at next AST. */
 #define	TDP_ITHREAD	0x00000400 /* Thread is an interrupt thread. */
 #define	TDP_CAN_UNBIND	0x00000800 /* Only temporarily bound. */
 #define	TDP_SCHED1	0x00001000 /* Reserved for scheduler private use */
 #define	TDP_SCHED2	0x00002000 /* Reserved for scheduler private use */
 #define	TDP_SCHED3	0x00004000 /* Reserved for scheduler private use */
 #define	TDP_SCHED4	0x00008000 /* Reserved for scheduler private use */
 #define	TDP_GEOM	0x00010000 /* Settle GEOM before finishing syscall */
 #define	TDP_SOFTDEP	0x00020000 /* Stuck processing softdep worklist */
 #define	TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */
 
 /*
  * Reasons that the current thread can not be run yet.
  * More than one may apply.
  */
 #define	TDI_SUSPENDED	0x0001	/* On suspension queue. */
 #define	TDI_SLEEPING	0x0002	/* Actually asleep! (tricky). */
 #define	TDI_SWAPPED	0x0004	/* Stack not in mem.  Bad juju if run. */
 #define	TDI_LOCK	0x0008	/* Stopped on a lock. */
 #define	TDI_IWAIT	0x0010	/* Awaiting interrupt. */
 
 /*
  * flags (in kflags) related to M:N threading.
  */
 #define	TDK_KSEREL	0x0001	/* Blocked in msleep on kg->kg_completed. */
 #define	TDK_KSERELSIG	0x0002	/* Blocked in msleep on p->p_siglist. */
 #define	TDK_WAKEUP	0x0004	/* Thread has been woken by kse_wakeup. */
 
 #define	TD_CAN_UNBIND(td)			\
     (((td)->td_pflags & TDP_CAN_UNBIND) &&	\
      ((td)->td_upcall != NULL))
 
 #define	TD_IS_SLEEPING(td)	((td)->td_inhibitors & TDI_SLEEPING)
 #define	TD_ON_SLEEPQ(td)	((td)->td_wchan != NULL)
 #define	TD_IS_SUSPENDED(td)	((td)->td_inhibitors & TDI_SUSPENDED)
 #define	TD_IS_SWAPPED(td)	((td)->td_inhibitors & TDI_SWAPPED)
 #define	TD_ON_LOCK(td)		((td)->td_inhibitors & TDI_LOCK)
 #define	TD_AWAITING_INTR(td)	((td)->td_inhibitors & TDI_IWAIT)
 #define	TD_IS_RUNNING(td)	((td)->td_state == TDS_RUNNING)
 #define	TD_ON_RUNQ(td)		((td)->td_state == TDS_RUNQ)
 #define	TD_CAN_RUN(td)		((td)->td_state == TDS_CAN_RUN)
 #define	TD_IS_INHIBITED(td)	((td)->td_state == TDS_INHIBITED)
 
 #define	TD_SET_INHIB(td, inhib) do {			\
 	(td)->td_state = TDS_INHIBITED;			\
 	(td)->td_inhibitors |= (inhib);			\
 } while (0)
 
 #define	TD_CLR_INHIB(td, inhib) do {			\
 	if (((td)->td_inhibitors & (inhib)) &&		\
 	    (((td)->td_inhibitors &= ~(inhib)) == 0))	\
 		(td)->td_state = TDS_CAN_RUN;		\
 } while (0)
 
 #define	TD_SET_SLEEPING(td)	TD_SET_INHIB((td), TDI_SLEEPING)
 #define	TD_SET_SWAPPED(td)	TD_SET_INHIB((td), TDI_SWAPPED)
 #define	TD_SET_LOCK(td)		TD_SET_INHIB((td), TDI_LOCK)
 #define	TD_SET_SUSPENDED(td)	TD_SET_INHIB((td), TDI_SUSPENDED)
 #define	TD_SET_IWAIT(td)	TD_SET_INHIB((td), TDI_IWAIT)
 #define	TD_SET_EXITING(td)	TD_SET_INHIB((td), TDI_EXITING)
 
 #define	TD_CLR_SLEEPING(td)	TD_CLR_INHIB((td), TDI_SLEEPING)
 #define	TD_CLR_SWAPPED(td)	TD_CLR_INHIB((td), TDI_SWAPPED)
 #define	TD_CLR_LOCK(td)		TD_CLR_INHIB((td), TDI_LOCK)
 #define	TD_CLR_SUSPENDED(td)	TD_CLR_INHIB((td), TDI_SUSPENDED)
 #define	TD_CLR_IWAIT(td)	TD_CLR_INHIB((td), TDI_IWAIT)
 
 #define	TD_SET_RUNNING(td)	(td)->td_state = TDS_RUNNING
 #define	TD_SET_RUNQ(td)		(td)->td_state = TDS_RUNQ
 #define	TD_SET_CAN_RUN(td)	(td)->td_state = TDS_CAN_RUN
 
 /*
  * An upcall is used when returning to userland.  If a thread does not have
  * an upcall on return to userland the thread exports its context and exits.
  */
 struct kse_upcall {
 	TAILQ_ENTRY(kse_upcall) ku_link;	/* List of upcalls in KSEG. */
 	struct ksegrp		*ku_ksegrp;	/* Associated KSEG. */
 	struct thread		*ku_owner;	/* Owning thread. */
 	int			ku_flags;	/* KUF_* flags. */
 	struct kse_mailbox	*ku_mailbox;	/* Userland mailbox address. */
 	stack_t			ku_stack;	/* Userland upcall stack. */
 	void			*ku_func;	/* Userland upcall function. */
 	unsigned int		ku_mflags;	/* Cached upcall mbox flags. */
 };
 
 #define	KUF_DOUPCALL	0x00001		/* Do upcall now; don't wait. */
 #define	KUF_EXITING	0x00002		/* Upcall structure is exiting. */
 
 /*
  * Kernel-scheduled entity group (KSEG).  The scheduler considers each KSEG to
  * be an indivisible unit from a time-sharing perspective, though each KSEG may
  * contain multiple KSEs.
  *
  * The kg_startzero/kg_endzero and kg_startcopy/kg_endcopy macros name the
  * first member of, and the first member past, two contiguous member ranges,
  * so the order of the members between them must not be changed casually.
  * NOTE(review): presumably these ranges are zeroed and copied respectively
  * when a new ksegrp is set up, mirroring p_startzero/p_startcopy in
  * struct proc -- confirm against the users of the markers.
  */
 struct ksegrp {
 	struct proc	*kg_proc;	/* (*) Proc that contains this KSEG. */
 	TAILQ_ENTRY(ksegrp) kg_ksegrp;	/* (*) Queue of KSEGs in kg_proc. */
 	TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */
 	TAILQ_HEAD(, thread) kg_runq;	/* (td_runq) waiting RUNNABLE threads */
 	TAILQ_HEAD(, kse_upcall) kg_upcalls;	/* All upcalls in the group. */
 
 	/* Start of the "zero" range. */
 #define	kg_startzero kg_estcpu
 	u_int		kg_estcpu;	/* (j) Sum of the same field in KSEs. */
 	u_int		kg_slptime;	/* (j) How long completely blocked. */
 	int		kg_numupcalls;	/* (j) Num upcalls. */
 	int		kg_upsleeps;	/* (c) Num threads in kse_release(). */
 	struct kse_thr_mailbox *kg_completed; /* (c) Completed thread mboxes. */
 	int		kg_nextupcall;	/* (n) Next upcall time. */
 	int		kg_upquantum;	/* (n) Quantum to schedule an upcall. */
 	/* End of the "zero" range; the "copy" range starts at the same member. */
 #define	kg_endzero kg_pri_class
 
 #define	kg_startcopy	kg_endzero
 	u_char		kg_pri_class;	/* (j) Scheduling class. */
 	u_char		kg_user_pri;	/* (j) User pri from estcpu and nice. */
 	/* End of the "copy" range. */
 #define	kg_endcopy kg_numthreads
 	int		kg_numthreads;	/* (j) Num threads in total. */
 	struct kg_sched	*kg_sched;	/* (*) Scheduler-specific data. */
 };
 
 /*
  * XXX: Does this belong in resource.h or resourcevar.h instead?
  * Resource usage extension.  The times in rusage structs in the kernel are
  * never up to date.  The actual times are kept as runtimes and tick counts
  * (with control info in the "previous" times), and are converted when
  * userland asks for rusage info.  Backwards compatibility prevents putting
  * this directly in the user-visible rusage struct.
  *
  * The first four members are the raw accumulators (run time plus statclock
  * hits per mode); the rux_uu/rux_su/rux_iu members hold the "previous"
  * user/sys/intr times in microseconds mentioned above.
  *
  * Locking: (cj) means (j) for p_rux and (c) for p_crux.
  */
 struct rusage_ext {
 	u_int64_t	rux_runtime;    /* (cj) Real time. */
 	u_int64_t	rux_uticks;     /* (cj) Statclock hits in user mode. */
 	u_int64_t	rux_sticks;     /* (cj) Statclock hits in sys mode. */
 	u_int64_t	rux_iticks;     /* (cj) Statclock hits in intr mode. */
 	u_int64_t	rux_uu;         /* (c) Previous user time in usec. */
 	u_int64_t	rux_su;         /* (c) Previous sys time in usec. */
 	u_int64_t	rux_iu;         /* (c) Previous intr time in usec. */
 };
 
 /*
  * The old fashioned process. May have multiple threads, KSEGRPs
  * and KSEs. Starts off with a single embedded KSEGRP and THREAD.
  *
  * Layout notes:
  *  - The parenthesized letters in the member comments are locking keys.
  *  - Members from p_startzero up to (not including) p_endzero are zeroed
  *    when the process is created in fork.
  *  - Members from p_startcopy up to (not including) p_endcopy are copied
  *    when the process is created in fork.
  * Because those ranges are delimited by member names, members must not be
  * moved across the marker macros without adjusting the markers.
  */
 struct proc {
 	LIST_ENTRY(proc) p_list;	/* (d) List of all processes. */
 	TAILQ_HEAD(, ksegrp) p_ksegrps;	/* (c)(kg_ksegrp) All KSEGs. */
 	TAILQ_HEAD(, thread) p_threads;	/* (j)(td_plist) Threads. (shortcut) */
 	TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */
 	struct ucred	*p_ucred;	/* (c) Process owner's identity. */
 	struct filedesc	*p_fd;		/* (b) Open files. */
 	struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
 					/* Accumulated stats for all threads? */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Process limits. */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
 	/*
 	 * The following don't make too much sense.
 	 * See the td_ or ke_ versions of the same flags.
 	 */
 	int		p_flag;		/* (c) P_* flags. */
 	int		p_sflag;	/* (j) PS_* flags. */
 	enum {
 		PRS_NEW = 0,		/* In creation */
 		PRS_NORMAL,		/* threads can be run. */
 		PRS_ZOMBIE
 	} p_state;			/* (j/c) S* process status. */
 
 	pid_t		p_pid;		/* (b) Process identifier. */
 	LIST_ENTRY(proc) p_hash;	/* (d) Hash chain. */
 	LIST_ENTRY(proc) p_pglist;	/* (g + e) List of processes in pgrp. */
 	struct proc	*p_pptr;	/* (c + e) Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* (e) List of sibling processes. */
 	LIST_HEAD(, proc) p_children;	/* (e) Pointer to list of children. */
 	struct mtx	p_mtx;		/* (n) Lock for this struct. */
 	struct ksiginfo *p_ksi;	/* Locked by parent proc lock */
 	sigqueue_t	p_sigqueue;	/* (c) Sigs not delivered to a td. */
 	/* Shorthand for the signal set inside p_sigqueue. */
 #define p_siglist	p_sigqueue.sq_signals
 
 /* The following fields are all zeroed upon creation in fork. */
 #define	p_startzero	p_oppid
 	pid_t		p_oppid;	/* (c + e) Save ppid in ptrace. XXX */
 	struct vmspace	*p_vmspace;	/* (b) Address space. */
 	u_int		p_swtime;	/* (j) Time swapped in or out. */
 	struct itimerval p_realtimer;	/* (c) Alarm timer. */
 	struct rusage_ext p_rux;	/* (cj) Internal resource usage. */
 	struct rusage_ext p_crux;	/* (c) Internal child resource usage. */
 	int		p_profthreads;	/* (c) Num threads in addupc_task. */
 	int		p_maxthrwaits;	/* (c) Max threads num waiters */
 	int		p_traceflag;	/* (o) Kernel trace points. */
 	struct vnode	*p_tracevp;	/* (c + o) Trace to vnode. */
 	struct ucred	*p_tracecred;	/* (o) Credentials to trace with. */
 	struct vnode	*p_textvp;	/* (b) Vnode of executable. */
 	char		p_lock;		/* (c) Proclock (prevent swap) count. */
 	struct sigiolst	p_sigiolst;	/* (c) List of sigio sources. */
 	int		p_sigparent;	/* (c) Signal to parent on exit. */
 	int		p_sig;		/* (n) For core dump/debugger XXX. */
 	u_long		p_code;		/* (n) For core dump/debugger XXX. */
 	u_int		p_stops;	/* (c) Stop event bitmask. */
 	u_int		p_stype;	/* (c) Stop event type. */
 	char		p_step;		/* (c) Process is stopped. */
 	u_char		p_pfsflags;	/* (c) Procfs flags. */
 	struct nlminfo	*p_nlminfo;	/* (?) Only used by/for lockd. */
 	struct kaioinfo	*p_aioinfo;	/* (c) ASYNC I/O info. */
 	struct thread	*p_singlethread;/* (c + j) If single threading this is it */
 	int		p_suspcount;	/* (c) Num threads in suspended mode. */
 	struct thread	*p_xthread;	/* (c) Trap thread */
 	int		p_boundary_count;/* (c) Num threads at user boundary */
 	/* NOTE(review): p_procscopegrp has no description or locking key. */
 	struct ksegrp	*p_procscopegrp;
 	int		p_pendingcnt;	/* how many signals are pending */
 	struct itimers	*p_itimers;	/* (c) POSIX interval timers. */
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
 /* The following fields are all copied upon creation in fork. */
 #define	p_startcopy	p_endzero
 	u_int		p_magic;	/* (b) Magic number. */
 	char		p_comm[MAXCOMLEN + 1];	/* (b) Process name. */
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
 	struct sysentvec *p_sysent;	/* (b) Syscall dispatch info. */
 	struct pargs	*p_args;	/* (c) Process arguments. */
 	rlim_t		p_cpulimit;	/* (j) Current CPU limit in seconds. */
 	signed char	p_nice;		/* (c + j) Process "nice" value. */
 /* End area that is copied on creation. */
 #define	p_endcopy	p_xstat
 
 	u_short		p_xstat;	/* (c) Exit status; also stop sig. */
 	struct knlist	p_klist;	/* (c) Knotes attached to this proc. */
 	int		p_numthreads;	/* (j) Number of threads. */
 	int		p_numksegrps;	/* (c) Number of ksegrps. */
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */
 	u_short		p_acflag;	/* (c) Accounting flags. */
 	struct rusage	*p_ru;		/* (a) Exit information. XXX */
 	struct proc	*p_peers;	/* (r) */
 	struct proc	*p_leader;	/* (b) */
 	void		*p_emuldata;	/* (c) Emulator state data. */
 	struct label	*p_label;	/* (*) Proc (not subject) MAC label. */
 	struct p_sched	*p_sched;	/* (*) Scheduler-specific data. */
 	STAILQ_HEAD(, ktr_request)	p_ktr;	/* (o) KTR event queue. */
 	LIST_HEAD(, mqueue_notifier)	p_mqnotifier; /* (c) mqueue notifiers.*/
 	struct auditinfo	*p_au;	/* (c) Process audit properties. */
 };
 
 /*
  * Member-name shorthands that reach through p_pgrp.  They expand to a
  * member access chain, so they are only meaningful after `->' or `.' on a
  * struct proc (e.g. p->p_session) and dereference p_pgrp when used.
  */
 #define	p_session	p_pgrp->pg_session
 #define	p_pgid		p_pgrp->pg_id
 
 #define	NOCPU	0xff		/* For when we aren't on a CPU. */
 
 
 /*
  * These flags are kept in p_flag.  Each flag occupies its own bit; bit
  * 0x2000000 (between P_JAILED and P_INEXEC) is not assigned in this list.
  */
 #define	P_ADVLOCK	0x00001	/* Process may hold a POSIX advisory lock. */
 #define	P_CONTROLT	0x00002	/* Has a controlling terminal. */
 #define	P_KTHREAD	0x00004	/* Kernel thread (*). */
 #define	P_NOLOAD	0x00008	/* Ignore during load avg calculations. */
 #define	P_PPWAIT	0x00010	/* Parent is waiting for child to exec/exit. */
 #define	P_PROFIL	0x00020	/* Has started profiling. */
 #define	P_STOPPROF	0x00040	/* Has thread requesting to stop profiling. */
 #define	P_HADTHREADS	0x00080	/* Has had threads (no cleanup shortcuts) */
 #define	P_SUGID		0x00100	/* Had set id privileges since last exec. */
 #define	P_SYSTEM	0x00200	/* System proc: no sigs, stats or swapping. */
 #define	P_SINGLE_EXIT	0x00400	/* Threads suspending should exit, not wait. */
 #define	P_TRACED	0x00800	/* Debugged process being traced. */
 #define	P_WAITED	0x01000	/* Someone is waiting for us. */
 #define	P_WEXIT		0x02000	/* Working on exiting. */
 #define	P_EXEC		0x04000	/* Process called exec. */
 #define	P_SA		0x08000	/* Using scheduler activations. */
 #define	P_CONTINUED	0x10000	/* Proc has continued from a stopped state. */
 #define	P_STOPPED_SIG	0x20000	/* Stopped due to SIGSTOP/SIGTSTP. */
 #define	P_STOPPED_TRACE	0x40000	/* Stopped because of tracing. */
 #define	P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */
 #define	P_PROTECTED	0x100000 /* Do not kill on memory overcommit. */
 #define	P_SIGEVENT	0x200000 /* Process pending signals changed. */
 #define	P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
 #define	P_HWPMC		0x800000 /* Process is using HWPMCs */
 
 #define	P_JAILED	0x1000000 /* Process is in jail. */
 #define	P_INEXEC	0x4000000 /* Process is in execve(). */
 #define	P_STATCHILD	0x8000000 /* Child process stopped or exited. */
 
 /*
  * P_STOPPED is the union of the three stop reasons.  P_SHOULDSTOP() yields
  * the subset of those bits currently set in p_flag, i.e. it is non-zero
  * when any stop reason is pending and is not normalized to 0/1.
  */
 #define	P_STOPPED	(P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
 #define	P_SHOULDSTOP(p)	((p)->p_flag & P_STOPPED)
 
 /*
  * These flags are kept in p_sflag and are protected with sched_lock.
  * The bit values are not contiguous; the gaps (for example 0x00004 through
  * 0x00010) are not assigned in this list.
  */
 #define	PS_INMEM	0x00001	/* Loaded into memory. */
 #define	PS_XCPU		0x00002 /* Exceeded CPU limit. */
 #define	PS_ALRMPEND	0x00020	/* Pending SIGVTALRM needs to be posted. */
 #define	PS_PROFPEND	0x00040	/* Pending SIGPROF needs to be posted. */
 #define	PS_SWAPINREQ	0x00100	/* Swapin request due to wakeup. */
 #define	PS_SWAPPINGOUT	0x00200	/* Process is being swapped out. */
 #define	PS_SWAPPINGIN	0x04000	/* Process is being swapped in. */
 #define	PS_MACPEND	0x08000	/* AST-based MAC event pending. */
 
 /*
  * These were process status values (p_stat), now they are only used in
  * legacy conversion code.  The values are small consecutive integers, not
  * bit flags.
  */
 #define	SIDL	1		/* Process being created by fork. */
 #define	SRUN	2		/* Currently runnable. */
 #define	SSLEEP	3		/* Sleeping on an address. */
 #define	SSTOP	4		/* Process debugging or suspension. */
 #define	SZOMB	5		/* Awaiting collection by parent. */
 #define	SWAIT	6		/* Waiting for interrupt. */
 #define	SLOCK	7		/* Blocked on a lock. */
 
 /* NOTE(review): presumably the value kept in p_magic -- confirm. */
 #define	P_MAGIC		0xbeefface
 
 #ifdef _KERNEL
 
 /* Flags for mi_switch().  Distinct bits, passed in its `flags' argument. */
 #define	SW_VOL		0x0001		/* Voluntary switch. */
 #define	SW_INVOL	0x0002		/* Involuntary switch. */
 #define SW_PREEMPT	0x0004		/* The invol switch is a preemption */
 
 /* Flags for setrunqueue().  Why are we setting this thread on the run queue? */
 #define SRQ_BORING	0x0000		/* No special circumstances. */
 #define SRQ_YIELDING	0x0001		/* We are yielding (from mi_switch). */
 #define SRQ_OURSELF	0x0002		/* It is ourself (from mi_switch). */
 #define SRQ_INTR	0x0004		/* It is probably urgent. */
 #define SRQ_PREEMPTED	0x0008		/* has been preempted.. be kind */
 
 /* Values for the `how' argument of thread_single(); an enumeration, not bits. */
 #define	SINGLE_NO_EXIT	0
 #define	SINGLE_EXIT	1
 #define	SINGLE_BOUNDARY	2
 
 /* XXXKSE: Missing values for thread_suspend_check(). */
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_PARGS);
 MALLOC_DECLARE(M_PGRP);
 MALLOC_DECLARE(M_SESSION);
 MALLOC_DECLARE(M_SUBPROC);
 MALLOC_DECLARE(M_ZOMBIE);
 #endif
 
 /*
  * Iteration helpers built on the queue(3) LIST_FOREACH/TAILQ_FOREACH
  * macros.  Each expands to a loop header: the iteration variable is the
  * argument named p/kg/td/ku that is assigned on every pass, and the caller
  * supplies the loop body.  The macros themselves take no locks.
  */
 #define	FOREACH_PROC_IN_SYSTEM(p)					\
 	LIST_FOREACH((p), &allproc, p_list)
 #define	FOREACH_KSEGRP_IN_PROC(p, kg)					\
 	TAILQ_FOREACH((kg), &(p)->p_ksegrps, kg_ksegrp)
 #define	FOREACH_THREAD_IN_GROUP(kg, td)					\
 	TAILQ_FOREACH((td), &(kg)->kg_threads, td_kglist)
 #define	FOREACH_UPCALL_IN_GROUP(kg, ku)					\
 	TAILQ_FOREACH((ku), &(kg)->kg_upcalls, ku_link)
 #define	FOREACH_THREAD_IN_PROC(p, td)					\
 	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
 
 /* XXXKSE the following lines should probably only be used in 1:1 code: */
 /* Head element of the process's thread / ksegrp queue. */
 #define	FIRST_THREAD_IN_PROC(p)	TAILQ_FIRST(&(p)->p_threads)
 #define	FIRST_KSEGRP_IN_PROC(p)	TAILQ_FIRST(&(p)->p_ksegrps)
 
 /*
  * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
  * as it is used to represent "no process group".
  * NO_PID is that PID_MAX + 1 value spelled out literally, so the two
  * constants must be kept in step by hand.
  */
 #define	PID_MAX		99999
 #define	NO_PID		100000
 
 /*
  * Session helpers: SESS_LEADER() is true when p is the leader of its own
  * session, SESSHOLD() bumps the session's s_count (the macro itself does
  * no locking), and SESSRELE() hands the session to sessrele().
  */
 #define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))
 #define	SESSHOLD(s)	((s)->s_count++)
 #define	SESSRELE(s)	sessrele(s)
 
 
 /*
  * STOPEVENT(p, e, v): if event bit(s) `e' are enabled in p->p_stops, take
  * the process lock, report the event through stopevent(p, e, v) and drop
  * the lock again.  The p_stops test is done before the lock is taken, so
  * the caller must not already hold the process lock.
  */
 #define	STOPEVENT(p, e, v) do {						\
 	if ((p)->p_stops & (e))	{					\
 		PROC_LOCK(p);						\
 		stopevent((p), (e), (v));				\
 		PROC_UNLOCK(p);						\
 	}								\
 } while (0)
 /*
  * _STOPEVENT(p, e, v): variant for callers that already own the process
  * lock (asserted).  It issues a witness warning check and then reports the
  * event if it is enabled in p->p_stops.  The `p' argument is parenthesized
  * everywhere so that any pointer expression can be passed safely.
  */
 #define	_STOPEVENT(p, e, v) do {					\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,			\
 	    &(p)->p_mtx.mtx_object, "checking stopevent %d", (e));	\
 	if ((p)->p_stops & (e))						\
 		stopevent((p), (e), (v));				\
 } while (0)
 
 /*
  * Lock and unlock a process.  Thin wrappers around the mtx(9) operations
  * applied to the p_mtx mutex embedded in struct proc.
  */
 #define	PROC_LOCK(p)	mtx_lock(&(p)->p_mtx)
 #define	PROC_TRYLOCK(p)	mtx_trylock(&(p)->p_mtx)
 #define	PROC_UNLOCK(p)	mtx_unlock(&(p)->p_mtx)
 #define	PROC_LOCKED(p)	mtx_owned(&(p)->p_mtx)
 #define	PROC_LOCK_ASSERT(p, type)	mtx_assert(&(p)->p_mtx, (type))
 
 /* Lock and unlock a process group (operates on its pg_mtx mutex). */
 #define	PGRP_LOCK(pg)	mtx_lock(&(pg)->pg_mtx)
 #define	PGRP_UNLOCK(pg)	mtx_unlock(&(pg)->pg_mtx)
 #define	PGRP_LOCKED(pg)	mtx_owned(&(pg)->pg_mtx)
 #define	PGRP_LOCK_ASSERT(pg, type)	mtx_assert(&(pg)->pg_mtx, (type))
 
 /*
  * NULL-tolerant variants of PGRP_LOCK()/PGRP_UNLOCK(): nothing is done
  * when pg is NULL.
  */
 #define	PGRP_LOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_LOCK(pg);						\
 } while (0)
 #define	PGRP_UNLOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_UNLOCK(pg);					\
 } while (0)
 
 /* Lock and unlock a session (operates on its s_mtx mutex). */
 #define	SESS_LOCK(s)	mtx_lock(&(s)->s_mtx)
 #define	SESS_UNLOCK(s)	mtx_unlock(&(s)->s_mtx)
 #define	SESS_LOCKED(s)	mtx_owned(&(s)->s_mtx)
 #define	SESS_LOCK_ASSERT(s, type)	mtx_assert(&(s)->s_mtx, (type))
 
 /*
  * Hold process U-area in memory, normally for ptrace/procfs work.
  *
  * PHOLD()/PRELE() take and drop the process lock around the operation;
  * the underscore variants expect the caller to hold it already (asserted).
  * A hold bumps p_lock and calls faultin() when PS_INMEM is not set in
  * p_sflag; a release only decrements p_lock.
  */
 #define	PHOLD(p) do {							\
 	PROC_LOCK(p);							\
 	_PHOLD(p);							\
 	PROC_UNLOCK(p);							\
 } while (0)
 #define	_PHOLD(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	++(p)->p_lock;							\
 	if (!((p)->p_sflag & PS_INMEM))					\
 		faultin(p);						\
 } while (0)
 
 #define	PRELE(p) do {							\
 	PROC_LOCK(p);							\
 	_PRELE(p);							\
 	PROC_UNLOCK(p);							\
 } while (0)
 #define	_PRELE(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	(p)->p_lock--;							\
 } while (0)
 
 /* A thread may be swapped out only while it is sleeping or suspended. */
 #define	thread_safetoswapout(td) (TD_IS_SLEEPING(td) || TD_IS_SUSPENDED(td))
 
 /*
  * Mark curthread as not allowed / allowed to sleep by toggling
  * TDP_NOSLEEPING in td_pflags.  The two macros must be used in strictly
  * alternating pairs: each asserts that the flag is in the opposite state
  * before changing it, so nesting is rejected.
  */
 #define	THREAD_NO_SLEEPING() do {					\
 	KASSERT((curthread->td_pflags & TDP_NOSLEEPING) == 0,		\
 	    ("nested no sleeping"));					\
 	curthread->td_pflags |= TDP_NOSLEEPING;				\
 } while (0)
 
 #define	THREAD_SLEEPING_OK() do {					\
 	KASSERT((curthread->td_pflags & TDP_NOSLEEPING) != 0,		\
 	    ("nested sleeping ok"));					\
 	curthread->td_pflags &= ~TDP_NOSLEEPING;			\
 } while (0)
 
 /*
  * Hash bucket lookup for processes and process groups.  The id is ANDed
  * with pidhash/pgrphash, i.e. those variables are used as bit masks rather
  * than as table sizes, and the macro yields a pointer to the bucket head.
  */
 #define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
 extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
 extern u_long pidhash;
 
 #define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
 extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
 extern u_long pgrphash;
 
 extern struct sx allproc_lock;
 extern struct sx proctree_lock;
 extern struct mtx ppeers_lock;
 extern struct ksegrp ksegrp0;		/* Primary ksegrp in proc0. */
 extern struct proc proc0;		/* Process slot for swapper. */
 extern struct thread thread0;		/* Primary thread in proc0. */
 extern struct vmspace vmspace0;		/* VM space for proc0. */
 extern int hogticks;			/* Limit on kernel cpu hogs. */
 extern int lastpid;
 extern int nprocs, maxproc;		/* Current and max number of procs. */
 extern int maxprocperuid;		/* Max procs per uid. */
 extern u_long ps_arg_cache_limit;
 
 LIST_HEAD(proclist, proc);
 TAILQ_HEAD(procqueue, proc);
 TAILQ_HEAD(threadqueue, thread);
 extern struct proclist allproc;		/* List of all processes. */
 extern struct proclist zombproc;	/* List of zombie processes. */
 extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
 
 extern struct uma_zone *proc_zone;
 
 struct	proc *pfind(pid_t);		/* Find process by id. */
 struct	pgrp *pgfind(pid_t);		/* Find process group by id. */
 struct	proc *zpfind(pid_t);		/* Find zombie process by id. */
 
 void	adjustrunqueue(struct thread *, int newpri);
 void	ast(struct trapframe *framep);
 struct	thread *choosethread(void);
 int	cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
 int	enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
 	    struct session *sess);
 int	enterthispgrp(struct proc *p, struct pgrp *pgrp);
 void	faultin(struct proc *p);
 void	fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
 int	fork1(struct thread *, int, int, struct proc **);
 void	fork_exit(void (*)(void *, struct trapframe *), void *,
 	    struct trapframe *);
 void	fork_return(struct thread *, struct trapframe *);
 int	inferior(struct proc *p);
 void 	kick_proc0(void);
 int	leavepgrp(struct proc *p);
 int	maybe_preempt(struct thread *td);
 void	mi_switch(int flags, struct thread *newtd);
 int	p_candebug(struct thread *td, struct proc *p);
 int	p_cansee(struct thread *td, struct proc *p);
 int	p_cansched(struct thread *td, struct proc *p);
 int	p_cansignal(struct thread *td, struct proc *p, int signum);
 int	p_canwait(struct thread *td, struct proc *p);
 struct	pargs *pargs_alloc(int len);
 void	pargs_drop(struct pargs *pa);
 void	pargs_free(struct pargs *pa);
 void	pargs_hold(struct pargs *pa);
 void	procinit(void);
 void	proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td);
 void	proc_reparent(struct proc *child, struct proc *newparent);
 struct	pstats *pstats_alloc(void);
 void	pstats_fork(struct pstats *src, struct pstats *dst);
 void	pstats_free(struct pstats *ps);
 int	securelevel_ge(struct ucred *cr, int level);
 int	securelevel_gt(struct ucred *cr, int level);
 void	sessrele(struct session *);
 void	setrunnable(struct thread *);
 void	setrunqueue(struct thread *, int flags);
 void	setsugid(struct proc *p);
 int	sigonstack(size_t sp);
 void	sleepinit(void);
 void	stopevent(struct proc *, u_int, u_int);
 void	threadinit(void);
 void	cpu_idle(void);
 extern	void (*cpu_idle_hook)(void);	/* Hook to machdep CPU idler. */
 void	cpu_switch(struct thread *old, struct thread *new);
 void	cpu_throw(struct thread *old, struct thread *new) __dead2;
 void	unsleep(struct thread *);
 void	userret(struct thread *, struct trapframe *);
 
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int) __dead2;
 void	cpu_fork(struct thread *, struct proc *, struct thread *, int);
 void	cpu_set_fork_handler(struct thread *, void (*)(void *), void *);
 
 /* New in KSE. */
 struct	ksegrp *ksegrp_alloc(void);
 void	ksegrp_free(struct ksegrp *kg);
 void	ksegrp_stash(struct ksegrp *kg);
 void	kse_GC(void);
 void	kseinit(void);
 void	cpu_set_upcall(struct thread *td, struct thread *td0);
 void	cpu_set_upcall_kse(struct thread *, void (*)(void *), void *, stack_t *);
 int	cpu_set_user_tls(struct thread *, void *tls_base);
 void	cpu_thread_clean(struct thread *);
 void	cpu_thread_exit(struct thread *);
 void	cpu_thread_setup(struct thread *td);
 void	cpu_thread_swapin(struct thread *);
 void	cpu_thread_swapout(struct thread *);
 void	ksegrp_link(struct ksegrp *kg, struct proc *p);
 void	ksegrp_unlink(struct ksegrp *kg);
 struct	thread *thread_alloc(void);
 void	thread_continued(struct proc *p);
 void	thread_exit(void) __dead2;
 int	thread_export_context(struct thread *td, int willexit);
 void	thread_free(struct thread *td);
 void	thread_link(struct thread *td, struct ksegrp *kg);
 void	thread_reap(void);
 struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku);
 void	thread_signal_add(struct thread *td, ksiginfo_t *);
 int	thread_single(int how);
 void	thread_single_end(void);
 int	thread_sleep_check(struct thread *td);
 void	thread_stash(struct thread *td);
 int	thread_statclock(int user);
 void	thread_stopped(struct proc *p);
 void	childproc_stopped(struct proc *child, int reason);
 void	childproc_continued(struct proc *child);
 void	childproc_exited(struct proc *child);
 int	thread_suspend_check(int how);
 void	thread_suspend_one(struct thread *td);
 struct thread *thread_switchout(struct thread *td, int flags,
 	    struct thread *newtd);
 void	thread_unlink(struct thread *td);
 void	thread_unsuspend(struct proc *p);
 void	thread_unsuspend_one(struct thread *td);
 void	thread_unthread(struct thread *td);
 int	thread_userret(struct thread *td, struct trapframe *frame);
 void	thread_user_enter(struct thread *td);
 void	thread_wait(struct proc *p);
 struct thread	*thread_find(struct proc *p, lwpid_t tid);
 void	thr_exit1(void);
 struct kse_upcall *upcall_alloc(void);
 void	upcall_free(struct kse_upcall *ku);
 void	upcall_link(struct kse_upcall *ku, struct ksegrp *kg);
 void	upcall_unlink(struct kse_upcall *ku);
 void	upcall_remove(struct thread *td);
 void	upcall_stash(struct kse_upcall *ke);
 
 #endif	/* _KERNEL */
 
 #endif	/* !_SYS_PROC_H_ */
Index: head/sys/sys/sleepqueue.h
===================================================================
--- head/sys/sys/sleepqueue.h	(revision 155740)
+++ head/sys/sys/sleepqueue.h	(revision 155741)
@@ -1,109 +1,107 @@
 /*-
  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SLEEPQUEUE_H_
 #define _SYS_SLEEPQUEUE_H_
 
 /*
  * Sleep queue interface.  Sleep/wakeup and condition variables use a sleep
  * queue for the queue of threads blocked on a sleep channel.
  *
  * A thread calls sleepq_lock() to lock the sleep queue chain associated
  * with a given wait channel.  A thread can then call sleepq_add() to
  * add itself onto a sleep queue and call one of the sleepq_wait()
  * functions to actually go to sleep.  If a thread needs to abort a sleep
  * operation it should call sleepq_release() to unlock the associated sleep
  * queue chain lock.  If the thread also needs to remove itself from a queue
  * it just enqueued itself on, it can use sleepq_remove() instead.
  *
  * If the thread only wishes to sleep for a limited amount of time, it can
  * call sleepq_set_timeout() after sleepq_add() to setup a timeout.  It
  * should then use one of the sleepq_timedwait() functions to block.
  *
  * If the thread wants the sleep to be interruptible by signals, it should
  * pass SLEEPQ_INTERRUPTIBLE in the flags given to sleepq_add().  It should
  * then use one of the sleepq_wait_sig() functions to block; these check
  * for pending signals themselves and return the error value (such as
  * EINTR or ERESTART) that the caller should hand back, so no separate
  * signal-checking or return-value calculation call is needed.
  *
  * A thread is normally resumed from a sleep queue by either the
  * sleepq_signal() or sleepq_broadcast() functions.  Sleepq_signal() wakes
  * the thread with the highest priority that is sleeping on the specified
  * wait channel.  Sleepq_broadcast() wakes all threads that are sleeping
  * on the specified wait channel.  A thread sleeping in an interruptible
  * sleep can be interrupted by calling sleepq_abort().  A thread can also
  * be removed from a specified sleep queue using the sleepq_remove()
  * function.  Note that the sleep queue chain must first be locked via
  * sleepq_lock() when calling sleepq_signal() and sleepq_broadcast().
  *
  * Each thread allocates a sleep queue at thread creation via sleepq_alloc()
  * and releases it at thread destruction via sleepq_free().  Note that
  * a sleep queue is not tied to a specific thread and that the sleep queue
  * released at thread destruction may not be the same sleep queue that the
  * thread allocated when it was created.
  *
  * XXX: Some other parts of the kernel such as ithread sleeping may end up
  * using this interface as well (death to TDI_IWAIT!)
  */
 
 struct mtx;
 struct sleepqueue;
 struct thread;
 
 #ifdef _KERNEL
 
 /*
  * Sleep queue flag values.  The low byte (SLEEPQ_TYPE) encodes the kind of
  * sleep queue; SLEEPQ_INTERRUPTIBLE is a separate bit outside that mask
  * and can therefore be OR'ed together with a type value.
  * NOTE(review): presumably passed as the int flags argument of
  * sleepq_add() and the wakeup functions -- confirm against the callers.
  */
 #define	SLEEPQ_TYPE		0x0ff		/* Mask of sleep queue types. */
 #define	SLEEPQ_MSLEEP		0x00		/* Used by msleep/wakeup. */
 #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
 #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
 
 void	init_sleepqueues(void);
-void	sleepq_abort(struct thread *td);
+void	sleepq_abort(struct thread *td, int intrval);
 void	sleepq_add(void *, struct mtx *, const char *, int);
 struct sleepqueue *sleepq_alloc(void);
 void	sleepq_broadcast(void *, int, int);
-int	sleepq_calc_signal_retval(int sig);
-int	sleepq_catch_signals(void *wchan);
 void	sleepq_free(struct sleepqueue *);
 void	sleepq_lock(void *);
 struct sleepqueue *sleepq_lookup(void *);
 void	sleepq_release(void *);
 void	sleepq_remove(struct thread *, void *);
 void	sleepq_signal(void *, int, int);
 void	sleepq_set_timeout(void *wchan, int timo);
 int	sleepq_timedwait(void *wchan);
-int	sleepq_timedwait_sig(void *wchan, int signal_caught);
+int	sleepq_timedwait_sig(void *wchan);
 void	sleepq_wait(void *);
 int	sleepq_wait_sig(void *wchan);
 
 #endif	/* _KERNEL */
 #endif	/* !_SYS_SLEEPQUEUE_H_ */