diff --git a/lib/libthr/thread/Makefile.inc b/lib/libthr/thread/Makefile.inc index 4ce90b55dcae..6d571d4bd39d 100644 --- a/lib/libthr/thread/Makefile.inc +++ b/lib/libthr/thread/Makefile.inc @@ -1,56 +1,57 @@ # $FreeBSD$ # thr sources .PATH: ${.CURDIR}/thread SRCS+= \ thr_affinity.c \ thr_attr.c \ thr_barrier.c \ thr_barrierattr.c \ thr_cancel.c \ thr_clean.c \ thr_concurrency.c \ thr_cond.c \ thr_condattr.c \ thr_create.c \ thr_detach.c \ thr_equal.c \ thr_event.c \ thr_exit.c \ thr_fork.c \ thr_getprio.c \ thr_getcpuclockid.c \ thr_getschedparam.c \ thr_info.c \ thr_init.c \ thr_join.c \ thr_list.c \ thr_kern.c \ thr_kill.c \ thr_main_np.c \ thr_multi_np.c \ thr_mutex.c \ thr_mutexattr.c \ thr_once.c \ thr_printf.c \ thr_pspinlock.c \ thr_resume_np.c \ thr_rtld.c \ thr_rwlock.c \ thr_rwlockattr.c \ thr_self.c \ thr_sem.c \ thr_setprio.c \ thr_setschedparam.c \ thr_sig.c \ thr_single_np.c \ + thr_sleepq.c \ thr_spec.c \ thr_spinlock.c \ thr_stack.c \ thr_syscalls.c \ thr_suspend_np.c \ thr_switch_np.c \ thr_symbols.c \ thr_umtx.c \ thr_yield.c diff --git a/lib/libthr/thread/thr_cond.c b/lib/libthr/thread/thr_cond.c index 03b5cdd7c6f3..6ec6d4c6da0b 100644 --- a/lib/libthr/thread/thr_cond.c +++ b/lib/libthr/thread/thr_cond.c @@ -1,305 +1,482 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include "un-namespace.h" #include "thr_private.h" /* * Prototypes */ int __pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); int __pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec * abstime); static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr); static int cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime, int cancel); -static int cond_signal_common(pthread_cond_t *cond, int broadcast); +static int cond_signal_common(pthread_cond_t *cond); +static int cond_broadcast_common(pthread_cond_t *cond); /* * Double underscore versions are cancellation points. Single underscore * versions are not and are provided for libc internal usage (which * shouldn't introduce cancellation points). 
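The convention described above is easiest to see in a standalone sketch. A minimal, portable illustration of the single- versus double-underscore split, using hypothetical names (__example_wait, _example_wait, do_wait_impl) rather than libthr internals:

#include <pthread.h>

static int
do_wait_impl(void)
{
        /* Hypothetical backend; a real one would block somewhere. */
        return (0);
}

int
__example_wait(void)
{
        /* Cancellation point: let any pending cancel fire first. */
        pthread_testcancel();
        return (do_wait_impl());
}

int
_example_wait(void)
{
        int oldstate, ret;

        /* libc-internal flavor: never acts as a cancellation point. */
        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
        ret = do_wait_impl();
        pthread_setcancelstate(oldstate, NULL);
        return (ret);
}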
*/ __weak_reference(__pthread_cond_wait, pthread_cond_wait); __weak_reference(__pthread_cond_timedwait, pthread_cond_timedwait); __weak_reference(_pthread_cond_init, pthread_cond_init); __weak_reference(_pthread_cond_destroy, pthread_cond_destroy); __weak_reference(_pthread_cond_signal, pthread_cond_signal); __weak_reference(_pthread_cond_broadcast, pthread_cond_broadcast); +#define CV_PSHARED(cvp) (((cvp)->__flags & USYNC_PROCESS_SHARED) != 0) + static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr) { - pthread_cond_t pcond; - int rval = 0; + struct pthread_cond *cvp; + int error = 0; - if ((pcond = (pthread_cond_t) + if ((cvp = (pthread_cond_t) calloc(1, sizeof(struct pthread_cond))) == NULL) { - rval = ENOMEM; + error = ENOMEM; } else { /* * Initialise the condition variable structure: */ if (cond_attr == NULL || *cond_attr == NULL) { - pcond->c_pshared = 0; - pcond->c_clockid = CLOCK_REALTIME; + cvp->__clock_id = CLOCK_REALTIME; } else { - pcond->c_pshared = (*cond_attr)->c_pshared; - pcond->c_clockid = (*cond_attr)->c_clockid; + if ((*cond_attr)->c_pshared) + cvp->__flags |= USYNC_PROCESS_SHARED; + cvp->__clock_id = (*cond_attr)->c_clockid; } - _thr_umutex_init(&pcond->c_lock); - *cond = pcond; + *cond = cvp; } - /* Return the completion status: */ - return (rval); + return (error); } static int init_static(struct pthread *thread, pthread_cond_t *cond) { int ret; THR_LOCK_ACQUIRE(thread, &_cond_static_lock); if (*cond == NULL) ret = cond_init(cond, NULL); else ret = 0; THR_LOCK_RELEASE(thread, &_cond_static_lock); return (ret); } #define CHECK_AND_INIT_COND \ - if (__predict_false((cv = (*cond)) <= THR_COND_DESTROYED)) { \ - if (cv == THR_COND_INITIALIZER) { \ + if (__predict_false((cvp = (*cond)) <= THR_COND_DESTROYED)) { \ + if (cvp == THR_COND_INITIALIZER) { \ int ret; \ ret = init_static(_get_curthread(), cond); \ if (ret) \ return (ret); \ - } else if (cv == THR_COND_DESTROYED) { \ + } else if (cvp == THR_COND_DESTROYED) { \ return (EINVAL); \ } \ - cv = *cond; \ + cvp = *cond; \ } int _pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr) { *cond = NULL; return (cond_init(cond, cond_attr)); } int _pthread_cond_destroy(pthread_cond_t *cond) { - struct pthread *curthread = _get_curthread(); - struct pthread_cond *cv; - int rval = 0; - - if ((cv = *cond) == THR_COND_INITIALIZER) - rval = 0; - else if (cv == THR_COND_DESTROYED) - rval = EINVAL; + struct pthread_cond *cvp; + int error = 0; + + if ((cvp = *cond) == THR_COND_INITIALIZER) + error = 0; + else if (cvp == THR_COND_DESTROYED) + error = EINVAL; else { - cv = *cond; - THR_UMUTEX_LOCK(curthread, &cv->c_lock); + cvp = *cond; *cond = THR_COND_DESTROYED; - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); /* * Free the memory allocated for the condition * variable structure: */ - free(cv); - } - return (rval); -} - -struct cond_cancel_info -{ - pthread_mutex_t *mutex; - pthread_cond_t *cond; - int count; -}; - -static void -cond_cancel_handler(void *arg) -{ - struct pthread *curthread = _get_curthread(); - struct cond_cancel_info *info = (struct cond_cancel_info *)arg; - pthread_cond_t cv; - - if (info->cond != NULL) { - cv = *(info->cond); - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); + free(cvp); } - _mutex_cv_lock(info->mutex, info->count); + return (error); } /* * Cancellation behavior: * A thread may be canceled at the start; if it is canceled, it means it * did not get a wakeup from pthread_cond_signal(); otherwise, it is * not canceled.
* Thread cancellation never causes a wakeup from pthread_cond_signal() * to be lost. */ static int -cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, +cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp, const struct timespec *abstime, int cancel) { struct pthread *curthread = _get_curthread(); - struct timespec ts, ts2, *tsp; - struct cond_cancel_info info; - pthread_cond_t cv; - int ret; + int recurse; + int error, error2 = 0; + + error = _mutex_cv_detach(mp, &recurse); + if (error != 0) + return (error); + + if (cancel) { + _thr_cancel_enter2(curthread, 0); + error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, + (struct umutex *)&mp->m_lock, abstime, + CVWAIT_ABSTIME|CVWAIT_CLOCKID); + _thr_cancel_leave(curthread, 0); + } else { + error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, + (struct umutex *)&mp->m_lock, abstime, + CVWAIT_ABSTIME|CVWAIT_CLOCKID); + } /* - * If the condition variable is statically initialized, - * perform the dynamic initialization: + * Note that PP mutex and ROBUST mutex may return + * interesting error codes. */ - CHECK_AND_INIT_COND - - cv = *cond; - THR_UMUTEX_LOCK(curthread, &cv->c_lock); - ret = _mutex_cv_unlock(mutex, &info.count); - if (__predict_false(ret != 0)) { - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); - return (ret); + if (error == 0) { + error2 = _mutex_cv_lock(mp, recurse); + } else if (error == EINTR || error == ETIMEDOUT) { + error2 = _mutex_cv_lock(mp, recurse); + if (error2 == 0 && cancel) + _thr_testcancel(curthread); + if (error == EINTR) + error = 0; + } else { + /* We know that it didn't unlock the mutex. */ + error2 = _mutex_cv_attach(mp, recurse); + if (error2 == 0 && cancel) + _thr_testcancel(curthread); } + return (error2 != 0 ? error2 : error); +} + +/* + * The thread waits on a userland queue whenever possible; when it + * is signaled or broadcast, it is removed from the queue and saved + * in curthread's defer_waiters[] buffer, but it is not woken up + * until the mutex is unlocked. + */ + +static int +cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, + const struct timespec *abstime, int cancel) +{ + struct pthread *curthread = _get_curthread(); + struct sleepqueue *sq; + int recurse; + int error; - info.mutex = mutex; - info.cond = cond; + if (curthread->wchan != NULL) + PANIC("thread was already on queue."); - if (abstime != NULL) { - clock_gettime(cv->c_clockid, &ts); - TIMESPEC_SUB(&ts2, abstime, &ts); - tsp = &ts2; - } else - tsp = NULL; + if (cancel) + _thr_testcancel(curthread); - if (cancel) { - THR_CLEANUP_PUSH(curthread, cond_cancel_handler, &info); - _thr_cancel_enter2(curthread, 0); - ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 1); - info.cond = NULL; - _thr_cancel_leave(curthread, (ret != 0)); - THR_CLEANUP_POP(curthread, 0); - } else { - ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 0); + _sleepq_lock(cvp); + /* + * Set __has_user_waiters before unlocking the mutex; this allows + * us to check it without locking in pthread_cond_signal().
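For orientation, the path choice that cond_wait_common() (further below) makes between cond_wait_kernel() and cond_wait_user() can be restated as a small predicate; the helper name and its boolean parameters are hypothetical:

#include <sched.h>
#include <stdbool.h>

static bool
must_wait_in_kernel(int policy, bool mtx_prio_protocol, bool mtx_pshared,
    bool cv_pshared)
{
        /*
         * Real-time threads and process-shared or priority-protocol
         * (PP/PI) mutexes cannot use the userland sleep queue; they
         * must take the kernel ucond path.
         */
        return (policy != SCHED_OTHER || mtx_prio_protocol ||
            mtx_pshared || cv_pshared);
}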
+ */ + cvp->__has_user_waiters = 1; + curthread->will_sleep = 1; + (void)_mutex_cv_unlock(mp, &recurse); + curthread->mutex_obj = mp; + _sleepq_add(cvp, curthread); + for(;;) { + _thr_clear_wake(curthread); + _sleepq_unlock(cvp); + + if (cancel) { + _thr_cancel_enter2(curthread, 0); + error = _thr_sleep(curthread, cvp->__clock_id, abstime); + _thr_cancel_leave(curthread, 0); + } else { + error = _thr_sleep(curthread, cvp->__clock_id, abstime); + } + + if (curthread->wchan == NULL) { + error = 0; + goto out; + } + + _sleepq_lock(cvp); + if (curthread->wchan == NULL) { + error = 0; + break; + } else if (cancel && SHOULD_CANCEL(curthread)) { + sq = _sleepq_lookup(cvp); + cvp->__has_user_waiters = + _sleepq_remove(sq, curthread); + _sleepq_unlock(cvp); + curthread->mutex_obj = NULL; + _mutex_cv_lock(mp, recurse); + if (!THR_IN_CRITICAL(curthread)) + _pthread_exit(PTHREAD_CANCELED); + else /* this should not happen */ + return (0); + } else if (error == ETIMEDOUT) { + sq = _sleepq_lookup(cvp); + cvp->__has_user_waiters = + _sleepq_remove(sq, curthread); + break; + } } - if (ret == EINTR) - ret = 0; - _mutex_cv_lock(mutex, info.count); - return (ret); + _sleepq_unlock(cvp); +out: + curthread->mutex_obj = NULL; + _mutex_cv_lock(mp, recurse); + return (error); +} + +static int +cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime, int cancel) +{ + struct pthread *curthread = _get_curthread(); + struct pthread_cond *cvp; + struct pthread_mutex *mp; + int error; + + CHECK_AND_INIT_COND + + mp = *mutex; + + if ((error = _mutex_owned(curthread, mp)) != 0) + return (error); + + if (curthread->attr.sched_policy != SCHED_OTHER || + (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT|UMUTEX_PRIO_INHERIT| + USYNC_PROCESS_SHARED)) != 0 || + (cvp->__flags & USYNC_PROCESS_SHARED) != 0) + return cond_wait_kernel(cvp, mp, abstime, cancel); + else + return cond_wait_user(cvp, mp, abstime, cancel); } int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { return (cond_wait_common(cond, mutex, NULL, 0)); } int __pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { return (cond_wait_common(cond, mutex, NULL, 1)); } int -_pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * mutex, +_pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec * abstime) { if (abstime == NULL || abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) return (EINVAL); return (cond_wait_common(cond, mutex, abstime, 0)); } int __pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) { if (abstime == NULL || abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) return (EINVAL); return (cond_wait_common(cond, mutex, abstime, 1)); } static int -cond_signal_common(pthread_cond_t *cond, int broadcast) +cond_signal_common(pthread_cond_t *cond) { struct pthread *curthread = _get_curthread(); - pthread_cond_t cv; - int ret = 0; + struct pthread *td; + struct pthread_cond *cvp; + struct pthread_mutex *mp; + struct sleepqueue *sq; + int *waddr; + int pshared; /* * If the condition variable is statically initialized, perform dynamic * initialization. 
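The loop in cond_wait_user() above reduces to a per-thread wake-word handshake: clear the word, sleep until it becomes nonzero, and have wakers set it. A portable C11 sketch with hypothetical names, using sched_yield() as a stand-in for the blocking umtx wait:

#include <sched.h>
#include <stdatomic.h>

/* One word per thread: 0 means asleep, nonzero means "wakeup posted". */
struct wake_word {
        atomic_uint value;
};

static void
wake_clear(struct wake_word *w)         /* cf. _thr_clear_wake() */
{
        atomic_store_explicit(&w->value, 0, memory_order_relaxed);
}

static void
wake_post(struct wake_word *w)          /* cf. _thr_set_wake() */
{
        atomic_store_explicit(&w->value, 1, memory_order_release);
}

static void
wake_wait(struct wake_word *w)          /* cf. _thr_sleep() */
{
        /*
         * A real implementation blocks in the kernel on the word
         * (UMTX_OP_WAIT_UINT_PRIVATE); yielding keeps this portable.
         */
        while (atomic_load_explicit(&w->value, memory_order_acquire) == 0)
                sched_yield();
}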
*/ CHECK_AND_INIT_COND - THR_UMUTEX_LOCK(curthread, &cv->c_lock); - if (!broadcast) - ret = _thr_ucond_signal(&cv->c_kerncv); - else - ret = _thr_ucond_broadcast(&cv->c_kerncv); - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); - return (ret); + pshared = CV_PSHARED(cvp); + + _thr_ucond_signal((struct ucond *)&cvp->__has_kern_waiters); + + if (pshared || cvp->__has_user_waiters == 0) + return (0); + + curthread = _get_curthread(); + waddr = NULL; + _sleepq_lock(cvp); + sq = _sleepq_lookup(cvp); + if (sq == NULL) { + _sleepq_unlock(cvp); + return (0); + } + + td = _sleepq_first(sq); + mp = td->mutex_obj; + cvp->__has_user_waiters = _sleepq_remove(sq, td); + if (mp->m_owner == curthread) { + if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + curthread->defer_waiters[curthread->nwaiter_defer++] = + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERED; + } else { + waddr = &td->wake_addr->value; + } + _sleepq_unlock(cvp); + if (waddr != NULL) + _thr_set_wake(waddr); + return (0); +} + +struct broadcast_arg { + struct pthread *curthread; + unsigned int *waddrs[MAX_DEFER_WAITERS]; + int count; +}; + +static void +drop_cb(struct pthread *td, void *arg) +{ + struct broadcast_arg *ba = arg; + struct pthread_mutex *mp; + struct pthread *curthread = ba->curthread; + + mp = td->mutex_obj; + if (mp->m_owner == curthread) { + if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + curthread->defer_waiters[curthread->nwaiter_defer++] = + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERED; + } else { + if (ba->count >= MAX_DEFER_WAITERS) { + _thr_wake_all(ba->waddrs, ba->count); + ba->count = 0; + } + ba->waddrs[ba->count++] = &td->wake_addr->value; + } +} + +static int +cond_broadcast_common(pthread_cond_t *cond) +{ + int pshared; + struct pthread_cond *cvp; + struct sleepqueue *sq; + struct broadcast_arg ba; + + /* + * If the condition variable is statically initialized, perform dynamic + * initialization. + */ + CHECK_AND_INIT_COND + + pshared = CV_PSHARED(cvp); + + _thr_ucond_broadcast((struct ucond *)&cvp->__has_kern_waiters); + + if (pshared || cvp->__has_user_waiters == 0) + return (0); + + ba.curthread = _get_curthread(); + ba.count = 0; + + _sleepq_lock(cvp); + sq = _sleepq_lookup(cvp); + if (sq == NULL) { + _sleepq_unlock(cvp); + return (0); + } + _sleepq_drop(sq, drop_cb, &ba); + cvp->__has_user_waiters = 0; + _sleepq_unlock(cvp); + if (ba.count > 0) + _thr_wake_all(ba.waddrs, ba.count); + return (0); } int _pthread_cond_signal(pthread_cond_t * cond) { - return (cond_signal_common(cond, 0)); + return (cond_signal_common(cond)); } int _pthread_cond_broadcast(pthread_cond_t * cond) { - return (cond_signal_common(cond, 1)); + return (cond_broadcast_common(cond)); } diff --git a/lib/libthr/thread/thr_init.c b/lib/libthr/thread/thr_init.c index b10c2277f15f..7e0721524fb5 100644 --- a/lib/libthr/thread/thr_init.c +++ b/lib/libthr/thread/thr_init.c @@ -1,475 +1,477 @@ /* * Copyright (c) 2003 Daniel M. Eischen * Copyright (c) 1995-1998 John Birrell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by John Birrell. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "thr_private.h" char *_usrstack; struct pthread *_thr_initial; int _libthr_debug; int _thread_event_mask; struct pthread *_thread_last_event; pthreadlist _thread_list = TAILQ_HEAD_INITIALIZER(_thread_list); pthreadlist _thread_gc_list = TAILQ_HEAD_INITIALIZER(_thread_gc_list); int _thread_active_threads = 1; atfork_head _thr_atfork_list = TAILQ_HEAD_INITIALIZER(_thr_atfork_list); struct urwlock _thr_atfork_lock = DEFAULT_URWLOCK; struct pthread_prio _thr_priorities[3] = { {RTP_PRIO_MIN, RTP_PRIO_MAX, 0}, /* FIFO */ {0, 0, 63}, /* OTHER */ {RTP_PRIO_MIN, RTP_PRIO_MAX, 0} /* RR */ }; struct pthread_attr _pthread_attr_default = { .sched_policy = SCHED_OTHER, .sched_inherit = PTHREAD_INHERIT_SCHED, .prio = 0, .suspend = THR_CREATE_RUNNING, .flags = PTHREAD_SCOPE_SYSTEM, .stackaddr_attr = NULL, .stacksize_attr = THR_STACK_DEFAULT, .guardsize_attr = 0, .cpusetsize = 0, .cpuset = NULL }; struct pthread_mutex_attr _pthread_mutexattr_default = { .m_type = PTHREAD_MUTEX_DEFAULT, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; struct pthread_mutex_attr _pthread_mutexattr_adaptive_default = { .m_type = PTHREAD_MUTEX_ADAPTIVE_NP, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; /* Default condition variable attributes: */ struct pthread_cond_attr _pthread_condattr_default = { .c_pshared = PTHREAD_PROCESS_PRIVATE, .c_clockid = CLOCK_REALTIME }; pid_t _thr_pid; int _thr_is_smp = 0; size_t _thr_guard_default; size_t _thr_stack_default = THR_STACK_DEFAULT; size_t _thr_stack_initial = THR_STACK_INITIAL; int _thr_page_size; int _thr_spinloops; int _thr_yieldloops; int _gc_count; struct umutex _mutex_static_lock = DEFAULT_UMUTEX; struct umutex _cond_static_lock = DEFAULT_UMUTEX; struct umutex _rwlock_static_lock = DEFAULT_UMUTEX; struct umutex _keytable_lock = DEFAULT_UMUTEX; struct urwlock _thr_list_lock = DEFAULT_URWLOCK; struct umutex _thr_event_lock = DEFAULT_UMUTEX; int __pthread_cond_wait(pthread_cond_t *, pthread_mutex_t *); int 
__pthread_mutex_lock(pthread_mutex_t *); int __pthread_mutex_trylock(pthread_mutex_t *); void _thread_init_hack(void) __attribute__ ((constructor)); static void init_private(void); static void init_main_thread(struct pthread *thread); /* * All weak references used within libc should be in this table. * This is so that static libraries will work. */ STATIC_LIB_REQUIRE(_fork); STATIC_LIB_REQUIRE(_pthread_getspecific); STATIC_LIB_REQUIRE(_pthread_key_create); STATIC_LIB_REQUIRE(_pthread_key_delete); STATIC_LIB_REQUIRE(_pthread_mutex_destroy); STATIC_LIB_REQUIRE(_pthread_mutex_init); STATIC_LIB_REQUIRE(_pthread_mutex_lock); STATIC_LIB_REQUIRE(_pthread_mutex_trylock); STATIC_LIB_REQUIRE(_pthread_mutex_unlock); STATIC_LIB_REQUIRE(_pthread_mutexattr_init); STATIC_LIB_REQUIRE(_pthread_mutexattr_destroy); STATIC_LIB_REQUIRE(_pthread_mutexattr_settype); STATIC_LIB_REQUIRE(_pthread_once); STATIC_LIB_REQUIRE(_pthread_setspecific); STATIC_LIB_REQUIRE(_raise); STATIC_LIB_REQUIRE(_sem_destroy); STATIC_LIB_REQUIRE(_sem_getvalue); STATIC_LIB_REQUIRE(_sem_init); STATIC_LIB_REQUIRE(_sem_post); STATIC_LIB_REQUIRE(_sem_timedwait); STATIC_LIB_REQUIRE(_sem_trywait); STATIC_LIB_REQUIRE(_sem_wait); STATIC_LIB_REQUIRE(_sigaction); STATIC_LIB_REQUIRE(_sigprocmask); STATIC_LIB_REQUIRE(_sigsuspend); STATIC_LIB_REQUIRE(_sigtimedwait); STATIC_LIB_REQUIRE(_sigwait); STATIC_LIB_REQUIRE(_sigwaitinfo); STATIC_LIB_REQUIRE(_spinlock); STATIC_LIB_REQUIRE(_spinlock_debug); STATIC_LIB_REQUIRE(_spinunlock); STATIC_LIB_REQUIRE(_thread_init_hack); /* * These are needed when linking statically. All references within * libgcc (and in the future libc) to these routines are weak, but * if they are not (strongly) referenced by the application or other * libraries, then the actual functions will not be loaded. 
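STATIC_LIB_REQUIRE() exists so that the static-archive build links these modules even though all internal references to the symbols are weak. Its usual definition is a single assembler directive (quoted from memory from libc_private.h, so verify against the tree before relying on it):

#define STATIC_LIB_REQUIRE(name)        __asm(".globl " #name)

Declaring the symbol .globl without defining it leaves an undefined strong reference in the object that uses the macro, which forces the linker to pull the defining member out of the archive.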
*/ STATIC_LIB_REQUIRE(_pthread_once); STATIC_LIB_REQUIRE(_pthread_key_create); STATIC_LIB_REQUIRE(_pthread_key_delete); STATIC_LIB_REQUIRE(_pthread_getspecific); STATIC_LIB_REQUIRE(_pthread_setspecific); STATIC_LIB_REQUIRE(_pthread_mutex_init); STATIC_LIB_REQUIRE(_pthread_mutex_destroy); STATIC_LIB_REQUIRE(_pthread_mutex_lock); STATIC_LIB_REQUIRE(_pthread_mutex_trylock); STATIC_LIB_REQUIRE(_pthread_mutex_unlock); STATIC_LIB_REQUIRE(_pthread_create); /* Pull in all symbols required by libthread_db */ STATIC_LIB_REQUIRE(_thread_state_running); #define DUAL_ENTRY(entry) \ (pthread_func_t)entry, (pthread_func_t)entry static pthread_func_t jmp_table[][2] = { {DUAL_ENTRY(_pthread_atfork)}, /* PJT_ATFORK */ {DUAL_ENTRY(_pthread_attr_destroy)}, /* PJT_ATTR_DESTROY */ {DUAL_ENTRY(_pthread_attr_getdetachstate)}, /* PJT_ATTR_GETDETACHSTATE */ {DUAL_ENTRY(_pthread_attr_getguardsize)}, /* PJT_ATTR_GETGUARDSIZE */ {DUAL_ENTRY(_pthread_attr_getinheritsched)}, /* PJT_ATTR_GETINHERITSCHED */ {DUAL_ENTRY(_pthread_attr_getschedparam)}, /* PJT_ATTR_GETSCHEDPARAM */ {DUAL_ENTRY(_pthread_attr_getschedpolicy)}, /* PJT_ATTR_GETSCHEDPOLICY */ {DUAL_ENTRY(_pthread_attr_getscope)}, /* PJT_ATTR_GETSCOPE */ {DUAL_ENTRY(_pthread_attr_getstackaddr)}, /* PJT_ATTR_GETSTACKADDR */ {DUAL_ENTRY(_pthread_attr_getstacksize)}, /* PJT_ATTR_GETSTACKSIZE */ {DUAL_ENTRY(_pthread_attr_init)}, /* PJT_ATTR_INIT */ {DUAL_ENTRY(_pthread_attr_setdetachstate)}, /* PJT_ATTR_SETDETACHSTATE */ {DUAL_ENTRY(_pthread_attr_setguardsize)}, /* PJT_ATTR_SETGUARDSIZE */ {DUAL_ENTRY(_pthread_attr_setinheritsched)}, /* PJT_ATTR_SETINHERITSCHED */ {DUAL_ENTRY(_pthread_attr_setschedparam)}, /* PJT_ATTR_SETSCHEDPARAM */ {DUAL_ENTRY(_pthread_attr_setschedpolicy)}, /* PJT_ATTR_SETSCHEDPOLICY */ {DUAL_ENTRY(_pthread_attr_setscope)}, /* PJT_ATTR_SETSCOPE */ {DUAL_ENTRY(_pthread_attr_setstackaddr)}, /* PJT_ATTR_SETSTACKADDR */ {DUAL_ENTRY(_pthread_attr_setstacksize)}, /* PJT_ATTR_SETSTACKSIZE */ {DUAL_ENTRY(_pthread_cancel)}, /* PJT_CANCEL */ {DUAL_ENTRY(_pthread_cleanup_pop)}, /* PJT_CLEANUP_POP */ {DUAL_ENTRY(_pthread_cleanup_push)}, /* PJT_CLEANUP_PUSH */ {DUAL_ENTRY(_pthread_cond_broadcast)}, /* PJT_COND_BROADCAST */ {DUAL_ENTRY(_pthread_cond_destroy)}, /* PJT_COND_DESTROY */ {DUAL_ENTRY(_pthread_cond_init)}, /* PJT_COND_INIT */ {DUAL_ENTRY(_pthread_cond_signal)}, /* PJT_COND_SIGNAL */ {DUAL_ENTRY(_pthread_cond_timedwait)}, /* PJT_COND_TIMEDWAIT */ {(pthread_func_t)__pthread_cond_wait, (pthread_func_t)_pthread_cond_wait}, /* PJT_COND_WAIT */ {DUAL_ENTRY(_pthread_detach)}, /* PJT_DETACH */ {DUAL_ENTRY(_pthread_equal)}, /* PJT_EQUAL */ {DUAL_ENTRY(_pthread_exit)}, /* PJT_EXIT */ {DUAL_ENTRY(_pthread_getspecific)}, /* PJT_GETSPECIFIC */ {DUAL_ENTRY(_pthread_join)}, /* PJT_JOIN */ {DUAL_ENTRY(_pthread_key_create)}, /* PJT_KEY_CREATE */ {DUAL_ENTRY(_pthread_key_delete)}, /* PJT_KEY_DELETE*/ {DUAL_ENTRY(_pthread_kill)}, /* PJT_KILL */ {DUAL_ENTRY(_pthread_main_np)}, /* PJT_MAIN_NP */ {DUAL_ENTRY(_pthread_mutexattr_destroy)}, /* PJT_MUTEXATTR_DESTROY */ {DUAL_ENTRY(_pthread_mutexattr_init)}, /* PJT_MUTEXATTR_INIT */ {DUAL_ENTRY(_pthread_mutexattr_settype)}, /* PJT_MUTEXATTR_SETTYPE */ {DUAL_ENTRY(_pthread_mutex_destroy)}, /* PJT_MUTEX_DESTROY */ {DUAL_ENTRY(_pthread_mutex_init)}, /* PJT_MUTEX_INIT */ {(pthread_func_t)__pthread_mutex_lock, (pthread_func_t)_pthread_mutex_lock}, /* PJT_MUTEX_LOCK */ {(pthread_func_t)__pthread_mutex_trylock, (pthread_func_t)_pthread_mutex_trylock},/* PJT_MUTEX_TRYLOCK */ {DUAL_ENTRY(_pthread_mutex_unlock)}, /* PJT_MUTEX_UNLOCK */ 
{DUAL_ENTRY(_pthread_once)}, /* PJT_ONCE */ {DUAL_ENTRY(_pthread_rwlock_destroy)}, /* PJT_RWLOCK_DESTROY */ {DUAL_ENTRY(_pthread_rwlock_init)}, /* PJT_RWLOCK_INIT */ {DUAL_ENTRY(_pthread_rwlock_rdlock)}, /* PJT_RWLOCK_RDLOCK */ {DUAL_ENTRY(_pthread_rwlock_tryrdlock)},/* PJT_RWLOCK_TRYRDLOCK */ {DUAL_ENTRY(_pthread_rwlock_trywrlock)},/* PJT_RWLOCK_TRYWRLOCK */ {DUAL_ENTRY(_pthread_rwlock_unlock)}, /* PJT_RWLOCK_UNLOCK */ {DUAL_ENTRY(_pthread_rwlock_wrlock)}, /* PJT_RWLOCK_WRLOCK */ {DUAL_ENTRY(_pthread_self)}, /* PJT_SELF */ {DUAL_ENTRY(_pthread_setcancelstate)}, /* PJT_SETCANCELSTATE */ {DUAL_ENTRY(_pthread_setcanceltype)}, /* PJT_SETCANCELTYPE */ {DUAL_ENTRY(_pthread_setspecific)}, /* PJT_SETSPECIFIC */ {DUAL_ENTRY(_pthread_sigmask)}, /* PJT_SIGMASK */ {DUAL_ENTRY(_pthread_testcancel)}, /* PJT_TESTCANCEL */ {DUAL_ENTRY(__pthread_cleanup_pop_imp)},/* PJT_CLEANUP_POP_IMP */ {DUAL_ENTRY(__pthread_cleanup_push_imp)},/* PJT_CLEANUP_PUSH_IMP */ {DUAL_ENTRY(_pthread_cancel_enter)}, /* PJT_CANCEL_ENTER */ {DUAL_ENTRY(_pthread_cancel_leave)} /* PJT_CANCEL_LEAVE */ }; static int init_once = 0; /* * For the shared version of the threads library, the above is sufficient. * But for the archive version of the library, we need a little bit more. * Namely, we must arrange for this particular module to be pulled in from * the archive library at link time. To accomplish that, we define and * initialize a variable, "_thread_autoinit_dummy_decl". This variable is * referenced (as an extern) from libc/stdlib/exit.c. This will always * create a need for this module, ensuring that it is present in the * executable. */ extern int _thread_autoinit_dummy_decl; int _thread_autoinit_dummy_decl = 0; void _thread_init_hack(void) { _libpthread_init(NULL); } /* * Threaded process initialization. * * This is only called under two conditions: * * 1) Some thread routines have detected that the library hasn't yet * been initialized (_thr_initial == NULL && curthread == NULL), or * * 2) An explicit call to reinitialize after a fork (indicated * by curthread != NULL) */ void _libpthread_init(struct pthread *curthread) { int fd, first = 0; /* Check if this function has already been called: */ if ((_thr_initial != NULL) && (curthread == NULL)) /* Only initialize the threaded application once. */ return; /* * Check the size of the jump table to make sure it is preset * with the correct number of entries. */ if (sizeof(jmp_table) != (sizeof(pthread_func_t) * PJT_MAX * 2)) PANIC("Thread jump table not properly initialized"); memcpy(__thr_jtable, jmp_table, sizeof(jmp_table)); /* * Check for the special case of this process running as * or in place of init as pid = 1: */ if ((_thr_pid = getpid()) == 1) { /* * Setup a new session for this process which is * assumed to be running as root. */ if (setsid() == -1) PANIC("Can't set session ID"); if (revoke(_PATH_CONSOLE) != 0) PANIC("Can't revoke console"); if ((fd = __sys_open(_PATH_CONSOLE, O_RDWR)) < 0) PANIC("Can't open console"); if (setlogin("root") == -1) PANIC("Can't set login to root"); if (_ioctl(fd, TIOCSCTTY, (char *) NULL) == -1) PANIC("Can't set controlling terminal"); } /* Initialize pthread private data. */ init_private(); /* Set the initial thread. */ if (curthread == NULL) { first = 1; /* Create and initialize the initial thread. */ curthread = _thr_alloc(NULL); if (curthread == NULL) PANIC("Can't allocate initial thread"); init_main_thread(curthread); } /* * Add the thread to the thread list queue. 
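_libpthread_init() below checks sizeof(jmp_table) against PJT_MAX at run time and PANICs on mismatch. The same invariant can be stated at compile time; a C11 sketch with an illustrative (not the actual) PJT_MAX:

typedef void (*pthread_func_t)(void);
enum { PJT_MAX = 57 };          /* illustrative count, not the real one */
static pthread_func_t jmp_table[PJT_MAX][2];

/* Same invariant as the PANIC() check, but enforced at compile time: */
_Static_assert(sizeof(jmp_table) == sizeof(pthread_func_t) * PJT_MAX * 2,
    "thread jump table not properly initialized");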
*/ THR_LIST_ADD(curthread); _thread_active_threads = 1; /* Setup the thread specific data */ _tcb_set(curthread->tcb); if (first) { _thr_initial = curthread; _thr_signal_init(); if (_thread_event_mask & TD_CREATE) _thr_report_creation(curthread, curthread); } } /* * This function and pthread_create() do a lot of the same things. * It'd be nice to consolidate the common stuff in one place. */ static void init_main_thread(struct pthread *thread) { struct sched_param sched_param; /* Setup the thread attributes. */ thr_self(&thread->tid); thread->attr = _pthread_attr_default; /* * Set up the thread stack. * * Create a red zone below the main stack. All other stacks * are constrained to a maximum size by the parameters * passed to mmap(), but this stack is only limited by * resource limits, so this stack needs an explicitly mapped * red zone to protect the thread stack that is just beyond. */ if (mmap(_usrstack - _thr_stack_initial - _thr_guard_default, _thr_guard_default, 0, MAP_ANON, -1, 0) == MAP_FAILED) PANIC("Cannot allocate red zone for initial thread"); /* * Mark the stack as an application supplied stack so that it * isn't deallocated. * * XXX - I'm not sure it would hurt anything to deallocate * the main thread stack because deallocation doesn't * actually free() it; it just puts it in the free * stack queue for later reuse. */ thread->attr.stackaddr_attr = _usrstack - _thr_stack_initial; thread->attr.stacksize_attr = _thr_stack_initial; thread->attr.guardsize_attr = _thr_guard_default; thread->attr.flags |= THR_STACK_USER; /* * Write a magic value to the thread structure * to help identify valid ones: */ thread->magic = THR_MAGIC; thread->cancel_enable = 1; thread->cancel_async = 0; thr_set_name(thread->tid, "initial thread"); /* Initialize the mutex queue: */ TAILQ_INIT(&thread->mutexq); TAILQ_INIT(&thread->pp_mutexq); thread->state = PS_RUNNING; _thr_getscheduler(thread->tid, &thread->attr.sched_policy, &sched_param); thread->attr.prio = sched_param.sched_priority; #ifdef _PTHREAD_FORCED_UNWIND thread->unwind_stackend = _usrstack; #endif /* Others cleared to zero by thr_alloc() */ } static void init_private(void) { size_t len; int mib[2]; char *env; _thr_umutex_init(&_mutex_static_lock); _thr_umutex_init(&_cond_static_lock); _thr_umutex_init(&_rwlock_static_lock); _thr_umutex_init(&_keytable_lock); _thr_urwlock_init(&_thr_atfork_lock); _thr_umutex_init(&_thr_event_lock); _thr_once_init(); _thr_spinlock_init(); _thr_list_init(); + _thr_wake_addr_init(); + _sleepq_init(); /* * Avoid reinitializing some things if they don't need to be, * e.g. after a fork(). 
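The red-zone comment in init_main_thread() above describes mapping an inaccessible page below the main stack so that overflow faults instead of silently corrupting the adjacent mapping. A standalone POSIX illustration of the same idea (here via PROT_NONE and mprotect(); the patch itself maps with prot 0 at a fixed address below _usrstack):

#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        size_t guard = 4096, stack = 64 * 1024;
        char *base;

        base = mmap(NULL, guard + stack, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (base == MAP_FAILED)
                return (1);
        /* Revoke access to the lowest page: the red zone. */
        if (mprotect(base, guard, PROT_NONE) == -1)
                return (1);
        printf("stack: [%p, %p), guard page below at %p\n",
            (void *)(base + guard), (void *)(base + guard + stack),
            (void *)base);
        return (0);
}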
*/ if (init_once == 0) { /* Find the stack top */ mib[0] = CTL_KERN; mib[1] = KERN_USRSTACK; len = sizeof (_usrstack); if (sysctl(mib, 2, &_usrstack, &len, NULL, 0) == -1) PANIC("Cannot get kern.usrstack from sysctl"); len = sizeof(_thr_is_smp); sysctlbyname("kern.smp.cpus", &_thr_is_smp, &len, NULL, 0); _thr_is_smp = (_thr_is_smp > 1); _thr_page_size = getpagesize(); _thr_guard_default = _thr_page_size; _pthread_attr_default.guardsize_attr = _thr_guard_default; _pthread_attr_default.stacksize_attr = _thr_stack_default; env = getenv("LIBPTHREAD_SPINLOOPS"); if (env) _thr_spinloops = atoi(env); env = getenv("LIBPTHREAD_YIELDLOOPS"); if (env) _thr_yieldloops = atoi(env); TAILQ_INIT(&_thr_atfork_list); } init_once = 1; } diff --git a/lib/libthr/thread/thr_kern.c b/lib/libthr/thread/thr_kern.c index 3ad33ad995a2..48f7c6506d7e 100644 --- a/lib/libthr/thread/thr_kern.c +++ b/lib/libthr/thread/thr_kern.c @@ -1,132 +1,224 @@ /* * Copyright (c) 2005 David Xu * Copyright (C) 2003 Daniel M. Eischen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include +#include #include #include "thr_private.h" /*#define DEBUG_THREAD_KERN */ #ifdef DEBUG_THREAD_KERN #define DBG_MSG stdout_debug #else #define DBG_MSG(x...) #endif +static struct umutex addr_lock; +static struct wake_addr *wake_addr_head; +static struct wake_addr default_wake_addr; + /* * This is called when the first thread (other than the initial * thread) is created. 
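init_private() above probes kern.smp.cpus once and treats any value above one as SMP, which later enables adaptive spinning before blocking. A standalone FreeBSD example of the same probe:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int ncpus = 0;
        size_t len = sizeof(ncpus);

        if (sysctlbyname("kern.smp.cpus", &ncpus, &len, NULL, 0) == -1)
                return (1);
        printf("kern.smp.cpus = %d -> _thr_is_smp = %d\n", ncpus, ncpus > 1);
        return (0);
}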
*/ int _thr_setthreaded(int threaded) { if (((threaded == 0) ^ (__isthreaded == 0)) == 0) return (0); __isthreaded = threaded; if (threaded != 0) { _thr_rtld_init(); } else { _thr_rtld_fini(); } return (0); } void _thr_assert_lock_level() { PANIC("locklevel <= 0"); } int _rtp_to_schedparam(const struct rtprio *rtp, int *policy, struct sched_param *param) { switch(rtp->type) { case RTP_PRIO_REALTIME: *policy = SCHED_RR; param->sched_priority = RTP_PRIO_MAX - rtp->prio; break; case RTP_PRIO_FIFO: *policy = SCHED_FIFO; param->sched_priority = RTP_PRIO_MAX - rtp->prio; break; default: *policy = SCHED_OTHER; param->sched_priority = 0; break; } return (0); } int _schedparam_to_rtp(int policy, const struct sched_param *param, struct rtprio *rtp) { switch(policy) { case SCHED_RR: rtp->type = RTP_PRIO_REALTIME; rtp->prio = RTP_PRIO_MAX - param->sched_priority; break; case SCHED_FIFO: rtp->type = RTP_PRIO_FIFO; rtp->prio = RTP_PRIO_MAX - param->sched_priority; break; case SCHED_OTHER: default: rtp->type = RTP_PRIO_NORMAL; rtp->prio = 0; break; } return (0); } int _thr_getscheduler(lwpid_t lwpid, int *policy, struct sched_param *param) { struct rtprio rtp; int ret; ret = rtprio_thread(RTP_LOOKUP, lwpid, &rtp); if (ret == -1) return (ret); _rtp_to_schedparam(&rtp, policy, param); return (0); } int _thr_setscheduler(lwpid_t lwpid, int policy, const struct sched_param *param) { struct rtprio rtp; _schedparam_to_rtp(policy, param, &rtp); return (rtprio_thread(RTP_SET, lwpid, &rtp)); } + +void +_thr_wake_addr_init(void) +{ + _thr_umutex_init(&addr_lock); + wake_addr_head = NULL; +} + +/* + * Allocate a wake address. The memory area is never freed once + * allocated, because threads may still be referencing it. + */ +struct wake_addr * +_thr_alloc_wake_addr(void) +{ + struct pthread *curthread; + struct wake_addr *p; + + if (_thr_initial == NULL) { + return &default_wake_addr; + } + + curthread = _get_curthread(); + + THR_LOCK_ACQUIRE(curthread, &addr_lock); + if (wake_addr_head == NULL) { + unsigned i; + unsigned pagesize = getpagesize(); + struct wake_addr *pp = (struct wake_addr *) + mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0); + for (i = 1; i < pagesize/sizeof(struct wake_addr); ++i) + pp[i].link = &pp[i+1]; + pp[i-1].link = NULL; + wake_addr_head = &pp[1]; + p = &pp[0]; + } else { + p = wake_addr_head; + wake_addr_head = p->link; + } + THR_LOCK_RELEASE(curthread, &addr_lock); + p->value = 0; + return (p); +} + +void +_thr_release_wake_addr(struct wake_addr *wa) +{ + struct pthread *curthread = _get_curthread(); + + if (wa == &default_wake_addr) + return; + THR_LOCK_ACQUIRE(curthread, &addr_lock); + wa->link = wake_addr_head; + wake_addr_head = wa; + THR_LOCK_RELEASE(curthread, &addr_lock); +} + +/* Sleep on thread wakeup address */ +int +_thr_sleep(struct pthread *curthread, int clockid, + const struct timespec *abstime) +{ + + curthread->will_sleep = 0; + if (curthread->nwaiter_defer > 0) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + + if (curthread->wake_addr->value != 0) + return (0); + + return _thr_umtx_timedwait_uint(&curthread->wake_addr->value, 0, + clockid, abstime, 0); +} + +void +_thr_wake_all(unsigned int *waddrs[], int count) +{ + int i; + + for (i = 0; i < count; ++i) + *waddrs[i] = 1; + _umtx_op(waddrs, UMTX_OP_NWAKE_PRIVATE, count, NULL, NULL); +} diff --git a/lib/libthr/thread/thr_list.c b/lib/libthr/thread/thr_list.c index 7541fd3f48a0..249501cc5293 100644 ---
a/lib/libthr/thread/thr_list.c +++ b/lib/libthr/thread/thr_list.c @@ -1,349 +1,353 @@ /* * Copyright (c) 2005 David Xu * Copyright (C) 2003 Daniel M. Eischen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include "thr_private.h" #include "libc_private.h" /*#define DEBUG_THREAD_LIST */ #ifdef DEBUG_THREAD_LIST #define DBG_MSG stdout_debug #else #define DBG_MSG(x...) #endif #define MAX_THREADS 100000 /* * Define a high water mark for the maximum number of threads that * will be cached. Once this level is reached, any extra threads * will be free()'d. */ #define MAX_CACHED_THREADS 100 /* * We've got to keep track of everything that is allocated, not only * to have a speedy free list, but also so they can be deallocated * after a fork(). */ static TAILQ_HEAD(, pthread) free_threadq; static struct umutex free_thread_lock = DEFAULT_UMUTEX; static struct umutex tcb_lock = DEFAULT_UMUTEX; static int free_thread_count = 0; static int inited = 0; static int total_threads; LIST_HEAD(thread_hash_head, pthread); #define HASH_QUEUES 128 static struct thread_hash_head thr_hashtable[HASH_QUEUES]; #define THREAD_HASH(thrd) (((unsigned long)thrd >> 8) % HASH_QUEUES) static void thr_destroy(struct pthread *curthread, struct pthread *thread); void _thr_list_init(void) { int i; _gc_count = 0; total_threads = 1; _thr_urwlock_init(&_thr_list_lock); TAILQ_INIT(&_thread_list); TAILQ_INIT(&free_threadq); _thr_umutex_init(&free_thread_lock); _thr_umutex_init(&tcb_lock); if (inited) { for (i = 0; i < HASH_QUEUES; ++i) LIST_INIT(&thr_hashtable[i]); } inited = 1; } void _thr_gc(struct pthread *curthread) { struct pthread *td, *td_next; TAILQ_HEAD(, pthread) worklist; TAILQ_INIT(&worklist); THREAD_LIST_WRLOCK(curthread); /* Check the threads waiting for GC. */ TAILQ_FOREACH_SAFE(td, &_thread_gc_list, gcle, td_next) { if (td->tid != TID_TERMINATED) { /* make sure we are not still in userland */ continue; } _thr_stack_free(&td->attr); THR_GCLIST_REMOVE(td); TAILQ_INSERT_HEAD(&worklist, td, gcle); } THREAD_LIST_UNLOCK(curthread); while ((td = TAILQ_FIRST(&worklist)) != NULL) { TAILQ_REMOVE(&worklist, td, gcle); /* * XXX we don't free initial thread, because there might * have some code referencing initial thread. 
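_thr_alloc() and _thr_free() below cache thread structures on a free list; _thr_alloc_wake_addr() in the thr_kern.c hunk above does the same with records carved out of whole pages. A self-contained sketch of that page-carving pattern (hypothetical struct node, no locking, and with the MAP_FAILED check that the original omits):

#include <sys/mman.h>
#include <stddef.h>
#include <unistd.h>

struct node {                           /* hypothetical fixed-size record */
        unsigned int value;
        struct node *link;
};

static struct node *free_head;          /* single-threaded sketch: no lock */

static struct node *
node_alloc(void)
{
        struct node *p, *pp;
        size_t i, n;

        if (free_head == NULL) {
                /* Carve a fresh page into nodes and chain the spares. */
                n = (size_t)getpagesize() / sizeof(struct node);
                pp = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (pp == MAP_FAILED)
                        return (NULL);
                for (i = 1; i < n - 1; i++)
                        pp[i].link = &pp[i + 1];
                pp[n - 1].link = NULL;
                free_head = &pp[1];
                p = &pp[0];
        } else {
                p = free_head;
                free_head = p->link;
        }
        p->value = 0;
        return (p);
}

static void
node_free(struct node *p)
{
        /* Pages are never unmapped; nodes just go back on the list. */
        p->link = free_head;
        free_head = p;
}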
*/ if (td == _thr_initial) { DBG_MSG("Initial thread won't be freed\n"); continue; } _thr_free(curthread, td); } } struct pthread * _thr_alloc(struct pthread *curthread) { struct pthread *thread = NULL; struct tcb *tcb; if (curthread != NULL) { if (GC_NEEDED()) _thr_gc(curthread); if (free_thread_count > 0) { THR_LOCK_ACQUIRE(curthread, &free_thread_lock); if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) { TAILQ_REMOVE(&free_threadq, thread, tle); free_thread_count--; } THR_LOCK_RELEASE(curthread, &free_thread_lock); } } if (thread == NULL) { if (total_threads > MAX_THREADS) return (NULL); atomic_fetchadd_int(&total_threads, 1); thread = malloc(sizeof(struct pthread)); if (thread == NULL) { atomic_fetchadd_int(&total_threads, -1); return (NULL); } } if (curthread != NULL) { THR_LOCK_ACQUIRE(curthread, &tcb_lock); tcb = _tcb_ctor(thread, 0 /* not initial tls */); THR_LOCK_RELEASE(curthread, &tcb_lock); } else { tcb = _tcb_ctor(thread, 1 /* initial tls */); } if (tcb != NULL) { memset(thread, 0, sizeof(*thread)); thread->tcb = tcb; + thread->sleepqueue = _sleepq_alloc(); + thread->wake_addr = _thr_alloc_wake_addr(); } else { thr_destroy(curthread, thread); atomic_fetchadd_int(&total_threads, -1); thread = NULL; } return (thread); } void _thr_free(struct pthread *curthread, struct pthread *thread) { DBG_MSG("Freeing thread %p\n", thread); /* * Always free tcb, as we only know it is part of RTLD TLS * block, but don't know its detail and can not assume how * it works, so better to avoid caching it here. */ if (curthread != NULL) { THR_LOCK_ACQUIRE(curthread, &tcb_lock); _tcb_dtor(thread->tcb); THR_LOCK_RELEASE(curthread, &tcb_lock); } else { _tcb_dtor(thread->tcb); } thread->tcb = NULL; if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) { + _sleepq_free(thread->sleepqueue); + _thr_release_wake_addr(thread->wake_addr); thr_destroy(curthread, thread); atomic_fetchadd_int(&total_threads, -1); } else { /* * Add the thread to the free thread list, this also avoids * pthread id is reused too quickly, may help some buggy apps. */ THR_LOCK_ACQUIRE(curthread, &free_thread_lock); TAILQ_INSERT_TAIL(&free_threadq, thread, tle); free_thread_count++; THR_LOCK_RELEASE(curthread, &free_thread_lock); } } static void thr_destroy(struct pthread *curthread __unused, struct pthread *thread) { free(thread); } /* * Add the thread to the list of all threads and increment * number of active threads. */ void _thr_link(struct pthread *curthread, struct pthread *thread) { THREAD_LIST_WRLOCK(curthread); THR_LIST_ADD(thread); THREAD_LIST_UNLOCK(curthread); atomic_add_int(&_thread_active_threads, 1); } /* * Remove an active thread. */ void _thr_unlink(struct pthread *curthread, struct pthread *thread) { THREAD_LIST_WRLOCK(curthread); THR_LIST_REMOVE(thread); THREAD_LIST_UNLOCK(curthread); atomic_add_int(&_thread_active_threads, -1); } void _thr_hash_add(struct pthread *thread) { struct thread_hash_head *head; head = &thr_hashtable[THREAD_HASH(thread)]; LIST_INSERT_HEAD(head, thread, hle); } void _thr_hash_remove(struct pthread *thread) { LIST_REMOVE(thread, hle); } struct pthread * _thr_hash_find(struct pthread *thread) { struct pthread *td; struct thread_hash_head *head; head = &thr_hashtable[THREAD_HASH(thread)]; LIST_FOREACH(td, head, hle) { if (td == thread) return (thread); } return (NULL); } /* * Find a thread in the linked list of active threads and add a reference * to it. Threads with positive reference counts will not be deallocated * until all references are released. 
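THREAD_HASH() (defined in the hunk above) indexes thr_hashtable by discarding the low eight bits of the thread pointer: heap-allocated struct pthreads tend to share their low address bits because of allocator alignment, so shifting them out spreads threads across buckets. A quick standalone demonstration:

#include <stdio.h>

#define HASH_QUEUES     128
#define THREAD_HASH(thrd)       (((unsigned long)(thrd) >> 8) % HASH_QUEUES)

int
main(void)
{
        static char objs[4][1024];      /* stand-ins for struct pthread */
        int i;

        for (i = 0; i < 4; i++)
                printf("%p -> bucket %lu\n", (void *)objs[i],
                    THREAD_HASH(objs[i]));
        return (0);
}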
*/ int _thr_ref_add(struct pthread *curthread, struct pthread *thread, int include_dead) { int ret; if (thread == NULL) /* Invalid thread: */ return (EINVAL); if ((ret = _thr_find_thread(curthread, thread, include_dead)) == 0) { thread->refcount++; THR_CRITICAL_ENTER(curthread); THR_THREAD_UNLOCK(curthread, thread); } /* Return zero if the thread exists: */ return (ret); } void _thr_ref_delete(struct pthread *curthread, struct pthread *thread) { THR_THREAD_LOCK(curthread, thread); thread->refcount--; _thr_try_gc(curthread, thread); THR_CRITICAL_LEAVE(curthread); } /* entered with thread lock held, exit with thread lock released */ void _thr_try_gc(struct pthread *curthread, struct pthread *thread) { if (THR_SHOULD_GC(thread)) { THR_REF_ADD(curthread, thread); THR_THREAD_UNLOCK(curthread, thread); THREAD_LIST_WRLOCK(curthread); THR_THREAD_LOCK(curthread, thread); THR_REF_DEL(curthread, thread); if (THR_SHOULD_GC(thread)) { THR_LIST_REMOVE(thread); THR_GCLIST_ADD(thread); } THR_THREAD_UNLOCK(curthread, thread); THREAD_LIST_UNLOCK(curthread); } else { THR_THREAD_UNLOCK(curthread, thread); } } /* return with thread lock held if thread is found */ int _thr_find_thread(struct pthread *curthread, struct pthread *thread, int include_dead) { struct pthread *pthread; int ret; if (thread == NULL) return (EINVAL); ret = 0; THREAD_LIST_RDLOCK(curthread); pthread = _thr_hash_find(thread); if (pthread) { THR_THREAD_LOCK(curthread, pthread); if (include_dead == 0 && pthread->state == PS_DEAD) { THR_THREAD_UNLOCK(curthread, pthread); ret = ESRCH; } } else { ret = ESRCH; } THREAD_LIST_UNLOCK(curthread); return (ret); } diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c index 29f91ec6fbfe..bd1fc2b550e3 100644 --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -1,759 +1,797 @@ /* * Copyright (c) 1995 John Birrell . * Copyright (c) 2006 David Xu . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by John Birrell. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include #include #include "un-namespace.h" #include "thr_private.h" #if defined(_PTHREADS_INVARIANTS) #define MUTEX_INIT_LINK(m) do { \ (m)->m_qe.tqe_prev = NULL; \ (m)->m_qe.tqe_next = NULL; \ } while (0) #define MUTEX_ASSERT_IS_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev == NULL))\ PANIC("mutex is not on list"); \ } while (0) #define MUTEX_ASSERT_NOT_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev != NULL || \ (m)->m_qe.tqe_next != NULL)) \ PANIC("mutex is on list"); \ } while (0) #else #define MUTEX_INIT_LINK(m) #define MUTEX_ASSERT_IS_OWNED(m) #define MUTEX_ASSERT_NOT_OWNED(m) #endif /* * For adaptive mutexes, how many times to spin doing trylock2 * before entering the kernel to block */ #define MUTEX_ADAPTIVE_SPINS 2000 /* * Prototypes */ int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr); int __pthread_mutex_trylock(pthread_mutex_t *mutex); int __pthread_mutex_lock(pthread_mutex_t *mutex); int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime); int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); int _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count); int _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count); int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); static int mutex_self_trylock(pthread_mutex_t); static int mutex_self_lock(pthread_mutex_t, const struct timespec *abstime); -static int mutex_unlock_common(pthread_mutex_t *); +static int mutex_unlock_common(struct pthread_mutex *, int); static int mutex_lock_sleep(struct pthread *, pthread_mutex_t, const struct timespec *); __weak_reference(__pthread_mutex_init, pthread_mutex_init); __strong_reference(__pthread_mutex_init, _pthread_mutex_init); __weak_reference(__pthread_mutex_lock, pthread_mutex_lock); __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock); __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock); __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock); __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock); __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock); /* Single underscore versions provided for libc internal usage: */ /* No difference between libc and application usage of these: */ __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy); __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock); __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling); __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling); __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np); 
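The __weak_reference()/__strong_reference() pairs above export one implementation under several linkage names. A rough portable equivalent using GCC/Clang alias attributes (hypothetical symbol names; FreeBSD's macros in <sys/cdefs.h> emit comparable assembler directives):

int
_example_lock(void)                     /* the real implementation */
{
        return (0);
}

/* Exported, interposable name (what __weak_reference() arranges): */
int example_lock(void) __attribute__((weak, alias("_example_lock")));

/* Always-bound internal name (what __strong_reference() arranges): */
int example_lock_np(void) __attribute__((alias("_example_lock")));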
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np); __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np); __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np); __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np); __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np); __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np); static int mutex_init(pthread_mutex_t *mutex, const struct pthread_mutex_attr *mutex_attr, void *(calloc_cb)(size_t, size_t)) { const struct pthread_mutex_attr *attr; struct pthread_mutex *pmutex; if (mutex_attr == NULL) { attr = &_pthread_mutexattr_default; } else { attr = mutex_attr; if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK || attr->m_type >= PTHREAD_MUTEX_TYPE_MAX) return (EINVAL); if (attr->m_protocol < PTHREAD_PRIO_NONE || attr->m_protocol > PTHREAD_PRIO_PROTECT) return (EINVAL); } if ((pmutex = (pthread_mutex_t) calloc_cb(1, sizeof(struct pthread_mutex))) == NULL) return (ENOMEM); - pmutex->m_type = attr->m_type; + pmutex->m_flags = attr->m_type; pmutex->m_owner = NULL; pmutex->m_count = 0; - pmutex->m_refcount = 0; pmutex->m_spinloops = 0; pmutex->m_yieldloops = 0; MUTEX_INIT_LINK(pmutex); switch(attr->m_protocol) { case PTHREAD_PRIO_NONE: pmutex->m_lock.m_owner = UMUTEX_UNOWNED; pmutex->m_lock.m_flags = 0; break; case PTHREAD_PRIO_INHERIT: pmutex->m_lock.m_owner = UMUTEX_UNOWNED; pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT; break; case PTHREAD_PRIO_PROTECT: pmutex->m_lock.m_owner = UMUTEX_CONTESTED; pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT; pmutex->m_lock.m_ceilings[0] = attr->m_ceiling; break; } - if (pmutex->m_type == PTHREAD_MUTEX_ADAPTIVE_NP) { + if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) { pmutex->m_spinloops = _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS; pmutex->m_yieldloops = _thr_yieldloops; } *mutex = pmutex; return (0); } static int init_static(struct pthread *thread, pthread_mutex_t *mutex) { int ret; THR_LOCK_ACQUIRE(thread, &_mutex_static_lock); if (*mutex == THR_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc); else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, calloc); else ret = 0; THR_LOCK_RELEASE(thread, &_mutex_static_lock); return (ret); } static void set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m) { struct pthread_mutex *m2; m2 = TAILQ_LAST(&curthread->pp_mutexq, mutex_queue); if (m2 != NULL) m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0]; else m->m_lock.m_ceilings[1] = -1; } int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr) { return mutex_init(mutex, mutex_attr ? *mutex_attr : NULL, calloc); } /* This function is used internally by malloc. */ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)) { static const struct pthread_mutex_attr attr = { .m_type = PTHREAD_MUTEX_NORMAL, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; int ret; ret = mutex_init(mutex, &attr, calloc_cb); if (ret == 0) - (*mutex)->m_private = 1; + (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE; return (ret); } void _mutex_fork(struct pthread *curthread) { struct pthread_mutex *m; /* * Fix mutex ownership for child process. 
* note that process shared mutex should not * be inherited because owner is forking thread * which is in parent process, they should be * removed from the owned mutex list, current, * process shared mutex is not supported, so I * am not worried. */ TAILQ_FOREACH(m, &curthread->mutexq, m_qe) m->m_lock.m_owner = TID(curthread); TAILQ_FOREACH(m, &curthread->pp_mutexq, m_qe) m->m_lock.m_owner = TID(curthread) | UMUTEX_CONTESTED; } int _pthread_mutex_destroy(pthread_mutex_t *mutex) { pthread_mutex_t m; int ret; m = *mutex; if (m < THR_MUTEX_DESTROYED) { ret = 0; } else if (m == THR_MUTEX_DESTROYED) { ret = EINVAL; } else { - if (m->m_owner != NULL || m->m_refcount != 0) { + if (m->m_owner != NULL) { ret = EBUSY; } else { *mutex = THR_MUTEX_DESTROYED; MUTEX_ASSERT_NOT_OWNED(m); free(m); ret = 0; } } return (ret); } #define ENQUEUE_MUTEX(curthread, m) \ do { \ (m)->m_owner = curthread; \ /* Add to the list of owned mutexes: */ \ MUTEX_ASSERT_NOT_OWNED((m)); \ if (((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) \ TAILQ_INSERT_TAIL(&curthread->mutexq, (m), m_qe);\ else \ TAILQ_INSERT_TAIL(&curthread->pp_mutexq, (m), m_qe);\ } while (0) +#define DEQUEUE_MUTEX(curthread, m) \ + (m)->m_owner = NULL; \ + MUTEX_ASSERT_IS_OWNED(m); \ + if (__predict_true(((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) \ + TAILQ_REMOVE(&curthread->mutexq, (m), m_qe); \ + else { \ + TAILQ_REMOVE(&curthread->pp_mutexq, (m), m_qe); \ + set_inherited_priority(curthread, m); \ + } \ + MUTEX_INIT_LINK(m); + #define CHECK_AND_INIT_MUTEX \ if (__predict_false((m = *mutex) <= THR_MUTEX_DESTROYED)) { \ if (m == THR_MUTEX_DESTROYED) \ return (EINVAL); \ int ret; \ ret = init_static(_get_curthread(), mutex); \ if (ret) \ return (ret); \ m = *mutex; \ } static int mutex_trylock_common(pthread_mutex_t *mutex) { struct pthread *curthread = _get_curthread(); struct pthread_mutex *m = *mutex; uint32_t id; int ret; id = TID(curthread); - if (m->m_private) + if (m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); ret = _thr_umutex_trylock(&m->m_lock, id); if (__predict_true(ret == 0)) { ENQUEUE_MUTEX(curthread, m); } else if (m->m_owner == curthread) { ret = mutex_self_trylock(m); } /* else {} */ - if (ret && m->m_private) + if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE)) THR_CRITICAL_LEAVE(curthread); return (ret); } int __pthread_mutex_trylock(pthread_mutex_t *mutex) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX return (mutex_trylock_common(mutex)); } static int mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m, const struct timespec *abstime) { uint32_t id, owner; int count; int ret; if (m->m_owner == curthread) return mutex_self_lock(m, abstime); id = TID(curthread); /* * For adaptive mutexes, spin for a bit in the expectation * that if the application requests this mutex type then * the lock is likely to be released quickly and it is * faster than entering the kernel */ if (__predict_false( (m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) != 0)) goto sleep_in_kernel; if (!_thr_is_smp) goto yield_loop; count = m->m_spinloops; while (count--) { owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { ret = 0; goto done; } } CPU_SPINWAIT; } yield_loop: count = m->m_yieldloops; while (count--) { _sched_yield(); owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { ret = 0; goto done; } } } sleep_in_kernel: if (abstime == NULL) { ret = 
__thr_umutex_lock(&m->m_lock, id); } else if (__predict_false( abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)) { ret = EINVAL; } else { ret = __thr_umutex_timedlock(&m->m_lock, id, abstime); } done: if (ret == 0) ENQUEUE_MUTEX(curthread, m); return (ret); } static inline int mutex_lock_common(struct pthread_mutex *m, - const struct timespec *abstime) + const struct timespec *abstime, int cvattach) { struct pthread *curthread = _get_curthread(); int ret; - if (m->m_private) + if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) { ENQUEUE_MUTEX(curthread, m); ret = 0; } else { ret = mutex_lock_sleep(curthread, m, abstime); } - if (ret && m->m_private) + if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE) && !cvattach) THR_CRITICAL_LEAVE(curthread); return (ret); } int __pthread_mutex_lock(pthread_mutex_t *mutex) { struct pthread_mutex *m; _thr_check_init(); CHECK_AND_INIT_MUTEX - return (mutex_lock_common(m, NULL)); + return (mutex_lock_common(m, NULL, 0)); } int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime) { struct pthread_mutex *m; _thr_check_init(); CHECK_AND_INIT_MUTEX - return (mutex_lock_common(m, abstime)); + return (mutex_lock_common(m, abstime, 0)); } int -_pthread_mutex_unlock(pthread_mutex_t *m) +_pthread_mutex_unlock(pthread_mutex_t *mutex) { - return (mutex_unlock_common(m)); + struct pthread_mutex *mp; + + mp = *mutex; + return (mutex_unlock_common(mp, 0)); } int -_mutex_cv_lock(pthread_mutex_t *mutex, int count) +_mutex_cv_lock(struct pthread_mutex *m, int count) { - struct pthread_mutex *m; - int ret; + int error; - m = *mutex; - ret = mutex_lock_common(m, NULL); - if (ret == 0) { - m->m_refcount--; - m->m_count += count; + error = mutex_lock_common(m, NULL, 1); + if (error == 0) + m->m_count = count; + return (error); +} + +int +_mutex_cv_unlock(struct pthread_mutex *m, int *count) +{ + + /* + * Clear the count in case this is a recursive mutex. + */ + *count = m->m_count; + m->m_count = 0; + (void)mutex_unlock_common(m, 1); + return (0); +} + +int +_mutex_cv_attach(struct pthread_mutex *m, int count) +{ + struct pthread *curthread = _get_curthread(); + int error; + + ENQUEUE_MUTEX(curthread, m); + m->m_count = count; + return (error); +} + +int +_mutex_cv_detach(struct pthread_mutex *mp, int *recurse) +{ + struct pthread *curthread = _get_curthread(); + int defered; + int error; + + if ((error = _mutex_owned(curthread, mp)) != 0) + return (error); + + /* + * Clear the count in case this is a recursive mutex. + */ + *recurse = mp->m_count; + mp->m_count = 0; + DEQUEUE_MUTEX(curthread, mp); + + /* Will this happen in real-world ? 
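+	 * A mutex can be detached while a wakeup recorded by
+	 * pthread_cond_signal() is still deferred on it, so flush the
+	 * deferred waiters here exactly as the unlock path does.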
*/ + if ((mp->m_flags & PMUTEX_FLAG_DEFERED) != 0) { + defered = 1; + mp->m_flags &= ~PMUTEX_FLAG_DEFERED; + } else + defered = 0; + + if (defered) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; } - return (ret); + return (0); } static int mutex_self_trylock(struct pthread_mutex *m) { int ret; - switch (m->m_type) { + switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: case PTHREAD_MUTEX_NORMAL: ret = EBUSY; break; case PTHREAD_MUTEX_RECURSIVE: /* Increment the lock count: */ if (m->m_count + 1 > 0) { m->m_count++; ret = 0; } else ret = EAGAIN; break; default: /* Trap invalid mutex types; */ ret = EINVAL; } return (ret); } static int mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime) { struct timespec ts1, ts2; int ret; - switch (m->m_type) { + switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: case PTHREAD_MUTEX_ADAPTIVE_NP: if (abstime) { if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { ret = EINVAL; } else { clock_gettime(CLOCK_REALTIME, &ts1); TIMESPEC_SUB(&ts2, abstime, &ts1); __sys_nanosleep(&ts2, NULL); ret = ETIMEDOUT; } } else { /* * POSIX specifies that mutexes should return * EDEADLK if a recursive lock is detected. */ ret = EDEADLK; } break; case PTHREAD_MUTEX_NORMAL: /* * What SS2 define as a 'normal' mutex. Intentionally * deadlock on attempts to get a lock you already own. */ ret = 0; if (abstime) { if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { ret = EINVAL; } else { clock_gettime(CLOCK_REALTIME, &ts1); TIMESPEC_SUB(&ts2, abstime, &ts1); __sys_nanosleep(&ts2, NULL); ret = ETIMEDOUT; } } else { ts1.tv_sec = 30; ts1.tv_nsec = 0; for (;;) __sys_nanosleep(&ts1, NULL); } break; case PTHREAD_MUTEX_RECURSIVE: /* Increment the lock count: */ if (m->m_count + 1 > 0) { m->m_count++; ret = 0; } else ret = EAGAIN; break; default: /* Trap invalid mutex types; */ ret = EINVAL; } return (ret); } static int -mutex_unlock_common(pthread_mutex_t *mutex) +mutex_unlock_common(struct pthread_mutex *m, int cv) { struct pthread *curthread = _get_curthread(); - struct pthread_mutex *m; uint32_t id; + int defered; - m = *mutex; if (__predict_false(m <= THR_MUTEX_DESTROYED)) { if (m == THR_MUTEX_DESTROYED) return (EINVAL); return (EPERM); } /* * Check if the running thread is not the owner of the mutex. */ if (__predict_false(m->m_owner != curthread)) return (EPERM); id = TID(curthread); if (__predict_false( - m->m_type == PTHREAD_MUTEX_RECURSIVE && + PMUTEX_TYPE(m->m_flags) == PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) { m->m_count--; } else { - m->m_owner = NULL; - /* Remove the mutex from the threads queue. 
*/ - MUTEX_ASSERT_IS_OWNED(m); - if (__predict_true((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) - TAILQ_REMOVE(&curthread->mutexq, m, m_qe); - else { - TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); - set_inherited_priority(curthread, m); - } - MUTEX_INIT_LINK(m); - _thr_umutex_unlock(&m->m_lock, id); - } - if (m->m_private) - THR_CRITICAL_LEAVE(curthread); - return (0); -} - -int -_mutex_cv_unlock(pthread_mutex_t *mutex, int *count) -{ - struct pthread *curthread = _get_curthread(); - struct pthread_mutex *m; - - m = *mutex; - if (__predict_false(m <= THR_MUTEX_DESTROYED)) { - if (m == THR_MUTEX_DESTROYED) - return (EINVAL); - return (EPERM); - } + if (curthread->will_sleep == 0 && (m->m_flags & PMUTEX_FLAG_DEFERED) != 0) { + defered = 1; + m->m_flags &= ~PMUTEX_FLAG_DEFERED; + } else + defered = 0; - /* - * Check if the running thread is not the owner of the mutex. - */ - if (__predict_false(m->m_owner != curthread)) - return (EPERM); + DEQUEUE_MUTEX(curthread, m); + _thr_umutex_unlock(&m->m_lock, id); - /* - * Clear the count in case this is a recursive mutex. - */ - *count = m->m_count; - m->m_refcount++; - m->m_count = 0; - m->m_owner = NULL; - /* Remove the mutex from the threads queue. */ - MUTEX_ASSERT_IS_OWNED(m); - if (__predict_true((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) - TAILQ_REMOVE(&curthread->mutexq, m, m_qe); - else { - TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); - set_inherited_priority(curthread, m); + if (defered) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } } - MUTEX_INIT_LINK(m); - _thr_umutex_unlock(&m->m_lock, TID(curthread)); - - if (m->m_private) + if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_LEAVE(curthread); return (0); } int _pthread_mutex_getprioceiling(pthread_mutex_t *mutex, int *prioceiling) { struct pthread_mutex *m; int ret; m = *mutex; if ((m <= THR_MUTEX_DESTROYED) || (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) ret = EINVAL; else { *prioceiling = m->m_lock.m_ceilings[0]; ret = 0; } return (ret); } int _pthread_mutex_setprioceiling(pthread_mutex_t *mutex, int ceiling, int *old_ceiling) { struct pthread *curthread = _get_curthread(); struct pthread_mutex *m, *m1, *m2; int ret; m = *mutex; if ((m <= THR_MUTEX_DESTROYED) || (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling); if (ret != 0) return (ret); if (m->m_owner == curthread) { MUTEX_ASSERT_IS_OWNED(m); m1 = TAILQ_PREV(m, mutex_queue, m_qe); m2 = TAILQ_NEXT(m, m_qe); if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) || (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) { TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); TAILQ_FOREACH(m2, &curthread->pp_mutexq, m_qe) { if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) { TAILQ_INSERT_BEFORE(m2, m, m_qe); return (0); } } TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe); } } return (0); } int _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX *count = m->m_spinloops; return (0); } int __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX m->m_spinloops = count; return (0); } int _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX *count = m->m_yieldloops; return (0); } int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count) { struct pthread_mutex *m; 
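	/*
	 * Illustrative usage of these non-portable knobs (not part of
	 * the patched code): an application with short critical sections
	 * on an SMP machine might raise the spin bound and keep a small
	 * yield bound, e.g.
	 *
	 *	pthread_mutex_t mtx;
	 *
	 *	pthread_mutex_init(&mtx, NULL);
	 *	pthread_mutex_setspinloops_np(&mtx, 1000);
	 *	pthread_mutex_setyieldloops_np(&mtx, 10);
	 *
	 * "mtx" is a hypothetical mutex; both calls return 0 on success.
	 */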
CHECK_AND_INIT_MUTEX m->m_yieldloops = count; return (0); } int _pthread_mutex_isowned_np(pthread_mutex_t *mutex) { struct pthread_mutex *m; m = *mutex; if (m <= THR_MUTEX_DESTROYED) return (0); return (m->m_owner == _get_curthread()); } + +int +_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp) +{ + if (__predict_false(mp <= THR_MUTEX_DESTROYED)) { + if (mp == THR_MUTEX_DESTROYED) + return (EINVAL); + return (EPERM); + } + if (mp->m_owner != curthread) + return (EPERM); + return (0); +} diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h index 7180d123cae9..9df97aa43118 100644 --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -1,807 +1,904 @@ /* * Copyright (C) 2005 Daniel M. Eischen * Copyright (c) 2005 David Xu * Copyright (c) 1995-1998 John Birrell . * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _THR_PRIVATE_H #define _THR_PRIVATE_H /* * Include files. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SYM_FB10(sym) __CONCAT(sym, _fb10) #define SYM_FBP10(sym) __CONCAT(sym, _fbp10) #define WEAK_REF(sym, alias) __weak_reference(sym, alias) #define SYM_COMPAT(sym, impl, ver) __sym_compat(sym, impl, ver) #define SYM_DEFAULT(sym, impl, ver) __sym_default(sym, impl, ver) #define FB10_COMPAT(func, sym) \ WEAK_REF(func, SYM_FB10(sym)); \ SYM_COMPAT(sym, SYM_FB10(sym), FBSD_1.0) #define FB10_COMPAT_PRIVATE(func, sym) \ WEAK_REF(func, SYM_FBP10(sym)); \ SYM_DEFAULT(sym, SYM_FBP10(sym), FBSDprivate_1.0) #include "pthread_md.h" #include "thr_umtx.h" #include "thread_db.h" #ifdef _PTHREAD_FORCED_UNWIND #define _BSD_SOURCE #include #endif typedef TAILQ_HEAD(pthreadlist, pthread) pthreadlist; typedef TAILQ_HEAD(atfork_head, pthread_atfork) atfork_head; TAILQ_HEAD(mutex_queue, pthread_mutex); /* Signal to do cancellation */ #define SIGCANCEL 32 /* * Kernel fatal error handler macro. */ #define PANIC(string) _thread_exit(__FILE__,__LINE__,string) /* Output debug messages like this: */ #define stdout_debug(args...) _thread_printf(STDOUT_FILENO, ##args) #define stderr_debug(args...) 
_thread_printf(STDERR_FILENO, ##args) #ifdef _PTHREADS_INVARIANTS #define THR_ASSERT(cond, msg) do { \ if (__predict_false(!(cond))) \ PANIC(msg); \ } while (0) #else #define THR_ASSERT(cond, msg) #endif #ifdef PIC # define STATIC_LIB_REQUIRE(name) #else # define STATIC_LIB_REQUIRE(name) __asm (".globl " #name) #endif #define TIMESPEC_ADD(dst, src, val) \ do { \ (dst)->tv_sec = (src)->tv_sec + (val)->tv_sec; \ (dst)->tv_nsec = (src)->tv_nsec + (val)->tv_nsec; \ if ((dst)->tv_nsec >= 1000000000) { \ (dst)->tv_sec++; \ (dst)->tv_nsec -= 1000000000; \ } \ } while (0) #define TIMESPEC_SUB(dst, src, val) \ do { \ (dst)->tv_sec = (src)->tv_sec - (val)->tv_sec; \ (dst)->tv_nsec = (src)->tv_nsec - (val)->tv_nsec; \ if ((dst)->tv_nsec < 0) { \ (dst)->tv_sec--; \ (dst)->tv_nsec += 1000000000; \ } \ } while (0) /* XXX These values should be same as those defined in pthread.h */ #define THR_MUTEX_INITIALIZER ((struct pthread_mutex *)NULL) #define THR_ADAPTIVE_MUTEX_INITIALIZER ((struct pthread_mutex *)1) #define THR_MUTEX_DESTROYED ((struct pthread_mutex *)2) #define THR_COND_INITIALIZER ((struct pthread_cond *)NULL) #define THR_COND_DESTROYED ((struct pthread_cond *)1) #define THR_RWLOCK_INITIALIZER ((struct pthread_rwlock *)NULL) #define THR_RWLOCK_DESTROYED ((struct pthread_rwlock *)1) +#define PMUTEX_FLAG_TYPE_MASK 0x0ff +#define PMUTEX_FLAG_PRIVATE 0x100 +#define PMUTEX_FLAG_DEFERED 0x200 +#define PMUTEX_TYPE(mtxflags) ((mtxflags) & PMUTEX_FLAG_TYPE_MASK) + +#define MAX_DEFER_WAITERS 50 + struct pthread_mutex { /* * Lock for accesses to this structure. */ struct umutex m_lock; - enum pthread_mutextype m_type; + int m_flags; struct pthread *m_owner; int m_count; - int m_refcount; int m_spinloops; int m_yieldloops; - int m_private; /* * Link for all mutexes a thread currently owns. */ TAILQ_ENTRY(pthread_mutex) m_qe; }; struct pthread_mutex_attr { enum pthread_mutextype m_type; int m_protocol; int m_ceiling; }; #define PTHREAD_MUTEXATTR_STATIC_INITIALIZER \ { PTHREAD_MUTEX_DEFAULT, PTHREAD_PRIO_NONE, 0, MUTEX_FLAGS_PRIVATE } struct pthread_cond { - struct umutex c_lock; - struct ucond c_kerncv; - int c_pshared; - int c_clockid; + __uint32_t __has_user_waiters; + __uint32_t __has_kern_waiters; + __uint32_t __flags; + __uint32_t __clock_id; }; struct pthread_cond_attr { int c_pshared; int c_clockid; }; struct pthread_barrier { struct umutex b_lock; struct ucond b_cv; volatile int64_t b_cycle; volatile int b_count; volatile int b_waiters; }; struct pthread_barrierattr { int pshared; }; struct pthread_spinlock { struct umutex s_lock; }; /* * Flags for condition variables. */ #define COND_FLAGS_PRIVATE 0x01 #define COND_FLAGS_INITED 0x02 #define COND_FLAGS_BUSY 0x04 /* * Cleanup definitions. 
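 * THR_CLEANUP_PUSH() opens a C block and THR_CLEANUP_POP() closes
 * it, so each handler record lives in the pushing frame's stack;
 * the records form a LIFO chain through "prev" rooted at
 * (td)->cleanup.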
*/ struct pthread_cleanup { struct pthread_cleanup *prev; void (*routine)(void *); void *routine_arg; int onheap; }; #define THR_CLEANUP_PUSH(td, func, arg) { \ struct pthread_cleanup __cup; \ \ __cup.routine = func; \ __cup.routine_arg = arg; \ __cup.onheap = 0; \ __cup.prev = (td)->cleanup; \ (td)->cleanup = &__cup; #define THR_CLEANUP_POP(td, exec) \ (td)->cleanup = __cup.prev; \ if ((exec) != 0) \ __cup.routine(__cup.routine_arg); \ } struct pthread_atfork { TAILQ_ENTRY(pthread_atfork) qe; void (*prepare)(void); void (*parent)(void); void (*child)(void); }; struct pthread_attr { #define pthread_attr_start_copy sched_policy int sched_policy; int sched_inherit; int prio; int suspend; #define THR_STACK_USER 0x100 /* 0xFF reserved for */ int flags; void *stackaddr_attr; size_t stacksize_attr; size_t guardsize_attr; #define pthread_attr_end_copy cpuset cpuset_t *cpuset; size_t cpusetsize; }; +struct wake_addr { + struct wake_addr *link; + unsigned int value; + char pad[12]; +}; + +struct sleepqueue { + TAILQ_HEAD(, pthread) sq_blocked; + SLIST_HEAD(, sleepqueue) sq_freeq; + LIST_ENTRY(sleepqueue) sq_hash; + SLIST_ENTRY(sleepqueue) sq_flink; + void *sq_wchan; + int sq_type; +}; + /* * Thread creation state attributes. */ #define THR_CREATE_RUNNING 0 #define THR_CREATE_SUSPENDED 1 /* * Miscellaneous definitions. */ #define THR_STACK_DEFAULT (sizeof(void *) / 4 * 1024 * 1024) /* * Maximum size of initial thread's stack. This perhaps deserves to be larger * than the stacks of other threads, since many applications are likely to run * almost entirely on this stack. */ #define THR_STACK_INITIAL (THR_STACK_DEFAULT * 2) /* * Define priorities returned by kernel. */ #define THR_MIN_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_min) #define THR_MAX_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_max) #define THR_DEF_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_default) #define THR_MIN_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_min) #define THR_MAX_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_max) #define THR_DEF_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_default) /* XXX The SCHED_FIFO should have same priority range as SCHED_RR */ #define THR_MIN_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO_1].pri_min) #define THR_MAX_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO-1].pri_max) #define THR_DEF_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO-1].pri_default) struct pthread_prio { int pri_min; int pri_max; int pri_default; }; struct pthread_rwlockattr { int pshared; }; struct pthread_rwlock { struct urwlock lock; struct pthread *owner; }; /* * Thread states. */ enum pthread_state { PS_RUNNING, PS_DEAD }; struct pthread_specific_elem { const void *data; int seqno; }; struct pthread_key { volatile int allocated; int seqno; void (*destructor)(void *); }; /* * lwpid_t is 32bit but kernel thr API exports tid as long type * in very earily date. */ #define TID(thread) ((uint32_t) ((thread)->tid)) /* * Thread structure. */ struct pthread { /* Kernel thread id. */ long tid; #define TID_TERMINATED 1 /* * Lock for accesses to this thread structure. */ struct umutex lock; /* Internal condition variable cycle number. */ uint32_t cycle; /* How many low level locks the thread held. */ int locklevel; /* * Set to non-zero when this thread has entered a critical * region. We allow for recursive entries into critical regions. */ int critical_count; /* Signal blocked counter. */ int sigblock; /* Queue entry for list of all threads. */ TAILQ_ENTRY(pthread) tle; /* link for all threads in process */ /* Queue entry for GC lists. 
*/ TAILQ_ENTRY(pthread) gcle; /* Hash queue entry. */ LIST_ENTRY(pthread) hle; + /* Sleep queue entry */ + TAILQ_ENTRY(pthread) wle; + /* Threads reference count. */ int refcount; /* * Thread start routine, argument, stack pointer and thread * attributes. */ void *(*start_routine)(void *); void *arg; struct pthread_attr attr; #define SHOULD_CANCEL(thr) \ ((thr)->cancel_pending && (thr)->cancel_enable && \ (thr)->no_cancel == 0) /* Cancellation is enabled */ int cancel_enable; /* Cancellation request is pending */ int cancel_pending; /* Thread is at cancellation point */ int cancel_point; /* Cancellation is temporarily disabled */ int no_cancel; /* Asynchronouse cancellation is enabled */ int cancel_async; /* Cancellation is in progress */ int cancelling; /* Thread temporary signal mask. */ sigset_t sigmask; /* Thread should unblock SIGCANCEL. */ int unblock_sigcancel; /* In sigsuspend state */ int in_sigsuspend; /* deferred signal info */ siginfo_t deferred_siginfo; /* signal mask to restore. */ sigset_t deferred_sigmask; /* the sigaction should be used for deferred signal. */ struct sigaction deferred_sigact; /* Force new thread to exit. */ int force_exit; /* Thread state: */ enum pthread_state state; /* * Error variable used instead of errno. The function __error() * returns a pointer to this. */ int error; /* * The joiner is the thread that is joining to this thread. The * join status keeps track of a join operation to another thread. */ struct pthread *joiner; /* Miscellaneous flags; only set with scheduling lock held. */ int flags; #define THR_FLAGS_PRIVATE 0x0001 #define THR_FLAGS_NEED_SUSPEND 0x0002 /* thread should be suspended */ #define THR_FLAGS_SUSPENDED 0x0004 /* thread is suspended */ #define THR_FLAGS_DETACHED 0x0008 /* thread is detached */ /* Thread list flags; only set with thread list lock held. */ int tlflags; #define TLFLAGS_GC_SAFE 0x0001 /* thread safe for cleaning */ #define TLFLAGS_IN_TDLIST 0x0002 /* thread in all thread list */ #define TLFLAGS_IN_GCLIST 0x0004 /* thread in gc list */ /* Queue of currently owned NORMAL or PRIO_INHERIT type mutexes. */ struct mutex_queue mutexq; /* Queue of all owned PRIO_PROTECT mutexes. */ struct mutex_queue pp_mutexq; void *ret; struct pthread_specific_elem *specific; int specific_data_count; /* Number rwlocks rdlocks held. */ int rdlock_count; /* * Current locks bitmap for rtld. */ int rtld_bits; /* Thread control block */ struct tcb *tcb; /* Cleanup handlers Link List */ struct pthread_cleanup *cleanup; #ifdef _PTHREAD_FORCED_UNWIND struct _Unwind_Exception ex; void *unwind_stackend; int unwind_disabled; #endif /* * Magic value to help recognize a valid thread structure * from an invalid one: */ #define THR_MAGIC ((u_int32_t) 0xd09ba115) u_int32_t magic; /* Enable event reporting */ int report_events; /* Event mask */ int event_mask; /* Event */ td_event_msg_t event_buf; + + struct wake_addr *wake_addr; +#define WAKE_ADDR(td) ((td)->wake_addr) + + /* Sleep queue */ + struct sleepqueue *sleepqueue; + + /* Wait channel */ + void *wchan; + + /* Referenced mutex. */ + struct pthread_mutex *mutex_obj; + + /* Thread will sleep. */ + int will_sleep; + + /* Number of threads deferred. */ + int nwaiter_defer; + + /* Deferred threads from pthread_cond_signal. 
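+	 * Up to MAX_DEFER_WAITERS wake addresses are batched here and
+	 * flushed with _thr_wake_all() once the signalling thread drops
+	 * the mutex, so waiters are not woken only to block again on a
+	 * still-held lock.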
*/ + unsigned int *defer_waiters[MAX_DEFER_WAITERS]; }; #define THR_SHOULD_GC(thrd) \ ((thrd)->refcount == 0 && (thrd)->state == PS_DEAD && \ ((thrd)->flags & THR_FLAGS_DETACHED) != 0) #define THR_IN_CRITICAL(thrd) \ (((thrd)->locklevel > 0) || \ ((thrd)->critical_count > 0)) #define THR_CRITICAL_ENTER(thrd) \ (thrd)->critical_count++ #define THR_CRITICAL_LEAVE(thrd) \ do { \ (thrd)->critical_count--; \ _thr_ast(thrd); \ } while (0) #define THR_UMUTEX_TRYLOCK(thrd, lck) \ _thr_umutex_trylock((lck), TID(thrd)) #define THR_UMUTEX_LOCK(thrd, lck) \ _thr_umutex_lock((lck), TID(thrd)) #define THR_UMUTEX_TIMEDLOCK(thrd, lck, timo) \ _thr_umutex_timedlock((lck), TID(thrd), (timo)) #define THR_UMUTEX_UNLOCK(thrd, lck) \ _thr_umutex_unlock((lck), TID(thrd)) #define THR_LOCK_ACQUIRE(thrd, lck) \ do { \ (thrd)->locklevel++; \ _thr_umutex_lock(lck, TID(thrd)); \ } while (0) +#define THR_LOCK_ACQUIRE_SPIN(thrd, lck) \ +do { \ + (thrd)->locklevel++; \ + _thr_umutex_lock_spin(lck, TID(thrd)); \ +} while (0) + #ifdef _PTHREADS_INVARIANTS #define THR_ASSERT_LOCKLEVEL(thrd) \ do { \ if (__predict_false((thrd)->locklevel <= 0)) \ _thr_assert_lock_level(); \ } while (0) #else #define THR_ASSERT_LOCKLEVEL(thrd) #endif #define THR_LOCK_RELEASE(thrd, lck) \ do { \ THR_ASSERT_LOCKLEVEL(thrd); \ _thr_umutex_unlock((lck), TID(thrd)); \ (thrd)->locklevel--; \ _thr_ast(thrd); \ } while (0) #define THR_LOCK(curthrd) THR_LOCK_ACQUIRE(curthrd, &(curthrd)->lock) #define THR_UNLOCK(curthrd) THR_LOCK_RELEASE(curthrd, &(curthrd)->lock) #define THR_THREAD_LOCK(curthrd, thr) THR_LOCK_ACQUIRE(curthrd, &(thr)->lock) #define THR_THREAD_UNLOCK(curthrd, thr) THR_LOCK_RELEASE(curthrd, &(thr)->lock) #define THREAD_LIST_RDLOCK(curthrd) \ do { \ (curthrd)->locklevel++; \ _thr_rwl_rdlock(&_thr_list_lock); \ } while (0) #define THREAD_LIST_WRLOCK(curthrd) \ do { \ (curthrd)->locklevel++; \ _thr_rwl_wrlock(&_thr_list_lock); \ } while (0) #define THREAD_LIST_UNLOCK(curthrd) \ do { \ _thr_rwl_unlock(&_thr_list_lock); \ (curthrd)->locklevel--; \ _thr_ast(curthrd); \ } while (0) /* * Macros to insert/remove threads to the all thread list and * the gc list. */ #define THR_LIST_ADD(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) == 0) { \ TAILQ_INSERT_HEAD(&_thread_list, thrd, tle); \ _thr_hash_add(thrd); \ (thrd)->tlflags |= TLFLAGS_IN_TDLIST; \ } \ } while (0) #define THR_LIST_REMOVE(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) != 0) { \ TAILQ_REMOVE(&_thread_list, thrd, tle); \ _thr_hash_remove(thrd); \ (thrd)->tlflags &= ~TLFLAGS_IN_TDLIST; \ } \ } while (0) #define THR_GCLIST_ADD(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) == 0) { \ TAILQ_INSERT_HEAD(&_thread_gc_list, thrd, gcle);\ (thrd)->tlflags |= TLFLAGS_IN_GCLIST; \ _gc_count++; \ } \ } while (0) #define THR_GCLIST_REMOVE(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) != 0) { \ TAILQ_REMOVE(&_thread_gc_list, thrd, gcle); \ (thrd)->tlflags &= ~TLFLAGS_IN_GCLIST; \ _gc_count--; \ } \ } while (0) #define THR_REF_ADD(curthread, pthread) { \ THR_CRITICAL_ENTER(curthread); \ pthread->refcount++; \ } while (0) #define THR_REF_DEL(curthread, pthread) { \ pthread->refcount--; \ THR_CRITICAL_LEAVE(curthread); \ } while (0) #define GC_NEEDED() (_gc_count >= 5) #define SHOULD_REPORT_EVENT(curthr, e) \ (curthr->report_events && \ (((curthr)->event_mask | _thread_event_mask ) & e) != 0) extern int __isthreaded; /* * Global variables for the pthread kernel. 
*/ extern char *_usrstack __hidden; extern struct pthread *_thr_initial __hidden; /* For debugger */ extern int _libthr_debug; extern int _thread_event_mask; extern struct pthread *_thread_last_event; /* List of all threads: */ extern pthreadlist _thread_list; /* List of threads needing GC: */ extern pthreadlist _thread_gc_list __hidden; extern int _thread_active_threads; extern atfork_head _thr_atfork_list __hidden; extern struct urwlock _thr_atfork_lock __hidden; /* Default thread attributes: */ extern struct pthread_attr _pthread_attr_default __hidden; /* Default mutex attributes: */ extern struct pthread_mutex_attr _pthread_mutexattr_default __hidden; extern struct pthread_mutex_attr _pthread_mutexattr_adaptive_default __hidden; /* Default condition variable attributes: */ extern struct pthread_cond_attr _pthread_condattr_default __hidden; extern struct pthread_prio _thr_priorities[] __hidden; extern pid_t _thr_pid __hidden; extern int _thr_is_smp __hidden; extern size_t _thr_guard_default __hidden; extern size_t _thr_stack_default __hidden; extern size_t _thr_stack_initial __hidden; extern int _thr_page_size __hidden; extern int _thr_spinloops __hidden; extern int _thr_yieldloops __hidden; /* Garbage thread count. */ extern int _gc_count __hidden; extern struct umutex _mutex_static_lock __hidden; extern struct umutex _cond_static_lock __hidden; extern struct umutex _rwlock_static_lock __hidden; extern struct umutex _keytable_lock __hidden; extern struct urwlock _thr_list_lock __hidden; extern struct umutex _thr_event_lock __hidden; /* * Function prototype definitions. */ __BEGIN_DECLS int _thr_setthreaded(int) __hidden; -int _mutex_cv_lock(pthread_mutex_t *, int count) __hidden; -int _mutex_cv_unlock(pthread_mutex_t *, int *count) __hidden; +int _mutex_cv_lock(struct pthread_mutex *, int count) __hidden; +int _mutex_cv_unlock(struct pthread_mutex *, int *count) __hidden; +int _mutex_cv_attach(struct pthread_mutex *, int count) __hidden; +int _mutex_cv_detach(struct pthread_mutex *, int *count) __hidden; +int _mutex_owned(struct pthread *, const struct pthread_mutex *) __hidden; int _mutex_reinit(pthread_mutex_t *) __hidden; void _mutex_fork(struct pthread *curthread) __hidden; void _libpthread_init(struct pthread *) __hidden; struct pthread *_thr_alloc(struct pthread *) __hidden; void _thread_exit(const char *, int, const char *) __hidden __dead2; int _thr_ref_add(struct pthread *, struct pthread *, int) __hidden; void _thr_ref_delete(struct pthread *, struct pthread *) __hidden; void _thr_ref_delete_unlocked(struct pthread *, struct pthread *) __hidden; int _thr_find_thread(struct pthread *, struct pthread *, int) __hidden; void _thr_rtld_init(void) __hidden; void _thr_rtld_fini(void) __hidden; void _thr_rtld_postfork_child(void) __hidden; int _thr_stack_alloc(struct pthread_attr *) __hidden; void _thr_stack_free(struct pthread_attr *) __hidden; void _thr_free(struct pthread *, struct pthread *) __hidden; void _thr_gc(struct pthread *) __hidden; void _thread_cleanupspecific(void) __hidden; void _thread_printf(int, const char *, ...) 
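	/* minimal formatter behind the stdout_debug()/stderr_debug() macros */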
__hidden; void _thr_spinlock_init(void) __hidden; void _thr_cancel_enter(struct pthread *) __hidden; void _thr_cancel_enter2(struct pthread *, int) __hidden; void _thr_cancel_leave(struct pthread *, int) __hidden; void _thr_testcancel(struct pthread *) __hidden; void _thr_signal_block(struct pthread *) __hidden; void _thr_signal_unblock(struct pthread *) __hidden; void _thr_signal_init(void) __hidden; void _thr_signal_deinit(void) __hidden; int _thr_send_sig(struct pthread *, int sig) __hidden; void _thr_list_init(void) __hidden; void _thr_hash_add(struct pthread *) __hidden; void _thr_hash_remove(struct pthread *) __hidden; struct pthread *_thr_hash_find(struct pthread *) __hidden; void _thr_link(struct pthread *, struct pthread *) __hidden; void _thr_unlink(struct pthread *, struct pthread *) __hidden; void _thr_assert_lock_level(void) __hidden __dead2; void _thr_ast(struct pthread *) __hidden; void _thr_once_init(void) __hidden; void _thr_report_creation(struct pthread *curthread, struct pthread *newthread) __hidden; void _thr_report_death(struct pthread *curthread) __hidden; int _thr_getscheduler(lwpid_t, int *, struct sched_param *) __hidden; int _thr_setscheduler(lwpid_t, int, const struct sched_param *) __hidden; void _thr_signal_prefork(void) __hidden; void _thr_signal_postfork(void) __hidden; void _thr_signal_postfork_child(void) __hidden; void _thr_try_gc(struct pthread *, struct pthread *) __hidden; int _rtp_to_schedparam(const struct rtprio *rtp, int *policy, struct sched_param *param) __hidden; int _schedparam_to_rtp(int policy, const struct sched_param *param, struct rtprio *rtp) __hidden; void _thread_bp_create(void); void _thread_bp_death(void); int _sched_yield(void); void _pthread_cleanup_push(void (*)(void *), void *); void _pthread_cleanup_pop(int); void _pthread_exit_mask(void *status, sigset_t *mask) __dead2 __hidden; void _pthread_cancel_enter(int maycancel); void _pthread_cancel_leave(int maycancel); /* #include */ #ifdef _SYS_FCNTL_H_ int __sys_fcntl(int, int, ...); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); #endif /* #include */ #ifdef _SIGNAL_H_ int __sys_kill(pid_t, int); int __sys_sigaction(int, const struct sigaction *, struct sigaction *); int __sys_sigpending(sigset_t *); int __sys_sigprocmask(int, const sigset_t *, sigset_t *); int __sys_sigsuspend(const sigset_t *); int __sys_sigreturn(const ucontext_t *); int __sys_sigaltstack(const struct sigaltstack *, struct sigaltstack *); int __sys_sigwait(const sigset_t *, int *); int __sys_sigtimedwait(const sigset_t *, siginfo_t *, const struct timespec *); int __sys_sigwaitinfo(const sigset_t *set, siginfo_t *info); #endif /* #include */ #ifdef _TIME_H_ int __sys_nanosleep(const struct timespec *, struct timespec *); #endif /* #include */ #ifdef _SYS_UCONTEXT_H_ int __sys_setcontext(const ucontext_t *ucp); int __sys_swapcontext(ucontext_t *oucp, const ucontext_t *ucp); #endif /* #include */ #ifdef _UNISTD_H_ int __sys_close(int); int __sys_fork(void); pid_t __sys_getpid(void); ssize_t __sys_read(int, void *, size_t); ssize_t __sys_write(int, const void *, size_t); void __sys_exit(int); #endif int _umtx_op_err(void *, int op, u_long, void *, void *) __hidden; static inline int _thr_isthreaded(void) { return (__isthreaded != 0); } static inline int _thr_is_inited(void) { return (_thr_initial != NULL); } static inline void _thr_check_init(void) { if (_thr_initial == NULL) _libpthread_init(NULL); } +struct wake_addr *_thr_alloc_wake_addr(void); +void 
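+/*
+ * A rough sketch of the wait protocol these primitives support
+ * (the real sequence lives in thr_cond.c and thr_sleepq.c; "cvp"
+ * and "abstime" are placeholders): a waiter clears its wake word,
+ * enqueues itself on the wait channel's sleepqueue and blocks in
+ * _thr_sleep() until the word becomes non-zero:
+ *
+ *	_thr_clear_wake(curthread);
+ *	_sleepq_lock(cvp);
+ *	_sleepq_add(cvp, curthread);
+ *	_sleepq_unlock(cvp);
+ *	error = _thr_sleep(curthread, cvp->__clock_id, abstime);
+ *
+ * A signaller dequeues one thread and calls _thr_set_wake() on
+ * that thread's wake address.
+ */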
_thr_release_wake_addr(struct wake_addr *); +int _thr_sleep(struct pthread *, int, const struct timespec *); + +void _thr_wake_addr_init(void) __hidden; + +static inline void +_thr_clear_wake(struct pthread *td) +{ + td->wake_addr->value = 0; +} + +static inline int +_thr_is_woken(struct pthread *td) +{ + return td->wake_addr->value != 0; +} + +static inline void +_thr_set_wake(unsigned int *waddr) +{ + *waddr = 1; + _thr_umtx_wake(waddr, INT_MAX, 0); +} + +void _thr_wake_all(unsigned int *waddrs[], int) __hidden; + +static inline struct pthread * +_sleepq_first(struct sleepqueue *sq) +{ + return TAILQ_FIRST(&sq->sq_blocked); +} + +void _sleepq_init(void) __hidden; +struct sleepqueue *_sleepq_alloc(void) __hidden; +void _sleepq_free(struct sleepqueue *) __hidden; +void _sleepq_lock(void *) __hidden; +void _sleepq_unlock(void *) __hidden; +struct sleepqueue *_sleepq_lookup(void *) __hidden; +void _sleepq_add(void *, struct pthread *) __hidden; +int _sleepq_remove(struct sleepqueue *, struct pthread *) __hidden; +void _sleepq_drop(struct sleepqueue *, + void (*cb)(struct pthread *, void *arg), void *) __hidden; + struct dl_phdr_info; void __pthread_cxa_finalize(struct dl_phdr_info *phdr_info); void _thr_tsd_unload(struct dl_phdr_info *phdr_info) __hidden; void _thr_sigact_unload(struct dl_phdr_info *phdr_info) __hidden; __END_DECLS #endif /* !_THR_PRIVATE_H */ diff --git a/lib/libthr/thread/thr_umtx.c b/lib/libthr/thread/thr_umtx.c index dabfa3597921..33c363761981 100644 --- a/lib/libthr/thread/thr_umtx.c +++ b/lib/libthr/thread/thr_umtx.c @@ -1,265 +1,318 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #include "thr_private.h" #include "thr_umtx.h" #ifndef HAS__UMTX_OP_ERR int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2) { if (_umtx_op(obj, op, val, uaddr, uaddr2) == -1) return (errno); return (0); } #endif void _thr_umutex_init(struct umutex *mtx) { static struct umutex default_mtx = DEFAULT_UMUTEX; *mtx = default_mtx; } void _thr_urwlock_init(struct urwlock *rwl) { static struct urwlock default_rwl = DEFAULT_URWLOCK; *rwl = default_rwl; } int __thr_umutex_lock(struct umutex *mtx, uint32_t id) { uint32_t owner; if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { for (;;) { /* wait in kernel */ _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); owner = mtx->m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0 && atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) return (0); } } return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); } +#define SPINLOOPS 1000 + +int +__thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +{ + uint32_t owner; + + if (!_thr_is_smp) + return __thr_umutex_lock(mtx, id); + + if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { + for (;;) { + int count = SPINLOOPS; + while (count--) { + owner = mtx->m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_32( + &mtx->m_owner, + owner, id|owner)) { + return (0); + } + } + CPU_SPINWAIT; + } + + /* wait in kernel */ + _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); + } + } + + return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); +} + int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *ets) { struct timespec timo, cts; uint32_t owner; int ret; clock_gettime(CLOCK_REALTIME, &cts); TIMESPEC_SUB(&timo, ets, &cts); if (timo.tv_sec < 0) return (ETIMEDOUT); for (;;) { if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { /* wait in kernel */ ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, &timo); /* now try to lock it */ owner = mtx->m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0 && atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) return (0); } else { ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, &timo); if (ret == 0) break; } if (ret == ETIMEDOUT) break; clock_gettime(CLOCK_REALTIME, &cts); TIMESPEC_SUB(&timo, ets, &cts); if (timo.tv_sec < 0 || (timo.tv_sec == 0 && timo.tv_nsec == 0)) { ret = ETIMEDOUT; break; } } return (ret); } int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) { #ifndef __ia64__ /* XXX this logic has a race-condition on ia64. 
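	 * For plain mutexes, release in user mode by swapping the owner
	 * word from "id | UMUTEX_CONTESTED" down to bare UMUTEX_CONTESTED,
	 * then ask the kernel to wake a waiter; priority-aware mutexes
	 * always take the full UMTX_OP_MUTEX_UNLOCK syscall below.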
*/ if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { atomic_cmpset_rel_32(&mtx->m_owner, id | UMUTEX_CONTESTED, UMUTEX_CONTESTED); return _umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE, 0, 0, 0); } #endif /* __ia64__ */ return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0); } int __thr_umutex_trylock(struct umutex *mtx) { return _umtx_op_err(mtx, UMTX_OP_MUTEX_TRYLOCK, 0, 0, 0); } int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) { return _umtx_op_err(mtx, UMTX_OP_SET_CEILING, ceiling, oldceiling, 0); } int _thr_umtx_wait(volatile long *mtx, long id, const struct timespec *timeout) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), UMTX_OP_WAIT, id, 0, __DECONST(void*, timeout)); } int _thr_umtx_wait_uint(volatile u_int *mtx, u_int id, const struct timespec *timeout, int shared) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, 0, __DECONST(void*, timeout)); } +int +_thr_umtx_timedwait_uint(volatile u_int *mtx, u_int id, int clockid, + const struct timespec *abstime, int shared) +{ + struct timespec ts, ts2, *tsp; + + if (abstime != NULL) { + clock_gettime(clockid, &ts); + TIMESPEC_SUB(&ts2, abstime, &ts); + if (ts2.tv_sec < 0 || ts2.tv_nsec <= 0) + return (ETIMEDOUT); + tsp = &ts2; + } else { + tsp = NULL; + } + return _umtx_op_err(__DEVOLATILE(void *, mtx), + shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, NULL, + tsp); +} + int _thr_umtx_wake(volatile void *mtx, int nr_wakeup, int shared) { return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAKE : UMTX_OP_WAKE_PRIVATE, nr_wakeup, 0, 0); } void _thr_ucond_init(struct ucond *cv) { bzero(cv, sizeof(struct ucond)); } int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unparking) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) { struct pthread *curthread = _get_curthread(); _thr_umutex_unlock(m, TID(curthread)); return (ETIMEDOUT); } return _umtx_op_err(cv, UMTX_OP_CV_WAIT, check_unparking ? 
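	    /*
	     * UMTX_OP_CV_WAIT unlocks "m" inside the kernel before going
	     * to sleep on "cv"; that is why the already-expired-timeout
	     * path above must drop "m" by hand before returning ETIMEDOUT.
	     */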
UMTX_CHECK_UNPARKING : 0, m, __DECONST(void*, timeout)); } int _thr_ucond_signal(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL); } int _thr_ucond_broadcast(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_BROADCAST, 0, NULL, NULL); } int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_RDLOCK, flags, NULL, tsp); } int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_WRLOCK, 0, NULL, tsp); } int __thr_rwlock_unlock(struct urwlock *rwlock) { return _umtx_op_err(rwlock, UMTX_OP_RW_UNLOCK, 0, NULL, NULL); } void _thr_rwl_rdlock(struct urwlock *rwlock) { int ret; for (;;) { if (_thr_rwlock_tryrdlock(rwlock, URWLOCK_PREFER_READER) == 0) return; ret = __thr_rwlock_rdlock(rwlock, URWLOCK_PREFER_READER, NULL); if (ret == 0) return; if (ret != EINTR) PANIC("rdlock error"); } } void _thr_rwl_wrlock(struct urwlock *rwlock) { int ret; for (;;) { if (_thr_rwlock_trywrlock(rwlock) == 0) return; ret = __thr_rwlock_wrlock(rwlock, NULL); if (ret == 0) return; if (ret != EINTR) PANIC("wrlock error"); } } void _thr_rwl_unlock(struct urwlock *rwlock) { if (_thr_rwlock_unlock(rwlock)) PANIC("unlock error"); } diff --git a/lib/libthr/thread/thr_umtx.h b/lib/libthr/thread/thr_umtx.h index 3f53fafcf7de..0a8034b364e2 100644 --- a/lib/libthr/thread/thr_umtx.h +++ b/lib/libthr/thread/thr_umtx.h @@ -1,196 +1,207 @@ /*- * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _THR_FBSD_UMTX_H_ #define _THR_FBSD_UMTX_H_ #include #include #define DEFAULT_UMUTEX {0,0,{0,0},{0,0,0,0}} #define DEFAULT_URWLOCK {0,0,0,0,{0,0,0,0}} int __thr_umutex_lock(struct umutex *mtx, uint32_t id) __hidden; +int __thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) __hidden; int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) __hidden; int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) __hidden; int __thr_umutex_trylock(struct umutex *mtx) __hidden; int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) __hidden; void _thr_umutex_init(struct umutex *mtx) __hidden; void _thr_urwlock_init(struct urwlock *rwl) __hidden; int _thr_umtx_wait(volatile long *mtx, long exp, const struct timespec *timeout) __hidden; int _thr_umtx_wait_uint(volatile u_int *mtx, u_int exp, const struct timespec *timeout, int shared) __hidden; +int _thr_umtx_timedwait_uint(volatile u_int *mtx, u_int exp, int clockid, + const struct timespec *timeout, int shared) __hidden; int _thr_umtx_wake(volatile void *mtx, int count, int shared) __hidden; int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unpaking) __hidden; void _thr_ucond_init(struct ucond *cv) __hidden; int _thr_ucond_signal(struct ucond *cv) __hidden; int _thr_ucond_broadcast(struct ucond *cv) __hidden; int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) __hidden; int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) __hidden; int __thr_rwlock_unlock(struct urwlock *rwlock) __hidden; /* Internal used only */ void _thr_rwl_rdlock(struct urwlock *rwlock) __hidden; void _thr_rwl_wrlock(struct urwlock *rwlock) __hidden; void _thr_rwl_unlock(struct urwlock *rwlock) __hidden; static inline int _thr_umutex_trylock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) return (0); if ((mtx->m_flags & UMUTEX_PRIO_PROTECT) == 0) return (EBUSY); return (__thr_umutex_trylock(mtx)); } static inline int _thr_umutex_trylock2(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id) != 0) return (0); if ((uint32_t)mtx->m_owner == UMUTEX_CONTESTED && __predict_true((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0)) if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED)) return (0); return (EBUSY); } static inline int _thr_umutex_lock(struct umutex *mtx, uint32_t id) { if (_thr_umutex_trylock2(mtx, id) == 0) return (0); return (__thr_umutex_lock(mtx, id)); } +static inline int +_thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +{ + if (_thr_umutex_trylock2(mtx, id) == 0) + return (0); + return (__thr_umutex_lock_spin(mtx, id)); +} + static inline int _thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) { if (_thr_umutex_trylock2(mtx, id) == 0) return (0); return (__thr_umutex_timedlock(mtx, id, timeout)); } static inline int _thr_umutex_unlock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED)) return (0); return (__thr_umutex_unlock(mtx, id)); } static inline int _thr_rwlock_tryrdlock(struct urwlock *rwlock, int flags) { int32_t state; int32_t wrflags; if (flags & URWLOCK_PREFER_READER || rwlock->rw_flags & URWLOCK_PREFER_READER) wrflags = URWLOCK_WRITE_OWNER; else wrflags = URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS; state = rwlock->rw_state; while (!(state & wrflags)) { 
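		/*
		 * No blocking writer bits are set: try to take a read slot
		 * by atomically bumping the reader count.  A failed cmpset
		 * means the state changed underneath us, so reload and
		 * retry; falling out of the loop returns EBUSY and sends
		 * the caller to the kernel slow path.
		 */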
if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) return (EAGAIN); if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state + 1)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_trywrlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) { if (_thr_rwlock_tryrdlock(rwlock, flags) == 0) return (0); return (__thr_rwlock_rdlock(rwlock, flags, tsp)); } static inline int _thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { if (_thr_rwlock_trywrlock(rwlock) == 0) return (0); return (__thr_rwlock_wrlock(rwlock, tsp)); } static inline int _thr_rwlock_unlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; if (state & URWLOCK_WRITE_OWNER) { if (atomic_cmpset_rel_32(&rwlock->rw_state, URWLOCK_WRITE_OWNER, 0)) return (0); } else { for (;;) { if (__predict_false(URWLOCK_READER_COUNT(state) == 0)) return (EPERM); if (!((state & (URWLOCK_WRITE_WAITERS | URWLOCK_READ_WAITERS)) && URWLOCK_READER_COUNT(state) == 1)) { if (atomic_cmpset_rel_32(&rwlock->rw_state, state, state-1)) return (0); state = rwlock->rw_state; } else { break; } } } return (__thr_rwlock_unlock(rwlock)); } #endif diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 77e47e56b872..580d6303e382 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1,3595 +1,3685 @@ /*- * Copyright (c) 2004, David Xu * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif enum { TYPE_SIMPLE_WAIT, TYPE_CV, TYPE_SEM, TYPE_SIMPLE_LOCK, TYPE_NORMAL_UMUTEX, TYPE_PI_UMUTEX, TYPE_PP_UMUTEX, TYPE_RWLOCK }; #define _UMUTEX_TRY 1 #define _UMUTEX_WAIT 2 /* Key to represent a unique userland synchronous object */ struct umtx_key { int hash; int type; int shared; union { struct { vm_object_t object; uintptr_t offset; } shared; struct { struct vmspace *vs; uintptr_t addr; } private; struct { void *a; uintptr_t b; } both; } info; }; /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ struct thread *pi_owner; /* Reference count */ int pi_refcount; /* List entry to link umtx holding by thread */ TAILQ_ENTRY(umtx_pi) pi_link; /* List entry in hash */ TAILQ_ENTRY(umtx_pi) pi_hashlink; /* List for waiters */ TAILQ_HEAD(,umtx_q) pi_blocked; /* Identify a userland lock object */ struct umtx_key pi_key; }; /* A userland synchronous object user. */ struct umtx_q { /* Linked list for the hash. */ TAILQ_ENTRY(umtx_q) uq_link; /* Umtx key. */ struct umtx_key uq_key; /* Umtx flags. */ int uq_flags; #define UQF_UMTXQ 0x0001 /* The thread waits on. */ struct thread *uq_thread; /* * Blocked on PI mutex. read can use chain lock * or umtx_lock, write must have both chain lock and * umtx_lock being hold. */ struct umtx_pi *uq_pi_blocked; /* On blocked list */ TAILQ_ENTRY(umtx_q) uq_lockq; /* Thread contending with us */ TAILQ_HEAD(,umtx_pi) uq_pi_contested; /* Inherited priority from PP mutex */ u_char uq_inherited_pri; /* Spare queue ready to be reused */ struct umtxq_queue *uq_spare_queue; /* The queue we on */ struct umtxq_queue *uq_cur_queue; }; TAILQ_HEAD(umtxq_head, umtx_q); /* Per-key wait-queue */ struct umtxq_queue { struct umtxq_head head; struct umtx_key key; LIST_ENTRY(umtxq_queue) link; int length; }; LIST_HEAD(umtxq_list, umtxq_queue); /* Userland lock object's wait-queue chain */ struct umtxq_chain { /* Lock for this chain. */ struct mtx uc_lock; /* List of sleep queues. */ struct umtxq_list uc_queue[2]; #define UMTX_SHARED_QUEUE 0 #define UMTX_EXCLUSIVE_QUEUE 1 LIST_HEAD(, umtxq_queue) uc_spare_queue; /* Busy flag */ char uc_busy; /* Chain lock waiters */ int uc_waiters; /* All PI in the list */ TAILQ_HEAD(,umtx_pi) uc_pi_list; }; #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) #define UMTXQ_BUSY_ASSERT(uc) KASSERT(&(uc)->uc_busy, ("umtx chain is not busy")) /* * Don't propagate time-sharing priority, there is a security reason, * a user can simply introduce PI-mutex, let thread A lock the mutex, * and let another thread B block on the mutex, because B is * sleeping, its priority will be boosted, this causes A's priority to * be boosted via priority propagating too and will never be lowered even * if it is using 100%CPU, this is unfair to other processes. */ #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ PRI_MAX_TIMESHARE : (td)->td_user_pri) #define GOLDEN_RATIO_PRIME 2654404609U #define UMTX_CHAINS 128 #define UMTX_SHIFTS (__WORD_BIT - 7) #define THREAD_SHARE 0 #define PROCESS_SHARE 1 #define AUTO_SHARE 2 #define GET_SHARE(flags) \ (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) #define BUSY_SPINS 200 static uma_zone_t umtx_pi_zone; static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); static int umtx_pi_allocated; SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, &umtx_pi_allocated, 0, "Allocated umtx_pi"); static void umtxq_sysinit(void *); static void umtxq_hash(struct umtx_key *key); static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); static void umtxq_lock(struct umtx_key *key); static void umtxq_unlock(struct umtx_key *key); static void umtxq_busy(struct umtx_key *key); static void umtxq_unbusy(struct umtx_key *key); static void umtxq_insert_queue(struct umtx_q *uq, int q); static void umtxq_remove_queue(struct umtx_q *uq, int q); static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); static int umtxq_count(struct umtx_key *key); static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); static int umtx_key_get(void *addr, int type, int share, struct umtx_key *key); static void umtx_key_release(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) static struct mtx umtx_lock; static void umtxq_sysinit(void *arg __unused) { int i, j; umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, MTX_DEF | MTX_DUPOK); LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); umtxq_chains[i][j].uc_busy = 0; umtxq_chains[i][j].uc_waiters = 0; } } mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, EVENTHANDLER_PRI_ANY); } struct umtx_q * umtxq_alloc(void) { struct umtx_q *uq; uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); TAILQ_INIT(&uq->uq_spare_queue->head); TAILQ_INIT(&uq->uq_pi_contested); uq->uq_inherited_pri = PRI_MAX; return (uq); } void umtxq_free(struct umtx_q *uq) { MPASS(uq->uq_spare_queue != NULL); free(uq->uq_spare_queue, M_UMTX); free(uq, M_UMTX); } static inline void umtxq_hash(struct umtx_key *key) { unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; } static inline int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) { return (k1->type == k2->type && k1->info.both.a == k2->info.both.a && k1->info.both.b == k2->info.both.b); } static inline struct umtxq_chain * umtxq_getchain(struct umtx_key *key) { if (key->type <= TYPE_SEM) return 
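		/*
		 * Wait-style objects (TYPE_SIMPLE_WAIT, TYPE_CV and
		 * TYPE_SEM) hash into their own chain array, keeping their
		 * queues apart from the lock-type chains that may share
		 * the same hash bucket.
		 */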
(&umtxq_chains[1][key->hash]); return (&umtxq_chains[0][key->hash]); } /* * Lock a chain. */ static inline void umtxq_lock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_lock(&uc->uc_lock); } /* * Unlock a chain. */ static inline void umtxq_unlock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_unlock(&uc->uc_lock); } /* * Set chain to busy state when following operation * may be blocked (kernel mutex can not be used). */ static inline void umtxq_busy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); if (uc->uc_busy) { #ifdef SMP if (smp_cpus > 1) { int count = BUSY_SPINS; if (count > 0) { umtxq_unlock(key); while (uc->uc_busy && --count > 0) cpu_spinwait(); umtxq_lock(key); } } #endif while (uc->uc_busy) { uc->uc_waiters++; msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); uc->uc_waiters--; } } uc->uc_busy = 1; } /* * Unbusy a chain. */ static inline void umtxq_unbusy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); KASSERT(uc->uc_busy != 0, ("not busy")); uc->uc_busy = 0; if (uc->uc_waiters) wakeup_one(uc); } static struct umtxq_queue * umtxq_queue_lookup(struct umtx_key *key, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); LIST_FOREACH(uh, &uc->uc_queue[q], link) { if (umtx_key_match(&uh->key, key)) return (uh); } return (NULL); } static inline void umtxq_insert_queue(struct umtx_q *uq, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); uh = umtxq_queue_lookup(&uq->uq_key, q); if (uh != NULL) { LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); } else { uh = uq->uq_spare_queue; uh->key = uq->uq_key; LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); } uq->uq_spare_queue = NULL; TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); uh->length++; uq->uq_flags |= UQF_UMTXQ; uq->uq_cur_queue = uh; return; } static inline void umtxq_remove_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (uq->uq_flags & UQF_UMTXQ) { uh = uq->uq_cur_queue; TAILQ_REMOVE(&uh->head, uq, uq_link); uh->length--; uq->uq_flags &= ~UQF_UMTXQ; if (TAILQ_EMPTY(&uh->head)) { KASSERT(uh->length == 0, ("inconsistent umtxq_queue length")); LIST_REMOVE(uh, link); } else { uh = LIST_FIRST(&uc->uc_spare_queue); KASSERT(uh != NULL, ("uc_spare_queue is empty")); LIST_REMOVE(uh, link); } uq->uq_spare_queue = uh; uq->uq_cur_queue = NULL; } } /* * Check if there are multiple waiters */ static int umtxq_count(struct umtx_key *key) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) return (uh->length); return (0); } /* * Check if there are multiple PI waiters and returns first * waiter. */ static int umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) { struct umtxq_chain *uc; struct umtxq_queue *uh; *first = NULL; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) { *first = TAILQ_FIRST(&uh->head); return (uh->length); } return (0); } /* * Wake up threads waiting on an userland object. 
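 * At most n_wake sleepers are removed from the matching queue and
 * woken in FIFO order; the return value is the count actually
 * woken.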
*/ static int umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; struct umtx_q *uq; int ret; ret = 0; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, q); if (uh != NULL) { while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { umtxq_remove_queue(uq, q); wakeup(uq); if (++ret >= n_wake) return (ret); } } return (ret); } /* * Wake up specified thread. */ static inline void umtxq_signal_thread(struct umtx_q *uq) { struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); umtxq_remove(uq); wakeup(uq); } /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. */ static inline int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) { struct umtxq_chain *uc; int error; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (!(uq->uq_flags & UQF_UMTXQ)) return (0); error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); if (error == EWOULDBLOCK) error = ETIMEDOUT; return (error); } /* * Convert userspace address into unique logical address. */ static int umtx_key_get(void *addr, int type, int share, struct umtx_key *key) { struct thread *td = curthread; vm_map_t map; vm_map_entry_t entry; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; key->type = type; if (share == THREAD_SHARE) { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } else { MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); map = &td->td_proc->p_vmspace->vm_map; if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, &entry, &key->info.shared.object, &pindex, &prot, &wired) != KERN_SUCCESS) { return EFAULT; } if ((share == PROCESS_SHARE) || (share == AUTO_SHARE && VM_INHERIT_SHARE == entry->inheritance)) { key->shared = 1; key->info.shared.offset = entry->offset + entry->start - (vm_offset_t)addr; vm_object_reference(key->info.shared.object); } else { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } vm_map_lookup_done(map, entry); } umtxq_hash(key); return (0); } /* * Release key. */ static inline void umtx_key_release(struct umtx_key *key) { if (key->shared) vm_object_deallocate(key->info.shared.object); } /* * Lock a umtx object. */ static int _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) { struct umtx_q *uq; u_long owner; u_long old; int error = 0; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMTX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMTX_CONTESTED) { owner = casuword(&umtx->u_owner, UMTX_CONTESTED, id | UMTX_CONTESTED); if (owner == UMTX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } /* * If we caught a signal, we have retried and now * exit immediately. 
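		 *
		 * For context, the userland fast path that this kernel
		 * slow path backs up looks roughly like the following
		 * (a sketch, not the actual libthr code):
		 *
		 *	if (atomic_cmpset_acq_long(&umtx->u_owner,
		 *	    UMTX_UNOWNED, id))
		 *		return (0);	/- uncontested, no syscall -/
		 *	return (_umtx_lock(umtx));	/- use the syscall -/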
*/ if (error != 0) return (error); if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtx", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock a umtx object. */ static int do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; if (timeout == NULL) { error = _do_lock_umtx(td, umtx, id, 0); /* Mutex locking is restarted if it is interrupted. */ if (error == EINTR) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a umtx object. */ static int do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) { struct umtx_key key; u_long owner; u_long old; int error; int count; /* * Make sure we own this mtx. */ owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMTX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMTX_CONTESTED) == 0) { old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword(&umtx->u_owner, owner, count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } #ifdef COMPAT_FREEBSD32 /* * Lock a umtx object. */ static int _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) { struct umtx_q *uq; uint32_t owner; uint32_t old; int error = 0; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(m, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) return (0); /* The address was invalid. 
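		 * casuword32() reports a fault by returning -1, so an
		 * owner word that legitimately held 0xffffffff would be
		 * indistinguishable from a bad address here; thread ids
		 * never take that value in practice.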
*/ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(m, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(m, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtx", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock a umtx object. */ static int do_lock_umtx32(struct thread *td, void *m, uint32_t id, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; if (timeout == NULL) { error = _do_lock_umtx32(td, m, id, 0); /* Mutex locking is restarted if it is interrupted. */ if (error == EINTR) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a umtx object. */ static int do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) { struct umtx_key key; uint32_t owner; uint32_t old; int error; int count; /* * Make sure we own this mtx. */ owner = fuword32(m); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(m, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(m, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } #endif /* * Fetch and compare value, sleep on the address if value is not changed. 
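 *
 * This is the futex-style primitive behind UMTX_OP_WAIT and its
 * variants. A hypothetical userland caller, for illustration only:
 *
 *	while (atomic_load_acq_int(&flag) == 0)
 *		_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
 *
 * Because do_wait() re-reads *addr only after the thread is already on
 * the sleep queue, a wakeup racing with the userland check is not lost.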
*/ static int do_wait(struct thread *td, void *addr, u_long id, struct timespec *timeout, int compat32, int is_private) { struct umtx_q *uq; struct timespec ts, ts2, ts3; struct timeval tv; u_long tmp; int error = 0; uq = td->td_umtxq; if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (compat32 == 0) tmp = fuword(addr); else tmp = (unsigned int)fuword32(addr); if (tmp != id) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else if (timeout == NULL) { umtxq_lock(&uq->uq_key); error = umtxq_sleep(uq, "uwait", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); umtxq_lock(&uq->uq_key); for (;;) { error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); if (!(uq->uq_flags & UQF_UMTXQ)) { error = 0; break; } if (error != ETIMEDOUT) break; umtxq_unlock(&uq->uq_key); getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; umtxq_lock(&uq->uq_key); break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); umtxq_lock(&uq->uq_key); } umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } /* * Wake up threads sleeping on the specified address. */ int kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) { struct umtx_key key; int ret; if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) return (ret); umtxq_lock(&key); ret = umtxq_signal(&key, n_wake); umtxq_unlock(&key); umtx_key_release(&key); return (0); } /* * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, int mode) { struct umtx_q *uq; uint32_t owner, old, id; int error = 0; id = td->td_tid; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); if (mode == _UMUTEX_WAIT) { if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) return (0); } else { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) return (EDEADLK); if (mode == _UMUTEX_TRY) return (EBUSY); /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. 
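		 *
		 * The owner-word transitions involved, as seen by the
		 * CASes (a summary of the protocol above, not new state):
		 *
		 *	UMUTEX_UNOWNED   -> id                        fast lock
		 *	UMUTEX_CONTESTED -> id | UMUTEX_CONTESTED     lock, waiters queued
		 *	owner            -> owner | UMUTEX_CONTESTED  mark before sleep
		 *
		 * The mark forces the eventual unlocker into
		 * do_unlock_normal() so the sleepers queued above are woken.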
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED,
		    UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread's or higher than the next thread's.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
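	 *
	 * Example of the chain this re-sorting feeds (hypothetical
	 * priorities; smaller is better):
	 *
	 *	T1 (prio 100) blocked on M1, owned by T2
	 *	T2 (prio 120) blocked on M2, owned by T3
	 *
	 * Improving T1's priority re-sorts it to the head of M1's
	 * turnstile, and umtx_propagate_priority() below then lends
	 * priority 100 to T2, and through T2's blocked lock, to T3.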
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Adjust a thread's position in the PI mutex it is blocked on;
 * this may start a new priority-propagation pass.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL)
		umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrement the reference count for a PI mutex; if the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
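 *
 * Lookup and insert are paired under the chain lock; _do_lock_pi()
 * below uses the usual double-checked pattern to allocate:
 *
 *	pi = umtx_pi_lookup(&key);
 *	if (pi == NULL) {
 *		new_pi = umtx_pi_alloc(M_NOWAIT);
 *		if (new_pi == NULL) {
 *			(drop the chain lock, umtx_pi_alloc(M_WAITOK),
 *			 relock, and re-do umtx_pi_lookup())
 *		}
 *	}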
*/ static inline void umtx_pi_insert(struct umtx_pi *pi) { struct umtxq_chain *uc; uc = umtxq_getchain(&pi->pi_key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); } /* * Lock a PI mutex. */ static int _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq; struct umtx_pi *pi, *new_pi; uint32_t id, owner, old; int error; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi == NULL) { new_pi = umtx_pi_alloc(M_NOWAIT); if (new_pi == NULL) { umtxq_unlock(&uq->uq_key); new_pi = umtx_pi_alloc(M_WAITOK); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi != NULL) { umtx_pi_free(new_pi); new_pi = NULL; } } if (new_pi != NULL) { new_pi->pi_key = uq->uq_key; umtx_pi_insert(new_pi); pi = new_pi; } } umtx_pi_ref(pi); umtxq_unlock(&uq->uq_key); /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); error = umtx_pi_claim(pi, td); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If this failed the lock has changed, restart. */ continue; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); error = EFAULT; break; } umtxq_lock(&uq->uq_key); /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ if (old == owner) error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, "umtxpi", timo); else { umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } } umtxq_lock(&uq->uq_key); umtx_pi_unref(pi); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PI mutex. */ static int do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; uint32_t owner, old, id; int error; int count; int pri; id = td->td_tid; /* * Make sure we own this mtx. 
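	 *
	 * Ownership is judged purely from the user-visible owner word:
	 * the caller's tid must equal m_owner with UMUTEX_CONTESTED
	 * masked off, otherwise EPERM is returned, the POSIX error for
	 * unlocking a mutex the caller does not hold.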
*/ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count_pi(&key, &uq_first); if (uq_first != NULL) { mtx_lock_spin(&umtx_lock); pi = uq_first->uq_pi_blocked; KASSERT(pi != NULL, ("pi == NULL?")); if (pi->pi_owner != curthread) { mtx_unlock_spin(&umtx_lock); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); /* userland messed the mutex */ return (EPERM); } uq_me = curthread->td_umtxq; pi->pi_owner = NULL; TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); /* get highest priority thread which is still sleeping. */ uq_first = TAILQ_FIRST(&pi->pi_blocked); while (uq_first != NULL && (uq_first->uq_flags & UQF_UMTXQ) == 0) { uq_first = TAILQ_NEXT(uq_first, uq_lockq); } pri = PRI_MAX; TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); if (uq_first2 != NULL) { if (pri > UPRI(uq_first2->uq_thread)) pri = UPRI(uq_first2->uq_thread); } } thread_lock(curthread); sched_unlend_user_prio(curthread, pri); thread_unlock(curthread); mtx_unlock_spin(&umtx_lock); if (uq_first) umtxq_signal_thread(uq_first); } umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(&m->m_owner, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } /* * Lock a PP mutex. */ static int _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t ceiling; uint32_t owner, id; int error, pri, old_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); for (;;) { old_inherited_pri = uq->uq_inherited_pri; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); if (ceiling > RTP_PRIO_MAX) { error = EINVAL; goto out; } mtx_lock_spin(&umtx_lock); if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { mtx_unlock_spin(&umtx_lock); error = EINVAL; goto out; } if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; thread_lock(td); if (uq->uq_inherited_pri < UPRI(td)) sched_lend_user_prio(td, uq->uq_inherited_pri); thread_unlock(td); } mtx_unlock_spin(&umtx_lock); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. 
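		 *
		 * Ceiling arithmetic used in this loop (kernel priority
		 * values grow downward, so smaller means better):
		 *
		 *	ceiling = RTP_PRIO_MAX - m_ceilings[0];
		 *	lent priority = PRI_MIN_REALTIME + ceiling;
		 *
		 * A locker already running better than the ceiling gets
		 * EINVAL; an entitled locker (su) is lent the ceiling
		 * priority while it waits for and holds the mutex.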
*/ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } if (error != 0) { mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } out: umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PP mutex. */ static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t owner, id; uint32_t rceiling; int error, pri, new_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. */ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); if (error != 0) return (error); if (rceiling == -1) new_inherited_pri = PRI_MAX; else { rceiling = RTP_PRIO_MAX - rceiling; if (rceiling > RTP_PRIO_MAX) return (EINVAL); new_inherited_pri = PRI_MIN_REALTIME + rceiling; } if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_unlock(&key); /* * For priority protected mutex, always set unlocked state * to UMUTEX_CONTESTED, so that userland always enters kernel * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. 
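	 *
	 * In other words, a PP mutex has no userland fast path for
	 * unlock: the owner word goes back to UMUTEX_CONTESTED rather
	 * than UMUTEX_UNOWNED, so the next locker must trap into the
	 * kernel, where the lent priority can be recomputed.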
*/ error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); if (error == -1) error = EFAULT; else { mtx_lock_spin(&umtx_lock); if (su != 0) uq->uq_inherited_pri = new_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } umtx_key_release(&key); return (error); } static int do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; uint32_t save_ceiling; uint32_t owner, id; uint32_t flags; int error; flags = fuword32(&m->m_flags); if ((flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); for (;;) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); save_ceiling = fuword32(&m->m_ceilings[0]); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { suword32(&m->m_ceilings[0], ceiling); suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((owner & ~UMUTEX_CONTESTED) == id) { suword32(&m->m_ceilings[0], ceiling); error = 0; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtxq_lock(&uq->uq_key); if (error == 0) umtxq_signal(&uq->uq_key, INT_MAX); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == 0 && old_ceiling != NULL) suword32(old_ceiling, save_ceiling); return (error); } static int _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, int mode) { switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: return (_do_lock_normal(td, m, flags, timo, mode)); case UMUTEX_PRIO_INHERIT: return (_do_lock_pi(td, m, flags, timo, mode)); case UMUTEX_PRIO_PROTECT: return (_do_lock_pp(td, m, flags, timo, mode)); } return (EINVAL); } /* * Lock a userland POSIX mutex. */ static int do_lock_umutex(struct thread *td, struct umutex *m, struct timespec *timeout, int mode) { struct timespec ts, ts2, ts3; struct timeval tv; uint32_t flags; int error; flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); if (timeout == NULL) { error = _do_lock_umutex(td, m, flags, 0, mode); /* Mutex locking is restarted if it is interrupted. 
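		 *
		 * That is, an untimed lock converts EINTR to ERESTART so
		 * the syscall is transparently retried after the signal
		 * handler runs, while the timed branch below does the
		 * opposite (ERESTART -> EINTR), since restarting would
		 * re-arm the full relative timeout.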
		 */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
+	uint32_t clockid;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
+
+	if ((wflags & CVWAIT_CLOCKID) != 0) {
+		clockid = fuword32(&cv->c_clockid);
+		if (clockid < CLOCK_REALTIME ||
+		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
+			/* hmm, only HW clock id will work. */
+			return (EINVAL);
+		}
+	} else {
+		clockid = CLOCK_REALTIME;
+	}
+
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
-	 * The magic thing is we should set c_has_waiters to 1 before
-	 * releasing user mutex.
+	 * Set c_has_waiters to 1 before releasing the user mutex, and
+	 * avoid dirtying the cache line when that is unnecessary.
	 */
-	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
+	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
+		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
-		if ((wflags & UMTX_CHECK_UNPARKING) &&
-		    (td->td_pflags & TDP_WAKEUP)) {
-			td->td_pflags &= ~TDP_WAKEUP;
-			error = EINTR;
-		} else if (timeout == NULL) {
+		if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
-			getnanouptime(&ets);
-			timespecadd(&ets, timeout);
-			TIMESPEC_TO_TIMEVAL(&tv, timeout);
+			if ((wflags & CVWAIT_ABSTIME) == 0) {
+				kern_clock_gettime(td, clockid, &ets);
+				timespecadd(&ets, timeout);
+				tts = *timeout;
+			} else { /* absolute time */
+				ets = *timeout;
+				tts = *timeout;
+				kern_clock_gettime(td, clockid, &cts);
+				timespecsub(&tts, &cts);
+			}
+			TIMESPEC_TO_TIMEVAL(&tv, &tts);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
-				getnanouptime(&cts);
+				kern_clock_gettime(td, clockid, &cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
-		umtxq_remove(uq);
+		/*
+		 * This must be a timeout, an interruption by a signal,
+		 * or a spurious wakeup; clear the c_has_waiters flag
+		 * when necessary.
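+		 *
+		 * When this waiter was the only one queued (oldlen == 1
+		 * below), the c_has_waiters hint is also cleared, so a
+		 * subsequent pthread_cond_signal() can skip the kernel
+		 * entirely.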
+ */ + umtxq_busy(&uq->uq_key); + if ((uq->uq_flags & UQF_UMTXQ) != 0) { + int oldlen = uq->uq_cur_queue->length; + umtxq_remove(uq); + if (oldlen == 1) { + umtxq_unlock(&uq->uq_key); + suword32( + __DEVOLATILE(uint32_t *, + &cv->c_has_waiters), 0); + umtxq_lock(&uq->uq_key); + } + } + umtxq_unbusy(&uq->uq_key); if (error == ERESTART) error = EINTR; } umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland condition variable. */ static int do_cv_signal(struct thread *td, struct ucond *cv) { struct umtx_key key; int error, cnt, nwake; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); nwake = umtxq_signal(&key, 1); if (cnt <= nwake) { umtxq_unlock(&key); error = suword32( __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_cv_broadcast(struct thread *td, struct ucond *cv) { struct umtx_key key; int error; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_signal(&key, INT_MAX); umtxq_unlock(&key); error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) { struct umtx_q *uq; uint32_t flags, wrflags; int32_t state, oldstate; int32_t blocked_readers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); wrflags = URWLOCK_WRITE_OWNER; if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) wrflags |= URWLOCK_WRITE_WAITERS; for (;;) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); /* try to lock it */ while (!(state & wrflags)) { if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { umtx_key_release(&uq->uq_key); return (EAGAIN); } oldstate = casuword32(&rwlock->rw_state, state, state + 1); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) break; /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * re-read the state, in case it changed between the try-lock above * and the check below */ state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); /* set read contention bit */ while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } /* state is changed while setting flags, restart */ if (!(state & wrflags)) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: /* contention bit is set, before sleeping, increase read waiter count */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers+1); while (state & wrflags) { umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "urdlck", timo); umtxq_busy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } 
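		/*
		 * Note that rw_blocked_readers is maintained with plain
		 * fuword32()/suword32() pairs; this is safe only because
		 * the chain is held busy (umtxq_busy) across each
		 * read-modify-write.
		 */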
/* decrease read waiter count, and may clear read contention bit */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers-1); if (blocked_readers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_READ_WAITERS); if (oldstate == state) break; state = oldstate; } } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int32_t blocked_writers; int32_t blocked_readers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); blocked_readers = 0; for (;;) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) { if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && blocked_readers != 0) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } break; } /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * re-read the state, in case it changed between the try-lock above * and the check below */ state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && (state & URWLOCK_WRITE_WAITERS) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers+1); while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { umtxq_lock(&uq->uq_key); umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "uwrlck", timo); umtxq_busy(&uq->uq_key); umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers-1); if (blocked_writers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_WAITERS); if 
(oldstate == state) break; state = oldstate; } blocked_readers = fuword32(&rwlock->rw_blocked_readers); } else blocked_readers = 0; umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_wrlock(td, obj, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_unlock(struct thread *td, struct urwlock *rwlock) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int error, q, count; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); if (state & URWLOCK_WRITE_OWNER) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_OWNER); if (oldstate != state) { state = oldstate; if (!(oldstate & URWLOCK_WRITE_OWNER)) { error = EPERM; goto out; } } else break; } } else if (URWLOCK_READER_COUNT(state) != 0) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state - 1); if (oldstate != state) { state = oldstate; if (URWLOCK_READER_COUNT(oldstate) == 0) { error = EPERM; goto out; } } else break; } } else { error = EPERM; goto out; } count = 0; if (!(flags & URWLOCK_PREFER_READER)) { if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } else if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } } else { if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } else if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } } if (count) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, count, q); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } out: umtx_key_release(&uq->uq_key); return (error); } static int do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout) { struct umtx_q *uq; struct timeval tv; struct timespec cts, ets, tts; uint32_t flags, count; int error; uq = td->td_umtxq; flags = fuword32(&sem->_flags); error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0) casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1); count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); if (count != 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (0); } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtxq_lock(&uq->uq_key); if (timeout == NULL) { error = umtxq_sleep(uq, "usem", 0); } else { getnanouptime(&ets); timespecadd(&ets, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = umtxq_sleep(uq, "usem", tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&cts); if (timespeccmp(&cts, &ets, >=)) { error = ETIMEDOUT; break; } tts = ets; timespecsub(&tts, &cts); 
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Wake up a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}

static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

+#define BATCH_SIZE	128
+static int
+__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
+{
+	int count = uap->val;
+	void *uaddrs[BATCH_SIZE];
+	char **upp = (char **)uap->obj;
+	int tocopy;
+	int error = 0;
+	int i, pos = 0;
+
+	while (count > 0) {
+		tocopy = count;
+		if (tocopy > BATCH_SIZE)
+			tocopy = BATCH_SIZE;
+		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
+		if (error != 0)
+			break;
+		for (i = 0; i < tocopy; ++i)
+			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
+		count -= tocopy;
+		pos += tocopy;
+	}
+	return (error);
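+	/*
+	 * (Addresses already woken when a later copyin() fails stay
+	 * woken; only the error for the offending batch is returned.
+	 * BATCH_SIZE bounds the on-stack uaddrs[] array to 128
+	 * pointers, i.e. 1KB on LP64.)
+	 */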
+} + static int __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } static int __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); } static int __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); } static int __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_wake_umutex(td, uap->obj); } static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_unlock_umutex(td, uap->obj); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); } static int __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) { return do_cv_signal(td, uap->obj); } static int __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) { return do_cv_broadcast(td, uap->obj); } static int __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) { return do_rw_unlock(td, uap->obj); } static int __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
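	 *
	 * Every timed entry point repeats the same validation; the
	 * invariant amounts to (a hypothetical helper, for illustration
	 * only):
	 *
	 *	static int
	 *	timespec_valid(const struct timespec *ts)
	 *	{
	 *		return (ts->tv_nsec >= 0 &&
	 *		    ts->tv_nsec < 1000000000);
	 *	}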
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_sem_wait(td, uap->obj, ts)); } static int __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) { return do_sem_wake(td, uap->obj); } typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static _umtx_op_func op_table[] = { __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ __umtx_op_wait, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ - __umtx_op_sem_wake /* UMTX_OP_SEM_WAKE */ + __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ + __umtx_op_nwake_private /* UMTX_OP_NWAKE_PRIVATE */ }; int _umtx_op(struct thread *td, struct _umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table[uap->op])(td, uap); return (EINVAL); } #ifdef COMPAT_FREEBSD32 int freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) /* struct umtx *umtx */ { return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); } int freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) /* struct umtx *umtx */ { return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); } struct timespec32 { uint32_t tv_sec; uint32_t tv_nsec; }; static inline int copyin_timeout32(void *addr, struct timespec *tsp) { struct timespec32 ts32; int error; error = copyin(addr, &ts32, sizeof(struct timespec32)); if (error == 0) { tsp->tv_sec = ts32.tv_sec; tsp->tv_nsec = ts32.tv_nsec; } return (error); } static int __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
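 The compat32 entry points reuse copyin_timeout32() above to widen the 32-bit timespec, then apply the same validation as the native ops.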
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_lock_umtx32(td, uap->obj, uap->val, ts)); } static int __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val)); } static int __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 0); } static int __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } static int __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); } static int __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). 
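 The rwlock ops branch on a NULL timeout up front, calling do_rw_wrlock() for an untimed wait and do_rw_wrlock2() for a timed one, rather than passing a possibly-NULL timespec pointer down.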
*/ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 1); } static int __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return (do_sem_wait(td, uap->obj, ts)); } +/* + * 32-bit compat version of __umtx_op_nwake_private(): the userland + * array holds 32-bit pointers, which are widened before each wakeup. + */ +static int +__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) +{ + int count = uap->val; + uint32_t uaddrs[BATCH_SIZE]; + uint32_t **upp = (uint32_t **)uap->obj; + int tocopy; + int error = 0; + int i, pos = 0; + + while (count > 0) { + tocopy = count; + if (tocopy > BATCH_SIZE) + tocopy = BATCH_SIZE; + error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); + if (error != 0) + break; + for (i = 0; i < tocopy; ++i) + kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], + INT_MAX, 1); + count -= tocopy; + pos += tocopy; + } + return (error); +} + static _umtx_op_func op_table_compat32[] = { __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */ __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ - __umtx_op_sem_wake /* UMTX_OP_SEM_WAKE */ + __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ + __umtx_op_nwake_private32 /* UMTX_OP_NWAKE_PRIVATE */ }; int freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table_compat32[uap->op])(td, (struct _umtx_op_args *)uap); return (EINVAL); } #endif void umtx_thread_init(struct thread *td) { td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } void umtx_thread_fini(struct thread *td) { umtxq_free(td->td_umtxq); } /* * Called when a new thread is created, e.g. by fork().
*/ void umtx_thread_alloc(struct thread *td) { struct umtx_q *uq; uq = td->td_umtxq; uq->uq_inherited_pri = PRI_MAX; KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); KASSERT(uq->uq_thread == td, ("uq_thread != td")); KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); } /* * exec() hook. */ static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused) { umtx_thread_cleanup(curthread); } /* * thread_exit() hook. */ void umtx_thread_exit(struct thread *td) { umtx_thread_cleanup(td); } /* * Clean up umtx data: drop ownership of any contested PI mutexes and * give back any lent priority. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; if ((uq = td->td_umtxq) == NULL) return; mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = PRI_MAX; while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } mtx_unlock_spin(&umtx_lock); thread_lock(td); sched_unlend_user_prio(td, PRI_MAX); thread_unlock(td); } diff --git a/sys/sys/_umtx.h b/sys/sys/_umtx.h index fcda9744c35e..fa0ed99280b7 100644 --- a/sys/sys/_umtx.h +++ b/sys/sys/_umtx.h @@ -1,66 +1,67 @@ /*- * Copyright (c) 2010, David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _SYS__UMTX_H_ #define _SYS__UMTX_H_ #include <sys/_types.h> struct umtx { volatile unsigned long u_owner; /* Owner of the mutex.
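 The low bits hold the owning thread id; the sign bit, UMTX_CONTESTED, is set once waiters exist, and umtx_owner() in sys/umtx.h masks it off.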
*/ }; struct umutex { volatile __lwpid_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ __uint32_t m_spare[4]; }; struct ucond { volatile __uint32_t c_has_waiters; /* Has waiters in kernel */ __uint32_t c_flags; /* Flags of the condition variable */ - __uint32_t c_spare[2]; /* Spare space */ + __uint32_t c_clockid; /* Clock id */ + __uint32_t c_spare[1]; /* Spare space */ }; struct urwlock { volatile __int32_t rw_state; __uint32_t rw_flags; __uint32_t rw_blocked_readers; __uint32_t rw_blocked_writers; __uint32_t rw_spare[4]; }; struct _usem { volatile __uint32_t _has_waiters; volatile __uint32_t _count; __uint32_t _flags; }; #endif /* !_SYS__UMTX_H_ */ diff --git a/sys/sys/umtx.h b/sys/sys/umtx.h index dab862ef1b9f..c7d69450dea4 100644 --- a/sys/sys/umtx.h +++ b/sys/sys/umtx.h @@ -1,180 +1,185 @@ /*- * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #ifndef _SYS_UMTX_H_ #define _SYS_UMTX_H_ #include <sys/_umtx.h> #include <sys/limits.h> #define UMTX_UNOWNED 0x0 #define UMTX_CONTESTED LONG_MIN #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ #define UMUTEX_UNOWNED 0x0 #define UMUTEX_CONTESTED 0x80000000U #define UMUTEX_ERROR_CHECK 0x0002 /* Error-checking mutex */ #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ /* urwlock flags */ #define URWLOCK_PREFER_READER 0x0002 #define URWLOCK_WRITE_OWNER 0x80000000U #define URWLOCK_WRITE_WAITERS 0x40000000U #define URWLOCK_READ_WAITERS 0x20000000U #define URWLOCK_MAX_READERS 0x1fffffffU #define URWLOCK_READER_COUNT(c) ((c) & URWLOCK_MAX_READERS) /* _usem flags */ #define SEM_NAMED 0x0002 /* op code for _umtx_op */ #define UMTX_OP_LOCK 0 #define UMTX_OP_UNLOCK 1 #define UMTX_OP_WAIT 2 #define UMTX_OP_WAKE 3 #define UMTX_OP_MUTEX_TRYLOCK 4 #define UMTX_OP_MUTEX_LOCK 5 #define UMTX_OP_MUTEX_UNLOCK 6 #define UMTX_OP_SET_CEILING 7 #define UMTX_OP_CV_WAIT 8 #define UMTX_OP_CV_SIGNAL 9 #define UMTX_OP_CV_BROADCAST 10 #define UMTX_OP_WAIT_UINT 11 #define UMTX_OP_RW_RDLOCK 12 #define UMTX_OP_RW_WRLOCK 13 #define UMTX_OP_RW_UNLOCK 14 #define UMTX_OP_WAIT_UINT_PRIVATE 15 #define UMTX_OP_WAKE_PRIVATE 16 #define UMTX_OP_MUTEX_WAIT 17 #define UMTX_OP_MUTEX_WAKE 18 #define UMTX_OP_SEM_WAIT 19 #define UMTX_OP_SEM_WAKE 20 -#define UMTX_OP_MAX 21 +#define UMTX_OP_NWAKE_PRIVATE 21 +#define UMTX_OP_MAX 22 -/* flags for UMTX_OP_CV_WAIT */ -#define UMTX_CHECK_UNPARKING 0x01 +/* Flags for UMTX_OP_CV_WAIT */ +#define CVWAIT_CHECK_UNPARKING 0x01 +#define CVWAIT_ABSTIME 0x02 +#define CVWAIT_CLOCKID 0x04 + +#define UMTX_CHECK_UNPARKING CVWAIT_CHECK_UNPARKING #ifndef _KERNEL int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); /* * Old (deprecated) userland mutex system calls. */ int _umtx_lock(struct umtx *mtx); int _umtx_unlock(struct umtx *mtx); /* * Standard API: try the uncontested acquire/release in userland and ask * the kernel to resolve failures. */ static __inline void umtx_init(struct umtx *umtx) { umtx->u_owner = UMTX_UNOWNED; } static __inline u_long umtx_owner(struct umtx *umtx) { return (umtx->u_owner & ~LONG_MIN); } static __inline int umtx_lock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_lock(umtx) == -1) return (errno); return (0); } static __inline int umtx_trylock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) return (EBUSY); return (0); } static __inline int umtx_timedlock(struct umtx *umtx, u_long id, const struct timespec *timeout) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_op(umtx, UMTX_OP_LOCK, id, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } static __inline int umtx_unlock(struct umtx *umtx, u_long id) { if (atomic_cmpset_rel_long(&umtx->u_owner, id, UMTX_UNOWNED) == 0) if (_umtx_unlock(umtx) == -1) return (errno); return (0); } static __inline int umtx_wait(u_long *p, long val, const struct timespec *timeout) { if (_umtx_op(p, UMTX_OP_WAIT, val, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } /* Wake threads waiting on a user address.
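 */

/*
 * Editor's sketch, not part of the original header: a minimal wrapper
 * for the new UMTX_OP_NWAKE_PRIVATE operation.  A single system call
 * wakes every waiter on each of `count' private umtx addresses; the
 * kernel copies the pointer array in from userland in BATCH_SIZE
 * chunks.  The wrapper name is hypothetical.
 */
static __inline int
umtx_nwake_private(void **uaddrs, int count)
{

	if (_umtx_op(uaddrs, UMTX_OP_NWAKE_PRIVATE, (u_long)count,
	    0, 0) == -1)
		return (errno);
	return (0);
}

/* Wake threads waiting on a user address.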
*/ static __inline int umtx_wake(u_long *p, int nr_wakeup) { if (_umtx_op(p, UMTX_OP_WAKE, nr_wakeup, 0, 0) == -1) return (errno); return (0); } #else struct thread; struct umtx_q *umtxq_alloc(void); void umtxq_free(struct umtx_q *); int kern_umtx_wake(struct thread *, void *, int, int); void umtx_pi_adjust(struct thread *, u_char); void umtx_thread_init(struct thread *); void umtx_thread_fini(struct thread *); void umtx_thread_alloc(struct thread *); void umtx_thread_exit(struct thread *); #endif /* !_KERNEL */ #endif /* !_SYS_UMTX_H_ */
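
Editor's note, not part of the commit: taken together, the new c_clockid field in struct ucond and the CVWAIT_ABSTIME/CVWAIT_CLOCKID flags let userland wait on a condition variable with an absolute deadline measured against a clock of its choosing. Below is a minimal sketch, assuming the kernel-side do_cv_wait() honors the flags the way the libthr half of this change expects; the function and variable names are illustrative only.

#include <errno.h>
#include <time.h>
#include <sys/umtx.h>

/*
 * Wait on `cv' until an absolute CLOCK_MONOTONIC deadline `secs' seconds
 * from now.  The caller holds `m'; the cv_wait op unlocks it in the
 * kernel once the thread is queued.
 */
static int
example_cv_timedwait(struct ucond *cv, struct umutex *m, time_t secs)
{
	struct timespec abstime;

	cv->c_clockid = CLOCK_MONOTONIC;	/* consulted via CVWAIT_CLOCKID */
	clock_gettime(CLOCK_MONOTONIC, &abstime);
	abstime.tv_sec += secs;
	if (_umtx_op(cv, UMTX_OP_CV_WAIT, CVWAIT_ABSTIME | CVWAIT_CLOCKID,
	    m, &abstime) == -1)
		return (errno);	/* e.g. ETIMEDOUT once the deadline passes */
	return (0);
}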