Index: head/sys/kern/kern_mutex.c
===================================================================
--- head/sys/kern/kern_mutex.c	(revision 326106)
+++ head/sys/kern/kern_mutex.c	(revision 326107)
@@ -1,1260 +1,1263 @@
 /*-
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
  */
 
 /*
  * Machine independent bits of mutex implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_adaptive_mutexes.h"
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #include <sys/vmmeter.h>
 #include <sys/lock_profile.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 
 #include <ddb/ddb.h>
 
 #include <fs/devfs/devfs_int.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 #define	ADAPTIVE_MUTEXES
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DEFINE( , , lock, failed);
 #endif
 
 /*
  * Return the mutex address when the lock cookie address is provided.
  * This functionality assumes that struct mtx* have a member named mtx_lock.
  */
 #define	mtxlock2mtx(c)	(__containerof(c, struct mtx, mtx_lock))
 
 /*
  * Internal utility macros.
  */
 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
 
 #define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)
 
 static void	assert_mtx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_mtx(const struct lock_object *lock);
 #endif
 static void	lock_mtx(struct lock_object *lock, uintptr_t how);
 static void	lock_spin(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_mtx(const struct lock_object *lock,
 		    struct thread **owner);
 #endif
 static uintptr_t unlock_mtx(struct lock_object *lock);
 static uintptr_t unlock_spin(struct lock_object *lock);
 
 /*
  * Lock classes for sleep and spin mutexes.
  */
 struct lock_class lock_class_mtx_sleep = {
 	.lc_name = "sleep mutex",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_mtx,
 	.lc_unlock = unlock_mtx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 struct lock_class lock_class_mtx_spin = {
 	.lc_name = "spin mutex",
 	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_spin,
 	.lc_unlock = unlock_spin,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 
 #ifdef ADAPTIVE_MUTEXES
 static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
 
 static struct lock_delay_config __read_frequently mtx_delay;
 
 SYSCTL_INT(_debug_mtx, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_delay.base,
     0, "");
 SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
     0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(mtx_delay);
 #endif
 
 static SYSCTL_NODE(_debug, OID_AUTO, mtx_spin, CTLFLAG_RD, NULL,
     "mtx spin debugging");
 
 static struct lock_delay_config __read_frequently mtx_spin_delay;
 
 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_base, CTLFLAG_RW,
     &mtx_spin_delay.base, 0, "");
 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_max, CTLFLAG_RW,
     &mtx_spin_delay.max, 0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(mtx_spin_delay);
 
 /*
  * System-wide mutexes
  */
 struct mtx blocked_lock;
 struct mtx __exclusive_cache_line Giant;
 
 void
 assert_mtx(const struct lock_object *lock, int what)
 {
 
 	mtx_assert((const struct mtx *)lock, what);
 }
 
 void
 lock_mtx(struct lock_object *lock, uintptr_t how)
 {
 
 	mtx_lock((struct mtx *)lock);
 }
 
 void
 lock_spin(struct lock_object *lock, uintptr_t how)
 {
 
 	panic("spin locks can only use msleep_spin");
 }
 
 uintptr_t
 unlock_mtx(struct lock_object *lock)
 {
 	struct mtx *m;
 
 	m = (struct mtx *)lock;
 	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(m);
 	return (0);
 }
 
 uintptr_t
 unlock_spin(struct lock_object *lock)
 {
 
 	panic("spin locks can only use msleep_spin");
 }
 
 #ifdef KDTRACE_HOOKS
 int
 owner_mtx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct mtx *m;
 	uintptr_t x;
 
 	m = (const struct mtx *)lock;
 	x = m->mtx_lock;
 	*owner = (struct thread *)(x & ~MTX_FLAGMASK);
 	return (*owner != NULL);
 }
 #endif
 
 /*
  * Function versions of the inlined __mtx_* macros.  These are used by
  * modules and can also be called from assembly language if needed.
  */
 void
 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	uintptr_t tid, v;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
 	tid = (uintptr_t)curthread;
 	v = MTX_UNOWNED;
 	if (!_mtx_obtain_lock_fetch(m, &v, tid))
 		_mtx_lock_sleep(m, v, opts, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
 		    m, 0, 0, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
 	    file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 void
 __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 #ifdef LOCK_PROFILING
-	__mtx_unlock_sleep(c, opts, file, line);
+	__mtx_unlock_sleep(c, (uintptr_t)curthread, opts, file, line);
 #else
 	__mtx_unlock(m, curthread, opts, file, line);
 #endif
 	TD_LOCKS_DEC(curthread);
 }
 
 void
 __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 #ifdef SMP
 	uintptr_t tid, v;
 #endif
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 	opts &= ~MTX_RECURSE;
 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 #ifdef SMP
 	spinlock_enter();
 	tid = (uintptr_t)curthread;
 	v = MTX_UNOWNED;
 	if (!_mtx_obtain_lock_fetch(m, &v, tid))
 		_mtx_lock_spin(m, v, opts, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire,
 		    m, 0, 0, file, line);
 #else
 	__mtx_lock_spin(m, curthread, opts, file, line);
 #endif
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 }
 
 int
 __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	KASSERT((opts & MTX_RECURSE) == 0,
 	    ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
 	    m->lock_object.lo_name, file, line));
 	if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
 		LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 		return (1);
 	}
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
 	return (0);
 }
 
 void
 __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 	__mtx_unlock_spin(m);
 }
 
 /*
  * The important part of mtx_trylock{,_flags}()
  * Tries to acquire lock `m.'  If this function is called on a mutex that
  * is already owned, it will recursively acquire the lock.
  */
 int
 _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t tid, v;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
 	if (SCHEDULER_STOPPED_TD(td))
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 
 	rval = 1;
 	recursed = false;
 	v = MTX_UNOWNED;
 	for (;;) {
 		if (_mtx_obtain_lock_fetch(m, &v, tid))
 			break;
 		if (v == MTX_UNOWNED)
 			continue;
 		if (v == tid &&
 		    ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0)) {
 			m->mtx_recurse++;
 			atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 			recursed = true;
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	opts &= ~MTX_RECURSE;
 
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		TD_LOCKS_INC(curthread);
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
 			    m, contested, waittime, file, line);
 	}
 
 	return (rval);
 }
 
 int
 _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 	return (_mtx_trylock_flags_int(m, opts LOCK_FILE_LINE_ARG));
 }
 
 /*
  * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
  *
  * We call this if the lock is either contested (i.e. we need to go to
  * sleep waiting for it), or if we need to recurse on it.
  */
 #if LOCK_DEBUG > 0
 void
 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file,
     int line)
 #else
 void
 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct thread *td;
 	struct mtx *m;
 	struct turnstile *ts;
 	uintptr_t tid;
 	struct thread *owner;
 #ifdef KTR
 	int cont_logged = 0;
 #endif
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof;
 #endif
 	td = curthread;
 	tid = (uintptr_t)td;
 	if (SCHEDULER_STOPPED_TD(td))
 		return;
 
 #if defined(ADAPTIVE_MUTEXES)
 	lock_delay_arg_init(&lda, &mtx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	m = mtxlock2mtx(c);
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
 
 	if (__predict_false(lv_mtx_owner(v) == td)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 #if LOCK_DEBUG > 0
 		opts &= ~MTX_RECURSE;
 #endif
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
 		return;
 	}
 #if LOCK_DEBUG > 0
 	opts &= ~MTX_RECURSE;
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object,
 		    &contested, &waittime);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 	if (__predict_false(doing_lockprof))
 		all_time -= lockstat_nsecs(&m->lock_object);
 #endif
 
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * If the owner is running on another CPU, spin until the
 		 * owner stops running or the state of the lock changes.
 		 */
 		owner = lv_mtx_owner(v);
 		if (TD_IS_RUNNING(owner)) {
 			if (LOCK_LOG_TEST(&m->lock_object, 0))
 				CTR3(KTR_LOCK,
 				    "%s: spinning on %p held by %p",
 				    __func__, m, owner);
 			KTR_STATE1(KTR_SCHED, "thread",
 			    sched_tdname((struct thread *)tid),
 			    "spinning", "lockname:\"%s\"",
 			    m->lock_object.lo_name);
 			do {
 				lock_delay(&lda);
 				v = MTX_READ_VALUE(m);
 				owner = lv_mtx_owner(v);
 			} while (v != MTX_UNOWNED && TD_IS_RUNNING(owner));
 			KTR_STATE0(KTR_SCHED, "thread",
 			    sched_tdname((struct thread *)tid),
 			    "running");
 			continue;
 		}
 #endif
 
 		ts = turnstile_trywait(&m->lock_object);
 		v = MTX_READ_VALUE(m);
 
 		/*
 		 * Check if the lock has been released while spinning for
 		 * the turnstile chain lock.
 		 */
 		if (v == MTX_UNOWNED) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		owner = lv_mtx_owner(v);
 		if (TD_IS_RUNNING(owner)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 #endif
 
 		/*
 		 * If the mutex isn't already contested and a failure occurs
 		 * setting the contested bit, the mutex was either released
 		 * or the state of the MTX_RECURSED bit changed.
 		 */
 		if ((v & MTX_CONTESTED) == 0 &&
 		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
 			turnstile_cancel(ts);
 			v = MTX_READ_VALUE(m);
 			continue;
 		}
 
 		/*
 		 * We definitely must sleep for this lock.
 		 */
 		mtx_assert(m, MA_NOTOWNED);
 
 #ifdef KTR
 		if (!cont_logged) {
 			CTR6(KTR_CONTENTION,
 			    "contention: %p at %s:%d wants %s, taken by %s:%d",
 			    (void *)tid, file, line, m->lock_object.lo_name,
 			    WITNESS_FILE(&m->lock_object),
 			    WITNESS_LINE(&m->lock_object));
 			cont_logged = 1;
 		}
 #endif
 
 		/*
 		 * Block on the turnstile.
 		 */
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&m->lock_object);
 #endif
 #ifndef ADAPTIVE_MUTEXES
 		owner = mtx_owner(m);
 #endif
 		MPASS(owner == mtx_owner(m));
 		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&m->lock_object);
 		sleep_cnt++;
 #endif
 		v = MTX_READ_VALUE(m);
 	}
 #ifdef KTR
 	if (cont_logged) {
 		CTR4(KTR_CONTENTION,
 		    "contention end: %s acquired by %p at %s:%d",
 		    m->lock_object.lo_name, (void *)tid, file, line);
 	}
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&m->lock_object);
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
 	    waittime, file, line);
 #ifdef KDTRACE_HOOKS
 	if (sleep_time)
 		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);
 
 	/*
 	 * Only record the loops spinning and not sleeping.
 	 */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
 #endif
 }
 
 static void
 _mtx_lock_spin_failed(struct mtx *m)
 {
 	struct thread *td;
 
 	td = mtx_owner(m);
 
 	/* If the mutex is unlocked, try again. */
 	if (td == NULL)
 		return;
 
 	printf( "spin lock %p (%s) held by %p (tid %d) too long\n",
 	    m, m->lock_object.lo_name, td, td->td_tid);
 #ifdef WITNESS
 	witness_display_spinlock(&m->lock_object, td, printf);
 #endif
 	panic("spin lock held too long");
 }
 
 #ifdef SMP
 /*
  * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
  *
  * This is only called if we need to actually spin for the lock. Recursion
  * is handled inline.
  */
 #if LOCK_DEBUG > 0
 void
 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, int opts,
     const char *file, int line)
 #else
 void
 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct lock_delay_arg lda;
 	uintptr_t tid;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
 	int64_t spin_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof;
 #endif
 
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
 
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
 
 	if (__predict_false(v == tid)) {
 		m->mtx_recurse++;
 		return;
 	}
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	lock_delay_arg_init(&lda, &mtx_spin_delay);
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 	if (__predict_false(doing_lockprof))
 		spin_time -= lockstat_nsecs(&m->lock_object);
 #endif
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
 			continue;
 		}
 		/* Give interrupts a chance while we spin. */
 		spinlock_exit();
 		do {
 			if (lda.spin_cnt < 10000000) {
 				lock_delay(&lda);
 			} else {
 				lda.spin_cnt++;
 				if (lda.spin_cnt < 60000000 || kdb_active ||
 				    panicstr != NULL)
 					DELAY(1);
 				else
 					_mtx_lock_spin_failed(m);
 				cpu_spinwait();
 			}
 			v = MTX_READ_VALUE(m);
 		} while (v != MTX_UNOWNED);
 		spinlock_enter();
 	}
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
 	KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "running");
 
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
 	    contested, waittime, file, line);
 #ifdef KDTRACE_HOOKS
 	if (lda.spin_cnt != 0)
 		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
 #endif
 }
 #endif /* SMP */
 
 #ifdef INVARIANTS
 static void
 thread_lock_validate(struct mtx *m, int opts, const char *file, int line)
 {
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("thread_lock() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
 		    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&m->lock_object,
 	    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 }
 #else
 #define thread_lock_validate(m, opts, file, line) do { } while (0)
 #endif
 
 #ifndef LOCK_PROFILING
 #if LOCK_DEBUG > 0
 void
 _thread_lock(struct thread *td, int opts, const char *file, int line)
 #else
 void
 _thread_lock(struct thread *td)
 #endif
 {
 	struct mtx *m;
 	uintptr_t tid, v;
 
 	tid = (uintptr_t)curthread;
 
 	spinlock_enter();
 	m = td->td_lock;
 	thread_lock_validate(m, 0, file, line);
 	v = MTX_READ_VALUE(m);
 	if (__predict_true(v == MTX_UNOWNED)) {
 		if (__predict_false(!_mtx_obtain_lock(m, tid)))
 			goto slowpath_unlocked;
 	} else if (v == tid) {
 		m->mtx_recurse++;
 	} else
 		goto slowpath_unlocked;
 	if (__predict_true(m == td->td_lock)) {
 		WITNESS_LOCK(&m->lock_object, LOP_EXCLUSIVE, file, line);
 		return;
 	}
 	if (m->mtx_recurse != 0)
 		m->mtx_recurse--;
 	else
 		_mtx_release_lock_quick(m);
 slowpath_unlocked:
 	spinlock_exit();
 	thread_lock_flags_(td, 0, 0, 0);
 }
 #endif
 
 void
 thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	uintptr_t tid, v;
 	struct lock_delay_arg lda;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
 	int64_t spin_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof = 1;
 #endif
 
 	tid = (uintptr_t)curthread;
 
 	if (SCHEDULER_STOPPED()) {
 		/*
 		 * Ensure that spinlock sections are balanced even when the
 		 * scheduler is stopped, since we may otherwise inadvertently
 		 * re-enable interrupts while dumping core.
 		 */
 		spinlock_enter();
 		return;
 	}
 
 	lock_delay_arg_init(&lda, &mtx_spin_delay);
 
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 	if (__predict_false(doing_lockprof))
 		spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
 #endif
 	for (;;) {
 retry:
 		v = MTX_UNOWNED;
 		spinlock_enter();
 		m = td->td_lock;
 		thread_lock_validate(m, opts, file, line);
 		for (;;) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
 			if (v == MTX_UNOWNED)
 				continue;
 			if (v == tid) {
 				m->mtx_recurse++;
 				break;
 			}
 #ifdef HWPMC_HOOKS
 			PMC_SOFT_CALL( , , lock, failed);
 #endif
 			lock_profile_obtain_lock_failed(&m->lock_object,
 			    &contested, &waittime);
 			/* Give interrupts a chance while we spin. */
 			spinlock_exit();
 			do {
 				if (lda.spin_cnt < 10000000) {
 					lock_delay(&lda);
 				} else {
 					lda.spin_cnt++;
 					if (lda.spin_cnt < 60000000 ||
 					    kdb_active || panicstr != NULL)
 						DELAY(1);
 					else
 						_mtx_lock_spin_failed(m);
 					cpu_spinwait();
 				}
 				if (m != td->td_lock)
 					goto retry;
 				v = MTX_READ_VALUE(m);
 			} while (v != MTX_UNOWNED);
 			spinlock_enter();
 		}
 		if (m == td->td_lock)
 			break;
 		__mtx_unlock_spin(m);	/* does spinlock_exit() */
 	}
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
 #endif
 	if (m->mtx_recurse == 0)
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
 		    contested, waittime, file, line);
 #ifdef KDTRACE_HOOKS
 	if (lda.spin_cnt != 0)
 		LOCKSTAT_RECORD1(thread__spin, m, spin_time);
 #endif
 }
 
 struct mtx *
 thread_lock_block(struct thread *td)
 {
 	struct mtx *lock;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
 	td->td_lock = &blocked_lock;
 	mtx_unlock_spin(lock);
 
 	return (lock);
 }
 
 void
 thread_lock_unblock(struct thread *td, struct mtx *new)
 {
 	mtx_assert(new, MA_OWNED);
 	MPASS(td->td_lock == &blocked_lock);
 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
 }
 
 void
 thread_lock_set(struct thread *td, struct mtx *new)
 {
 	struct mtx *lock;
 
 	mtx_assert(new, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
 	td->td_lock = new;
 	mtx_unlock_spin(lock);
 }
 
 /*
  * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
  *
  * We are only called here if the lock is recursed, contested (i.e. we
  * need to wake up a blocked thread) or lockstat probe is active.
  */
 #if LOCK_DEBUG > 0
 void
-__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+    const char *file, int line)
 #else
 void
-__mtx_unlock_sleep(volatile uintptr_t *c)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t tid, v;
+	uintptr_t tid;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
-	if (v & MTX_RECURSED) {
+	if (__predict_false(v == tid))
+		v = MTX_READ_VALUE(m);
+
+	if (__predict_false(v & MTX_RECURSED)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m);
 	if (v == tid && _mtx_release_lock(m, tid))
 		return;
 
 	/*
 	 * We have to lock the chain before the turnstile so this turnstile
 	 * can be removed from the hash list if it is empty.
 	 */
 	turnstile_chain_lock(&m->lock_object);
 	_mtx_release_lock_quick(m);
 	ts = turnstile_lookup(&m->lock_object);
 	MPASS(ts != NULL);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
 	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
 
 	/*
 	 * This turnstile is now no longer associated with the mutex.  We can
 	 * unlock the chain lock so a new turnstile may take it's place.
 	 */
 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	turnstile_chain_unlock(&m->lock_object);
 }
 
 /*
  * All the unlocking of MTX_SPIN locks is done inline.
  * See the __mtx_unlock_spin() macro for the details.
  */
 
 /*
  * The backing function for the INVARIANTS-enabled mtx_assert()
  */
 #ifdef INVARIANT_SUPPORT
 void
 __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct mtx *m;
 
 	if (panicstr != NULL || dumping || SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	switch (what) {
 	case MA_OWNED:
 	case MA_OWNED | MA_RECURSED:
 	case MA_OWNED | MA_NOTRECURSED:
 		if (!mtx_owned(m))
 			panic("mutex %s not owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		if (mtx_recursed(m)) {
 			if ((what & MA_NOTRECURSED) != 0)
 				panic("mutex %s recursed at %s:%d",
 				    m->lock_object.lo_name, file, line);
 		} else if ((what & MA_RECURSED) != 0) {
 			panic("mutex %s unrecursed at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		}
 		break;
 	case MA_NOTOWNED:
 		if (mtx_owned(m))
 			panic("mutex %s owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		break;
 	default:
 		panic("unknown mtx_assert at %s:%d", file, line);
 	}
 }
 #endif
 
 /*
  * General init routine used by the MTX_SYSINIT() macro.
  */
 void
 mtx_sysinit(void *arg)
 {
 	struct mtx_args *margs = arg;
 
 	mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
 	    margs->ma_opts);
 }
 
 /*
  * Mutex initialization routine; initialize lock `m' of type contained in
  * `opts' with options contained in `opts' and name `name.'  The optional
  * lock type `type' is used as a general lock category name for use with
  * witness.
  */
 void
 _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
 {
 	struct mtx *m;
 	struct lock_class *class;
 	int flags;
 
 	m = mtxlock2mtx(c);
 
 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
 	    MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
 	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
 	    &m->mtx_lock));
 
 	/* Determine lock class and lock flags. */
 	if (opts & MTX_SPIN)
 		class = &lock_class_mtx_spin;
 	else
 		class = &lock_class_mtx_sleep;
 	flags = 0;
 	if (opts & MTX_QUIET)
 		flags |= LO_QUIET;
 	if (opts & MTX_RECURSE)
 		flags |= LO_RECURSABLE;
 	if ((opts & MTX_NOWITNESS) == 0)
 		flags |= LO_WITNESS;
 	if (opts & MTX_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & MTX_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (opts & MTX_NEW)
 		flags |= LO_NEW;
 
 	/* Initialize mutex. */
 	lock_init(&m->lock_object, class, name, type, flags);
 
 	m->mtx_lock = MTX_UNOWNED;
 	m->mtx_recurse = 0;
 }
 
 /*
  * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
  * passed in as a flag here because if the corresponding mtx_init() was
  * called with MTX_QUIET set, then it will already be set in the mutex's
  * flags.
  */
 void
 _mtx_destroy(volatile uintptr_t *c)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	if (!mtx_owned(m))
 		MPASS(mtx_unowned(m));
 	else {
 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
 
 		/* Perform the non-mtx related part of mtx_unlock_spin(). */
 		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
 			spinlock_exit();
 		else
 			TD_LOCKS_DEC(curthread);
 
 		lock_profile_release_lock(&m->lock_object);
 		/* Tell witness this isn't locked to make it happy. */
 		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
 		    __LINE__);
 	}
 
 	m->mtx_lock = MTX_DESTROYED;
 	lock_destroy(&m->lock_object);
 }
 
 /*
  * Intialize the mutex code and system mutexes.  This is called from the MD
  * startup code prior to mi_startup().  The per-CPU data space needs to be
  * setup before this is called.
  */
 void
 mutex_init(void)
 {
 
 	/* Setup turnstiles so that sleep mutexes work. */
 	init_turnstiles();
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
 	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
 	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
 	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
 	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
 	mtx_lock(&Giant);
 }
 
 #ifdef DDB
 void
 db_show_mtx(const struct lock_object *lock)
 {
 	struct thread *td;
 	const struct mtx *m;
 
 	m = (const struct mtx *)lock;
 
 	db_printf(" flags: {");
 	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
 		db_printf("SPIN");
 	else
 		db_printf("DEF");
 	if (m->lock_object.lo_flags & LO_RECURSABLE)
 		db_printf(", RECURSE");
 	if (m->lock_object.lo_flags & LO_DUPOK)
 		db_printf(", DUPOK");
 	db_printf("}\n");
 	db_printf(" state: {");
 	if (mtx_unowned(m))
 		db_printf("UNOWNED");
 	else if (mtx_destroyed(m))
 		db_printf("DESTROYED");
 	else {
 		db_printf("OWNED");
 		if (m->mtx_lock & MTX_CONTESTED)
 			db_printf(", CONTESTED");
 		if (m->mtx_lock & MTX_RECURSED)
 			db_printf(", RECURSED");
 	}
 	db_printf("}\n");
 	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
 		td = mtx_owner(m);
 		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (mtx_recursed(m))
 			db_printf(" recursed: %d\n", m->mtx_recurse);
 	}
 }
 #endif
Index: head/sys/kern/kern_rwlock.c
===================================================================
--- head/sys/kern/kern_rwlock.c	(revision 326106)
+++ head/sys/kern/kern_rwlock.c	(revision 326107)
@@ -1,1440 +1,1443 @@
 /*-
  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Machine independent bits of reader/writer lock implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_rwlocks.h"
 
 #include <sys/param.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/turnstile.h>
 
 #include <machine/cpu.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
 #define	ADAPTIVE_RWLOCKS
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /*
  * Return the rwlock address when the lock cookie address is provided.
  * This functionality assumes that struct rwlock* have a member named rw_lock.
  */
 #define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 static void	db_show_rwlock(const struct lock_object *lock);
 #endif
 static void	assert_rw(const struct lock_object *lock, int what);
 static void	lock_rw(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_rw(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_rw(struct lock_object *lock);
 
 struct lock_class lock_class_rw = {
 	.lc_name = "rw",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_rw,
 #ifdef DDB
 	.lc_ddb_show = db_show_rwlock,
 #endif
 	.lc_lock = lock_rw,
 	.lc_unlock = unlock_rw,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_rw,
 #endif
 };
 
 #ifdef ADAPTIVE_RWLOCKS
 static int __read_frequently rowner_retries = 10;
 static int __read_frequently rowner_loops = 10000;
 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
     "rwlock debugging");
 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
 
 static struct lock_delay_config __read_frequently rw_delay;
 
 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
     0, "");
 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
     0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(rw_delay);
 #endif
 
 /*
  * Return a pointer to the owning thread if the lock is write-locked or
  * NULL if the lock is unlocked or read-locked.
  */
 
 #define	lv_rw_wowner(v)							\
 	((v) & RW_LOCK_READ ? NULL :					\
 	 (struct thread *)RW_OWNER((v)))
 
 #define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))
 
 /*
  * Returns if a write owner is recursed.  Write ownership is not assured
  * here and should be previously checked.
  */
 #define	rw_recursed(rw)		((rw)->rw_recurse != 0)
 
 /*
  * Return true if curthread helds the lock.
  */
 #define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
 
 /*
  * Return a pointer to the owning thread for this lock who should receive
  * any priority lent by threads that block on this lock.  Currently this
  * is identical to rw_wowner().
  */
 #define	rw_owner(rw)		rw_wowner(rw)
 
 #ifndef INVARIANTS
 #define	__rw_assert(c, what, file, line)
 #endif
 
 void
 assert_rw(const struct lock_object *lock, int what)
 {
 
 	rw_assert((const struct rwlock *)lock, what);
 }
 
 void
 lock_rw(struct lock_object *lock, uintptr_t how)
 {
 	struct rwlock *rw;
 
 	rw = (struct rwlock *)lock;
 	if (how)
 		rw_rlock(rw);
 	else
 		rw_wlock(rw);
 }
 
 uintptr_t
 unlock_rw(struct lock_object *lock)
 {
 	struct rwlock *rw;
 
 	rw = (struct rwlock *)lock;
 	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
 	if (rw->rw_lock & RW_LOCK_READ) {
 		rw_runlock(rw);
 		return (1);
 	} else {
 		rw_wunlock(rw);
 		return (0);
 	}
 }
 
 #ifdef KDTRACE_HOOKS
 int
 owner_rw(const struct lock_object *lock, struct thread **owner)
 {
 	const struct rwlock *rw = (const struct rwlock *)lock;
 	uintptr_t x = rw->rw_lock;
 
 	*owner = rw_wowner(rw);
 	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
 	    (*owner != NULL));
 }
 #endif
 
 void
 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
 {
 	struct rwlock *rw;
 	int flags;
 
 	rw = rwlock2rw(c);
 
 	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
 	    RW_RECURSE | RW_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
 	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
 	    &rw->rw_lock));
 
 	flags = LO_UPGRADABLE;
 	if (opts & RW_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & RW_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (!(opts & RW_NOWITNESS))
 		flags |= LO_WITNESS;
 	if (opts & RW_RECURSE)
 		flags |= LO_RECURSABLE;
 	if (opts & RW_QUIET)
 		flags |= LO_QUIET;
 	if (opts & RW_NEW)
 		flags |= LO_NEW;
 
 	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
 	rw->rw_lock = RW_UNLOCKED;
 	rw->rw_recurse = 0;
 }
 
 void
 _rw_destroy(volatile uintptr_t *c)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
 	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
 	rw->rw_lock = RW_DESTROYED;
 	lock_destroy(&rw->lock_object);
 }
 
 void
 rw_sysinit(void *arg)
 {
 	struct rw_args *args;
 
 	args = arg;
 	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
 	    args->ra_flags);
 }
 
 int
 _rw_wowned(const volatile uintptr_t *c)
 {
 
 	return (rw_wowner(rwlock2rw(c)) == curthread);
 }
 
 void
 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	uintptr_t tid, v;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	tid = (uintptr_t)curthread;
 	v = RW_UNLOCKED;
 	if (!_rw_write_lock_fetch(rw, &v, tid))
 		_rw_wlock_hard(rw, v, file, line);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
 		    0, 0, file, line, LOCKSTAT_WRITER);
 
 	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
 	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 int
 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	struct thread *td;
 	uintptr_t tid, v;
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
 	if (SCHEDULER_STOPPED_TD(td))
 		return (1);
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
 
 	rval = 1;
 	recursed = false;
 	v = RW_UNLOCKED;
 	for (;;) {
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
 			break;
 		if (v == RW_UNLOCKED)
 			continue;
 		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
 			rw->rw_recurse++;
 			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
 			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
 		TD_LOCKS_INC(curthread);
 	}
 	return (rval);
 }
 
 void
 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_WLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
 	    line);
 
 #ifdef LOCK_PROFILING
 	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
 #else
 	__rw_wunlock(rw, curthread, file, line);
 #endif
 
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Determines whether a new reader can acquire a lock.  Succeeds if the
  * reader already owns a read lock and the lock is locked for read to
  * prevent deadlock from reader recursion.  Also succeeds if the lock
  * is unlocked and has no writer waiters or spinners.  Failing otherwise
  * prioritizes writers before readers.
  */
 #define	RW_CAN_READ(td, _rw)						\
     (((_rw) & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==\
     RW_LOCK_READ || ((td)->td_rw_rlocks && (_rw) & RW_LOCK_READ))
 
 static bool __always_inline
 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp
     LOCK_FILE_LINE_ARG_DEF)
 {
 
 	/*
 	 * Handle the easy case.  If no other thread has a write
 	 * lock, then try to bump up the count of read locks.  Note
 	 * that we have to preserve the current state of the
 	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
 	 * read lock, then rw_lock must have changed, so restart
 	 * the loop.  Note that this handles the case of a
 	 * completely unlocked rwlock since such a lock is encoded
 	 * as a read lock with no waiters.
 	 */
 	while (RW_CAN_READ(td, *vp)) {
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
 			*vp + RW_ONE_READER)) {
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR4(KTR_LOCK,
 				    "%s: %p succeed %p -> %p", __func__,
 				    rw, (void *)*vp,
 				    (void *)(*vp + RW_ONE_READER));
 			td->td_rw_rlocks++;
 			return (true);
 		}
 	}
 	return (false);
 }
 
 static void __noinline
 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
     LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
 	int spintries = 0;
 	int i, n;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
 	int doing_lockprof;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 #if defined(ADAPTIVE_RWLOCKS)
 	lock_delay_arg_init(&lda, &rw_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&rw->lock_object,
 	    &contested, &waittime);
 
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 	state = v;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 	if (__predict_false(doing_lockprof)) {
 		all_time -= lockstat_nsecs(&rw->lock_object);
 		state = v;
 	}
 #endif
 
 	for (;;) {
 		if (__rw_rlock_try(rw, td, &v LOCK_FILE_LINE_ARG))
 			break;
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((v & RW_LOCK_READ) == 0) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, rw, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", rw->lock_object.lo_name);
 				do {
 					lock_delay(&lda);
 					v = RW_READ_VALUE(rw);
 					owner = lv_rw_wowner(v);
 				} while (owner != NULL && TD_IS_RUNNING(owner));
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		} else if (spintries < rowner_retries) {
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			for (i = 0; i < rowner_loops; i += n) {
 				n = RW_READERS(v);
 				lock_delay_spin(n);
 				v = RW_READ_VALUE(rw);
 				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(td, v))
 					break;
 			}
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += rowner_loops - i;
 #endif
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			if (i != rowner_loops)
 				continue;
 		}
 #endif
 
 		/*
 		 * Okay, now it's the hard case.  Some other thread already
 		 * has a write lock or there are write waiters present,
 		 * acquire the turnstile lock so we can begin the process
 		 * of blocking.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 
 		/*
 		 * The lock might have been released while we spun, so
 		 * recheck its state and restart the loop if needed.
 		 */
 		v = RW_READ_VALUE(rw);
 		if (RW_CAN_READ(td, v)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if ((v & RW_LOCK_READ) == 0) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * The lock is held in write mode or it already has waiters.
 		 */
 		MPASS(!RW_CAN_READ(td, v));
 
 		/*
 		 * If the RW_LOCK_READ_WAITERS flag is already set, then
 		 * we can go ahead and block.  If it is not set then try
 		 * to set it.  If we fail to set it drop the turnstile
 		 * lock and restart the loop.
 		 */
 		if (!(v & RW_LOCK_READ_WAITERS)) {
 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 			    v | RW_LOCK_READ_WAITERS)) {
 				turnstile_cancel(ts);
 				v = RW_READ_VALUE(rw);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
 				    __func__, rw);
 		}
 
 		/*
 		 * We were unable to acquire the lock and the read waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 		v = RW_READ_VALUE(rw);
 	}
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&rw->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
 	/* Record only the loops spinning and not sleeping. */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 #endif
 	/*
 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
 	 * however.  turnstiles don't like owners changing between calls to
 	 * turnstile_wait() currently.
 	 */
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_READER);
 }
 
 void
 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t v;
 
 	td = curthread;
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
 	    !TD_IS_IDLETHREAD(td),
 	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    td, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
 	KASSERT(rw_wowner(rw) != td,
 	    ("rw_rlock: wlock already held for %s @ %s:%d",
 	    rw->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
 
 	v = RW_READ_VALUE(rw);
 	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__acquire) ||
 	    !__rw_rlock_try(rw, td, &v LOCK_FILE_LINE_ARG)))
 		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
 
 	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
 	TD_LOCKS_INC(curthread);
 }
 
 void
 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
 }
 
 int
 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 
 	x = rw->rw_lock;
 	for (;;) {
 		KASSERT(rw->rw_lock != RW_DESTROYED,
 		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
 		if (!(x & RW_LOCK_READ))
 			break;
 		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
 			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
 			    line);
 			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
 			    rw, 0, 0, file, line, LOCKSTAT_READER);
 			TD_LOCKS_INC(curthread);
 			curthread->td_rw_rlocks++;
 			return (1);
 		}
 	}
 
 	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 static bool __always_inline
 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
 {
 
 	for (;;) {
 		/*
 		 * See if there is more than one read lock held.  If so,
 		 * just drop one and return.
 		 */
 		if (RW_READERS(*vp) > 1) {
 			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
 			    *vp - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
 					    __func__, rw, (void *)*vp,
 					    (void *)(*vp - RW_ONE_READER));
 				td->td_rw_rlocks--;
 				return (true);
 			}
 			continue;
 		}
 		/*
 		 * If there aren't any waiters for a write lock, then try
 		 * to drop it quickly.
 		 */
 		if (!(*vp & RW_LOCK_WAITERS)) {
 			MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
 			    RW_READERS_LOCK(1));
 			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
 			    RW_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, rw);
 				td->td_rw_rlocks--;
 				return (true);
 			}
 			continue;
 		}
 		break;
 	}
 	return (false);
 }
 
 static void __noinline
 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
     LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 	uintptr_t x, queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	for (;;) {
 		if (__rw_runlock_try(rw, td, &v))
 			break;
 
 		/*
 		 * Ok, we know we have waiters and we think we are the
 		 * last reader, so grab the turnstile lock.
 		 */
 		turnstile_chain_lock(&rw->lock_object);
 		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		MPASS(v & RW_LOCK_WAITERS);
 
 		/*
 		 * Try to drop our lock leaving the lock in a unlocked
 		 * state.
 		 *
 		 * If you wanted to do explicit lock handoff you'd have to
 		 * do it here.  You'd also want to use turnstile_signal()
 		 * and you'd have to handle the race where a higher
 		 * priority thread blocks on the write lock before the
 		 * thread you wakeup actually runs and have the new thread
 		 * "steal" the lock.  For now it's a lot simpler to just
 		 * wakeup all of the waiters.
 		 *
 		 * As above, if we fail, then another thread might have
 		 * acquired a read lock, so drop the turnstile lock and
 		 * restart.
 		 */
 		x = RW_UNLOCKED;
 		if (v & RW_LOCK_WRITE_WAITERS) {
 			queue = TS_EXCLUSIVE_QUEUE;
 			x |= (v & RW_LOCK_READ_WAITERS);
 		} else
 			queue = TS_SHARED_QUEUE;
 		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
 		    x)) {
 			turnstile_chain_unlock(&rw->lock_object);
 			v = RW_READ_VALUE(rw);
 			continue;
 		}
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
 			    __func__, rw);
 
 		/*
 		 * Ok.  The lock is released and all that's left is to
 		 * wake up the waiters.  Note that the lock might not be
 		 * free anymore, but in that case the writers will just
 		 * block again if they run before the new lock holder(s)
 		 * release the lock.
 		 */
 		ts = turnstile_lookup(&rw->lock_object);
 		MPASS(ts != NULL);
 		turnstile_broadcast(ts, queue);
 		turnstile_unpend(ts, TS_SHARED_LOCK);
 		turnstile_chain_unlock(&rw->lock_object);
 		td->td_rw_rlocks--;
 		break;
 	}
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
 }
 
 void
 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t v;
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
 	td = curthread;
 	v = RW_READ_VALUE(rw);
 
 	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__release) ||
 	    !__rw_runlock_try(rw, td, &v)))
 		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
 
 	TD_LOCKS_DEC(curthread);
 }
 
 void
 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
 }
 
 /*
  * This function is called when we are unable to obtain a write lock on the
  * first try.  This means that at least one other thread holds either a
  * read or write lock.
  */
 void
 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t tid;
 	struct rwlock *rw;
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
 	int spintries = 0;
 	int i, n;
 #endif
 	uintptr_t x;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
 	int doing_lockprof;
 #endif
 
 	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return;
 
 #if defined(ADAPTIVE_RWLOCKS)
 	lock_delay_arg_init(&lda, &rw_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	rw = rwlock2rw(c);
 	if (__predict_false(v == RW_UNLOCKED))
 		v = RW_READ_VALUE(rw);
 
 	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
 		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
 		    __func__, rw->lock_object.lo_name, file, line));
 		rw->rw_recurse++;
 		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
 		return;
 	}
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&rw->lock_object,
 	    &contested, &waittime);
 
 #ifdef LOCK_PROFILING
 	doing_lockprof = 1;
 	state = v;
 #elif defined(KDTRACE_HOOKS)
 	doing_lockprof = lockstat_enabled;
 	if (__predict_false(doing_lockprof)) {
 		all_time -= lockstat_nsecs(&rw->lock_object);
 		state = v;
 	}
 #endif
 
 	for (;;) {
 		if (v == RW_UNLOCKED) {
 			if (_rw_write_lock_fetch(rw, &v, tid))
 				break;
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		owner = lv_rw_wowner(v);
 		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 				    __func__, rw, owner);
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			do {
 				lock_delay(&lda);
 				v = RW_READ_VALUE(rw);
 				owner = lv_rw_wowner(v);
 			} while (owner != NULL && TD_IS_RUNNING(owner));
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			continue;
 		}
 		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
 		    spintries < rowner_retries) {
 			if (!(v & RW_LOCK_WRITE_SPINNER)) {
 				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 				    v | RW_LOCK_WRITE_SPINNER)) {
 					continue;
 				}
 			}
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			for (i = 0; i < rowner_loops; i += n) {
 				n = RW_READERS(v);
 				lock_delay_spin(n);
 				v = RW_READ_VALUE(rw);
 				if ((v & RW_LOCK_WRITE_SPINNER) == 0)
 					break;
 			}
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += rowner_loops - i;
 #endif
 			if (i != rowner_loops)
 				continue;
 		}
 #endif
 		ts = turnstile_trywait(&rw->lock_object);
 		v = RW_READ_VALUE(rw);
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if (!(v & RW_LOCK_READ)) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 		}
 #endif
 		/*
 		 * Check for the waiters flags about this rwlock.
 		 * If the lock was released, without maintain any pending
 		 * waiters queue, simply try to acquire it.
 		 * If a pending waiters queue is present, claim the lock
 		 * ownership and maintain the pending queue.
 		 */
 		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		if ((v & ~x) == RW_UNLOCKED) {
 			x &= ~RW_LOCK_WRITE_SPINNER;
 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
 				if (x)
 					turnstile_claim(ts);
 				else
 					turnstile_cancel(ts);
 				break;
 			}
 			turnstile_cancel(ts);
 			v = RW_READ_VALUE(rw);
 			continue;
 		}
 		/*
 		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 		 * set it.  If we fail to set it, then loop back and try
 		 * again.
 		 */
 		if (!(v & RW_LOCK_WRITE_WAITERS)) {
 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 			    v | RW_LOCK_WRITE_WAITERS)) {
 				turnstile_cancel(ts);
 				v = RW_READ_VALUE(rw);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 				    __func__, rw);
 		}
 		/*
 		 * We were unable to acquire the lock and the write waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 #ifdef ADAPTIVE_RWLOCKS
 		spintries = 0;
 #endif
 		v = RW_READ_VALUE(rw);
 	}
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&rw->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
 	/* Record only the loops spinning and not sleeping. */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_WRITER);
 }
 
 /*
  * This function is called if lockstat is active or the first try at releasing
  * a write lock failed.  The latter means that the lock is recursed or one of
  * the 2 waiter bits must be set indicating that at least one thread is waiting
  * on this lock.
  */
 void
-__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
-	uintptr_t v, setv;
+	uintptr_t tid, setv;
 	int queue;
 
+	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
-	v = RW_READ_VALUE(rw);
+	if (__predict_false(v == tid))
+		v = RW_READ_VALUE(rw);
+
 	if (v & RW_LOCK_WRITER_RECURSED) {
 		if (--(rw->rw_recurse) == 0)
 			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
 	if (v == tid && _rw_write_unlock(rw, tid))
 		return;
 
 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 	    ("%s: neither of the waiter flags are set", __func__));
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 
 	turnstile_chain_lock(&rw->lock_object);
 
 	/*
 	 * Use the same algo as sx locks for now.  Prefer waking up shared
 	 * waiters if we have any over writers.  This is probably not ideal.
 	 *
 	 * 'v' is the value we are going to write back to rw_lock.  If we
 	 * have waiters on both queues, we need to preserve the state of
 	 * the waiter flag for the queue we don't wake up.  For now this is
 	 * hardcoded for the algorithm mentioned above.
 	 *
 	 * In the case of both readers and writers waiting we wakeup the
 	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
 	 * new writer comes in before a reader it will claim the lock up
 	 * above.  There is probably a potential priority inversion in
 	 * there that could be worked around either by waking both queues
 	 * of waiters or doing some complicated lock handoff gymnastics.
 	 */
 	setv = RW_UNLOCKED;
 	v = RW_READ_VALUE(rw);
 	queue = TS_SHARED_QUEUE;
 	if (v & RW_LOCK_WRITE_WAITERS) {
 		queue = TS_EXCLUSIVE_QUEUE;
 		setv |= (v & RW_LOCK_READ_WAITERS);
 	}
 	atomic_store_rel_ptr(&rw->rw_lock, setv);
 
 	/* Wake up all waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 		    queue == TS_SHARED_QUEUE ? "read" : "write");
 
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 	turnstile_broadcast(ts, queue);
 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	turnstile_chain_unlock(&rw->lock_object);
 }
 
 /*
  * Attempt to do a non-blocking upgrade from a read lock to a write
  * lock.  This will only succeed if this thread holds a single read
  * lock.  Returns true if the upgrade succeeded and false otherwise.
  */
 int
 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t v, x, tid;
 	struct turnstile *ts;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 
 	/*
 	 * Attempt to switch from one reader to a writer.  If there
 	 * are any write waiters, then we will have to lock the
 	 * turnstile first to prevent races with another writer
 	 * calling turnstile_wait() before we have claimed this
 	 * turnstile.  So, do the simple case of no waiters first.
 	 */
 	tid = (uintptr_t)curthread;
 	success = 0;
 	for (;;) {
 		v = rw->rw_lock;
 		if (RW_READERS(v) > 1)
 			break;
 		if (!(v & RW_LOCK_WAITERS)) {
 			success = atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid);
 			if (!success)
 				continue;
 			break;
 		}
 
 		/*
 		 * Ok, we think we have waiters, so lock the turnstile.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 		v = rw->rw_lock;
 		if (RW_READERS(v) > 1) {
 			turnstile_cancel(ts);
 			break;
 		}
 		/*
 		 * Try to switch from one reader to a writer again.  This time
 		 * we honor the current state of the waiters flags.
 		 * If we obtain the lock with the flags set, then claim
 		 * ownership of the turnstile.
 		 */
 		x = rw->rw_lock & RW_LOCK_WAITERS;
 		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
 		if (success) {
 			if (x)
 				turnstile_claim(ts);
 			else
 				turnstile_cancel(ts);
 			break;
 		}
 		turnstile_cancel(ts);
 	}
 	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 	if (success) {
 		curthread->td_rw_rlocks--;
 		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(rw__upgrade, rw);
 	}
 	return (success);
 }
 
 int
 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Downgrade a write lock into a single read lock.
  */
 void
 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 {
 	struct turnstile *ts;
 	uintptr_t tid, v;
 	int rwait, wwait;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (rw_recursed(rw))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 
 	/*
 	 * Convert from a writer to a single reader.  First we handle
 	 * the easy case with no waiters.  If there are any waiters, we
 	 * lock the turnstile and "disown" the lock.
 	 */
 	tid = (uintptr_t)curthread;
 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 		goto out;
 
 	/*
 	 * Ok, we think we have waiters, so lock the turnstile so we can
 	 * read the waiter flags without any races.
 	 */
 	turnstile_chain_lock(&rw->lock_object);
 	v = rw->rw_lock & RW_LOCK_WAITERS;
 	rwait = v & RW_LOCK_READ_WAITERS;
 	wwait = v & RW_LOCK_WRITE_WAITERS;
 	MPASS(rwait | wwait);
 
 	/*
 	 * Downgrade from a write lock while preserving waiters flag
 	 * and give up ownership of the turnstile.
 	 */
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 	if (!wwait)
 		v &= ~RW_LOCK_READ_WAITERS;
 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 	/*
 	 * Wake other readers if there are no writers pending.  Otherwise they
 	 * won't be able to acquire the lock anyway.
 	 */
 	if (rwait && !wwait) {
 		turnstile_broadcast(ts, TS_SHARED_QUEUE);
 		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	} else
 		turnstile_disown(ts);
 	turnstile_chain_unlock(&rw->lock_object);
 out:
 	curthread->td_rw_rlocks++;
 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(rw__downgrade, rw);
 }
 
 void
 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
 }
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef __rw_assert
 #endif
 
 /*
  * In the non-WITNESS case, rw_assert() can only detect that at least
  * *some* thread owns an rlock, but it cannot guarantee that *this*
  * thread owns an rlock.
  */
 void
 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct rwlock *rw;
 
 	if (panicstr != NULL)
 		return;
 
 	rw = rwlock2rw(c);
 
 	switch (what) {
 	case RA_LOCKED:
 	case RA_LOCKED | RA_RECURSED:
 	case RA_LOCKED | RA_NOTRECURSED:
 	case RA_RLOCKED:
 	case RA_RLOCKED | RA_RECURSED:
 	case RA_RLOCKED | RA_NOTRECURSED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has a write lock or we have one
 		 * and are asserting a read lock, fail.  Also, if no one
 		 * has a lock at all, fail.
 		 */
 		if (rw->rw_lock == RW_UNLOCKED ||
 		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 		    rw_wowner(rw) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 			    "read " : "", file, line);
 
 		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 			if (rw_recursed(rw)) {
 				if (what & RA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    rw->lock_object.lo_name, file,
 					    line);
 			} else if (what & RA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case RA_WLOCKED:
 	case RA_WLOCKED | RA_RECURSED:
 	case RA_WLOCKED | RA_NOTRECURSED:
 		if (rw_wowner(rw) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		if (rw_recursed(rw)) {
 			if (what & RA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		} else if (what & RA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		break;
 	case RA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold a write lock fail.  We can't reliably check
 		 * to see if we hold a read lock or not.
 		 */
 		if (rw_wowner(rw) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif /* INVARIANT_SUPPORT */
 
 #ifdef DDB
 void
 db_show_rwlock(const struct lock_object *lock)
 {
 	const struct rwlock *rw;
 	struct thread *td;
 
 	rw = (const struct rwlock *)lock;
 
 	db_printf(" state: ");
 	if (rw->rw_lock == RW_UNLOCKED)
 		db_printf("UNLOCKED\n");
 	else if (rw->rw_lock == RW_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (rw->rw_lock & RW_LOCK_READ)
 		db_printf("RLOCK: %ju locks\n",
 		    (uintmax_t)(RW_READERS(rw->rw_lock)));
 	else {
 		td = rw_wowner(rw);
 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (rw_recursed(rw))
 			db_printf(" recursed: %u\n", rw->rw_recurse);
 	}
 	db_printf(" waiters: ");
 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 	case RW_LOCK_READ_WAITERS:
 		db_printf("readers\n");
 		break;
 	case RW_LOCK_WRITE_WAITERS:
 		db_printf("writers\n");
 		break;
 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 		db_printf("readers and writers\n");
 		break;
 	default:
 		db_printf("none\n");
 		break;
 	}
 }
 
 #endif
Index: head/sys/kern/kern_sx.c
===================================================================
--- head/sys/kern/kern_sx.c	(revision 326106)
+++ head/sys/kern/kern_sx.c	(revision 326107)
@@ -1,1410 +1,1413 @@
 /*-
  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 /*
  * Shared/exclusive locks.  This implementation attempts to ensure
  * deterministic lock granting behavior, so that slocks and xlocks are
  * interleaved.
  *
  * Priority propagation will not generally raise the priority of lock holders,
  * so should not be relied upon in combination with sx locks.
  */
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_sx.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/sx.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #include <machine/cpu.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #define	ADAPTIVE_SX
 #endif
 
 CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /* Handy macros for sleep queues. */
 #define	SQ_EXCLUSIVE_QUEUE	0
 #define	SQ_SHARED_QUEUE		1
 
 /*
  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
  * drop Giant anytime we have to sleep or if we adaptively spin.
  */
 #define	GIANT_DECLARE							\
 	int _giantcnt = 0;						\
 	WITNESS_SAVE_DECL(Giant)					\
 
 #define	GIANT_SAVE(work) do {						\
 	if (mtx_owned(&Giant)) {					\
 		work++;							\
 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
 		while (mtx_owned(&Giant)) {				\
 			_giantcnt++;					\
 			mtx_unlock(&Giant);				\
 		}							\
 	}								\
 } while (0)
 
 #define GIANT_RESTORE() do {						\
 	if (_giantcnt > 0) {						\
 		mtx_assert(&Giant, MA_NOTOWNED);			\
 		while (_giantcnt--)					\
 			mtx_lock(&Giant);				\
 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
 	}								\
 } while (0)
 
 /*
  * Returns true if an exclusive lock is recursed.  It assumes
  * curthread currently has an exclusive lock.
  */
 #define	sx_recursed(sx)		((sx)->sx_recurse != 0)
 
 static void	assert_sx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_sx(const struct lock_object *lock);
 #endif
 static void	lock_sx(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_sx(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_sx(struct lock_object *lock);
 
 struct lock_class lock_class_sx = {
 	.lc_name = "sx",
 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_sx,
 #ifdef DDB
 	.lc_ddb_show = db_show_sx,
 #endif
 	.lc_lock = lock_sx,
 	.lc_unlock = unlock_sx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_sx,
 #endif
 };
 
 #ifndef INVARIANTS
 #define	_sx_assert(sx, what, file, line)
 #endif
 
 #ifdef ADAPTIVE_SX
 static __read_frequently u_int asx_retries = 10;
 static __read_frequently u_int asx_loops = 10000;
 static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
 SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
 SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
 
 static struct lock_delay_config __read_frequently sx_delay;
 
 SYSCTL_INT(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base,
     0, "");
 SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
     0, "");
 
 LOCK_DELAY_SYSINIT_DEFAULT(sx_delay);
 #endif
 
 void
 assert_sx(const struct lock_object *lock, int what)
 {
 
 	sx_assert((const struct sx *)lock, what);
 }
 
 void
 lock_sx(struct lock_object *lock, uintptr_t how)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	if (how)
 		sx_slock(sx);
 	else
 		sx_xlock(sx);
 }
 
 uintptr_t
 unlock_sx(struct lock_object *lock)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
 	if (sx_xlocked(sx)) {
 		sx_xunlock(sx);
 		return (0);
 	} else {
 		sx_sunlock(sx);
 		return (1);
 	}
 }
 
 #ifdef KDTRACE_HOOKS
 int
 owner_sx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct sx *sx;
 	uintptr_t x;
 
 	sx = (const struct sx *)lock;
 	x = sx->sx_lock;
 	*owner = NULL;
 	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
 	    ((*owner = (struct thread *)SX_OWNER(x)) != NULL));
 }
 #endif
 
 void
 sx_sysinit(void *arg)
 {
 	struct sx_args *sargs = arg;
 
 	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
 }
 
 void
 sx_init_flags(struct sx *sx, const char *description, int opts)
 {
 	int flags;
 
 	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
 	    SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
 	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
 	    &sx->sx_lock));
 
 	flags = LO_SLEEPABLE | LO_UPGRADABLE;
 	if (opts & SX_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & SX_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (!(opts & SX_NOWITNESS))
 		flags |= LO_WITNESS;
 	if (opts & SX_RECURSE)
 		flags |= LO_RECURSABLE;
 	if (opts & SX_QUIET)
 		flags |= LO_QUIET;
 	if (opts & SX_NEW)
 		flags |= LO_NEW;
 
 	flags |= opts & SX_NOADAPTIVE;
 	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
 	sx->sx_lock = SX_LOCK_UNLOCKED;
 	sx->sx_recurse = 0;
 }
 
 void
 sx_destroy(struct sx *sx)
 {
 
 	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
 	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
 	sx->sx_lock = SX_LOCK_DESTROYED;
 	lock_destroy(&sx->lock_object);
 }
 
 int
 sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 
 	x = sx->sx_lock;
 	for (;;) {
 		KASSERT(x != SX_LOCK_DESTROYED,
 		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
 		if (!(x & SX_LOCK_SHARED))
 			break;
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, x + SX_ONE_SHARER)) {
 			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
 			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
 			    sx, 0, 0, file, line, LOCKSTAT_READER);
 			TD_LOCKS_INC(curthread);
 			return (1);
 		}
 	}
 
 	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 int
 sx_try_slock_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_slock_int(sx LOCK_FILE_LINE_ARG));
 }
 
 int
 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
 {
 	uintptr_t tid, x;
 	int error = 0;
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	tid = (uintptr_t)curthread;
 	x = SX_LOCK_UNLOCKED;
 	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 		error = _sx_xlock_hard(sx, x, opts LOCK_FILE_LINE_ARG);
 	else
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    0, 0, file, line, LOCKSTAT_WRITER);
 	if (!error) {
 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
 		    file, line);
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 		TD_LOCKS_INC(curthread);
 	}
 
 	return (error);
 }
 
 int
 sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	struct thread *td;
 	uintptr_t tid, x;
 	int rval;
 	bool recursed;
 
 	td = curthread;
 	tid = (uintptr_t)td;
 	if (SCHEDULER_STOPPED_TD(td))
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
 	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
 
 	rval = 1;
 	recursed = false;
 	x = SX_LOCK_UNLOCKED;
 	for (;;) {
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 			break;
 		if (x == SX_LOCK_UNLOCKED)
 			continue;
 		if (x == tid && (sx->lock_object.lo_flags & LO_RECURSABLE)) {
 			sx->sx_recurse++;
 			atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 			break;
 		}
 		rval = 0;
 		break;
 	}
 
 	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		if (!recursed)
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
 			    sx, 0, 0, file, line, LOCKSTAT_WRITER);
 		TD_LOCKS_INC(curthread);
 	}
 
 	return (rval);
 }
 
 int
 sx_try_xlock_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_xlock_int(sx LOCK_FILE_LINE_ARG));
 }
 
 void
 _sx_xunlock(struct sx *sx, const char *file, int line)
 {
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
 	    line);
 #if LOCK_DEBUG > 0
 	_sx_xunlock_hard(sx, (uintptr_t)curthread, file, line);
 #else
 	__sx_xunlock(sx, curthread, file, line);
 #endif
 	TD_LOCKS_DEC(curthread);
 }
 
 /*
  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
  * This will only succeed if this thread holds a single shared lock.
  * Return 1 if if the upgrade succeed, 0 otherwise.
  */
 int
 sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 
 	/*
 	 * Try to switch from one shared lock to an exclusive lock.  We need
 	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
 	 * we will wake up the exclusive waiters when we drop the lock.
 	 */
 	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
 	success = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
 	    (uintptr_t)curthread | x);
 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
 	if (success) {
 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(sx__upgrade, sx);
 	}
 	return (success);
 }
 
 int
 sx_try_upgrade_(struct sx *sx, const char *file, int line)
 {
 
 	return (sx_try_upgrade_int(sx LOCK_FILE_LINE_ARG));
 }
 
 /*
  * Downgrade an unrecursed exclusive lock into a single shared lock.
  */
 void
 sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 	int wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (sx_recursed(sx))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
 
 	/*
 	 * Try to switch from an exclusive lock with no shared waiters
 	 * to one sharer with no shared waiters.  If there are
 	 * exclusive waiters, we don't need to lock the sleep queue so
 	 * long as we preserve the flag.  We do one quick try and if
 	 * that fails we grab the sleepq lock to keep the flags from
 	 * changing and do it the slow way.
 	 *
 	 * We have to lock the sleep queue if there are shared waiters
 	 * so we can wake them up.
 	 */
 	x = sx->sx_lock;
 	if (!(x & SX_LOCK_SHARED_WAITERS) &&
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
 		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 		return;
 	}
 
 	/*
 	 * Lock the sleep queue so we can read the waiters bits
 	 * without any races and wakeup any shared waiters.
 	 */
 	sleepq_lock(&sx->lock_object);
 
 	/*
 	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
 	 * shared lock.  If there are any shared waiters, wake them up.
 	 */
 	wakeup_swapper = 0;
 	x = sx->sx_lock;
 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
 	if (x & SX_LOCK_SHARED_WAITERS)
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, SQ_SHARED_QUEUE);
 	sleepq_release(&sx->lock_object);
 
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(sx__downgrade, sx);
 
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 void
 sx_downgrade_(struct sx *sx, const char *file, int line)
 {
 
 	sx_downgrade_int(sx LOCK_FILE_LINE_ARG);
 }
 
 /*
  * This function represents the so-called 'hard case' for sx_xlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 int
 _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
 	uintptr_t tid;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 	u_int i, n, spintries = 0;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int error = 0;
 #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef	KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
 #endif
 	int extra_work = 0;
 
 	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 #if defined(ADAPTIVE_SX)
 	lock_delay_arg_init(&lda, &sx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
 
 	if (__predict_false(x == SX_LOCK_UNLOCKED))
 		x = SX_READ_VALUE(sx);
 
 	/* If we already hold an exclusive lock, then recurse. */
 	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
 		    sx->lock_object.lo_name, file, line));
 		sx->sx_recurse++;
 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
 		return (0);
 	}
 
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);
 
 #ifdef LOCK_PROFILING
 	extra_work = 1;
 	state = x;
 #elif defined(KDTRACE_HOOKS)
 	extra_work = lockstat_enabled;
 	if (__predict_false(extra_work)) {
 		all_time -= lockstat_nsecs(&sx->lock_object);
 		state = x;
 	}
 #endif
 
 	for (;;) {
 		if (x == SX_LOCK_UNLOCKED) {
 			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 				break;
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			if ((x & SX_LOCK_SHARED) == 0) {
 				owner = lv_sx_owner(x);
 				if (TD_IS_RUNNING(owner)) {
 					if (LOCK_LOG_TEST(&sx->lock_object, 0))
 						CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 						    __func__, sx, owner);
 					KTR_STATE1(KTR_SCHED, "thread",
 					    sched_tdname(curthread), "spinning",
 					    "lockname:\"%s\"",
 					    sx->lock_object.lo_name);
 					GIANT_SAVE(extra_work);
 					do {
 						lock_delay(&lda);
 						x = SX_READ_VALUE(sx);
 						owner = lv_sx_owner(x);
 					} while (owner != NULL &&
 						    TD_IS_RUNNING(owner));
 					KTR_STATE0(KTR_SCHED, "thread",
 					    sched_tdname(curthread), "running");
 					continue;
 				}
 			} else if (SX_SHARERS(x) && spintries < asx_retries) {
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", sx->lock_object.lo_name);
 				GIANT_SAVE(extra_work);
 				spintries++;
 				for (i = 0; i < asx_loops; i += n) {
 					if (LOCK_LOG_TEST(&sx->lock_object, 0))
 						CTR4(KTR_LOCK,
 				    "%s: shared spinning on %p with %u and %u",
 						    __func__, sx, spintries, i);
 					n = SX_SHARERS(x);
 					lock_delay_spin(n);
 					x = SX_READ_VALUE(sx);
 					if ((x & SX_LOCK_SHARED) == 0 ||
 					    SX_SHARERS(x) == 0)
 						break;
 				}
 #ifdef KDTRACE_HOOKS
 				lda.spin_cnt += i;
 #endif
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				if (i != asx_loops)
 					continue;
 			}
 		}
 #endif
 
 		sleepq_lock(&sx->lock_object);
 		x = SX_READ_VALUE(sx);
 
 		/*
 		 * If the lock was released while spinning on the
 		 * sleep queue chain lock, try again.
 		 */
 		if (x == SX_LOCK_UNLOCKED) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the sleep queue
 		 * chain lock.  If so, drop the sleep queue lock and try
 		 * again.
 		 */
 		if (!(x & SX_LOCK_SHARED) &&
 		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * If an exclusive lock was released with both shared
 		 * and exclusive waiters and a shared waiter hasn't
 		 * woken up and acquired the lock yet, sx_lock will be
 		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
 		 * If we see that value, try to acquire it once.  Note
 		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
 		 * as there are other exclusive waiters still.  If we
 		 * fail, restart the loop.
 		 */
 		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
 			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
 			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
 			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
 				    __func__, sx);
 				break;
 			}
 			sleepq_release(&sx->lock_object);
 			x = SX_READ_VALUE(sx);
 			continue;
 		}
 
 		/*
 		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
 		 * than loop back and retry.
 		 */
 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
 			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				x = SX_READ_VALUE(sx);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
 				    __func__, sx);
 		}
 
 		/*
 		 * Since we have been unable to acquire the exclusive
 		 * lock and the exclusive waiters flag is set, we have
 		 * to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&sx->lock_object);
 #endif
 		GIANT_SAVE(extra_work);
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&sx->lock_object);
 		sleep_cnt++;
 #endif
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 		x = SX_READ_VALUE(sx);
 	}
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!extra_work))
 		return (error);
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&sx->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
 		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 #endif
 	if (!error)
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    contested, waittime, file, line, LOCKSTAT_WRITER);
 	GIANT_RESTORE();
 	return (error);
 }
 
 /*
  * This function represents the so-called 'hard case' for sx_xunlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 void
-_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
-	uintptr_t x, setx;
+	uintptr_t tid, setx;
 	int queue, wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
-	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
+	tid = (uintptr_t)curthread;
 
-	x = SX_READ_VALUE(sx);
-	if (x & SX_LOCK_RECURSED) {
+	if (__predict_false(x == tid))
+		x = SX_READ_VALUE(sx);
+
+	MPASS(!(x & SX_LOCK_SHARED));
+
+	if (__predict_false(x & SX_LOCK_RECURSED)) {
 		/* The lock is recursed, unrecurse one level. */
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
 		return;
 	}
 
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_WRITER);
 	if (x == tid &&
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
 		return;
 
-	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
-	    SX_LOCK_EXCLUSIVE_WAITERS));
+	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
 
 	sleepq_lock(&sx->lock_object);
 	x = SX_READ_VALUE(sx);
 
 	/*
 	 * The wake up algorithm here is quite simple and probably not
 	 * ideal.  It gives precedence to shared waiters if they are
 	 * present.  For this condition, we have to preserve the
 	 * state of the exclusive waiters flag.
 	 * If interruptible sleeps left the shared queue empty avoid a
 	 * starvation for the threads sleeping on the exclusive queue by giving
 	 * them precedence and cleaning up the shared waiters bit anyway.
 	 */
 	setx = SX_LOCK_UNLOCKED;
 	queue = SQ_EXCLUSIVE_QUEUE;
 	if ((x & SX_LOCK_SHARED_WAITERS) != 0 &&
 	    sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
 		queue = SQ_SHARED_QUEUE;
 		setx |= (x & SX_LOCK_EXCLUSIVE_WAITERS);
 	}
 	atomic_store_rel_ptr(&sx->sx_lock, setx);
 
 	/* Wake up all the waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
 		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
 		    "exclusive");
 
 	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
 	    queue);
 	sleepq_release(&sx->lock_object);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 static bool __always_inline
 __sx_slock_try(struct sx *sx, uintptr_t *xp LOCK_FILE_LINE_ARG_DEF)
 {
 
 	/*
 	 * If no other thread has an exclusive lock then try to bump up
 	 * the count of sharers.  Since we have to preserve the state
 	 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
 	 * shared lock loop back and retry.
 	 */
 	while (*xp & SX_LOCK_SHARED) {
 		MPASS(!(*xp & SX_LOCK_SHARED_WAITERS));
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, xp,
 		    *xp + SX_ONE_SHARER)) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR4(KTR_LOCK, "%s: %p succeed %p -> %p",
 				    __func__, sx, (void *)*xp,
 				    (void *)(*xp + SX_ONE_SHARER));
 			return (true);
 		}
 	}
 	return (false);
 }
 
 static int __noinline
 _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int error = 0;
 #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
 #ifdef KDTRACE_HOOKS
 	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
 	int64_t all_time = 0;
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
 #endif
 	int extra_work = 0;
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 #if defined(ADAPTIVE_SX)
 	lock_delay_arg_init(&lda, &sx_delay);
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);
 
 #ifdef LOCK_PROFILING
 	extra_work = 1;
 	state = x;
 #elif defined(KDTRACE_HOOKS)
 	extra_work = lockstat_enabled;
 	if (__predict_false(extra_work)) {
 		all_time -= lockstat_nsecs(&sx->lock_object);
 		state = x;
 	}
 #endif
 
 	/*
 	 * As with rwlocks, we don't make any attempt to try to block
 	 * shared locks once there is an exclusive waiter.
 	 */
 	for (;;) {
 		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
 			break;
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			owner = lv_sx_owner(x);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, sx, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", sx->lock_object.lo_name);
 				GIANT_SAVE(extra_work);
 				do {
 					lock_delay(&lda);
 					x = SX_READ_VALUE(sx);
 					owner = lv_sx_owner(x);
 				} while (owner != NULL && TD_IS_RUNNING(owner));
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * Some other thread already has an exclusive lock, so
 		 * start the process of blocking.
 		 */
 		sleepq_lock(&sx->lock_object);
 		x = SX_READ_VALUE(sx);
 
 		/*
 		 * The lock could have been released while we spun.
 		 * In this case loop back and retry.
 		 */
 		if (x & SX_LOCK_SHARED) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if (!(x & SX_LOCK_SHARED) &&
 		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				x = SX_READ_VALUE(sx);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
 		 * fail to set it drop the sleep queue lock and loop
 		 * back.
 		 */
 		if (!(x & SX_LOCK_SHARED_WAITERS)) {
 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
 			    x | SX_LOCK_SHARED_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				x = SX_READ_VALUE(sx);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
 				    __func__, sx);
 		}
 
 		/*
 		 * Since we have been unable to acquire the shared lock,
 		 * we have to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs(&sx->lock_object);
 #endif
 		GIANT_SAVE(extra_work);
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs(&sx->lock_object);
 		sleep_cnt++;
 #endif
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 		x = SX_READ_VALUE(sx);
 	}
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!extra_work))
 		return (error);
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&sx->lock_object);
 	if (sleep_time)
 		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
 		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 #endif
 	if (error == 0) {
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    contested, waittime, file, line, LOCKSTAT_READER);
 	}
 	GIANT_RESTORE();
 	return (error);
 }
 
 int
 _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 	int error;
 
 	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
 	    !TD_IS_IDLETHREAD(curthread),
 	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
 
 	error = 0;
 	x = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(sx__acquire) ||
 	    !__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG)))
 		error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
 	if (error == 0) {
 		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
 		WITNESS_LOCK(&sx->lock_object, 0, file, line);
 		TD_LOCKS_INC(curthread);
 	}
 	return (error);
 }
 
 int
 _sx_slock(struct sx *sx, int opts, const char *file, int line)
 {
 
 	return (_sx_slock_int(sx, opts LOCK_FILE_LINE_ARG));
 }
 
 static bool __always_inline
 _sx_sunlock_try(struct sx *sx, uintptr_t *xp)
 {
 
 	for (;;) {
 		/*
 		 * We should never have sharers while at least one thread
 		 * holds a shared lock.
 		 */
 		KASSERT(!(*xp & SX_LOCK_SHARED_WAITERS),
 		    ("%s: waiting sharers", __func__));
 
 		/*
 		 * See if there is more than one shared lock held.  If
 		 * so, just drop one and return.
 		 */
 		if (SX_SHARERS(*xp) > 1) {
 			if (atomic_fcmpset_rel_ptr(&sx->sx_lock, xp,
 			    *xp - SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
 					    __func__, sx, (void *)*xp,
 					    (void *)(*xp - SX_ONE_SHARER));
 				return (true);
 			}
 			continue;
 		}
 
 		/*
 		 * If there aren't any waiters for an exclusive lock,
 		 * then try to drop it quickly.
 		 */
 		if (!(*xp & SX_LOCK_EXCLUSIVE_WAITERS)) {
 			MPASS(*xp == SX_SHARERS_LOCK(1));
 			*xp = SX_SHARERS_LOCK(1);
 			if (atomic_fcmpset_rel_ptr(&sx->sx_lock,
 			    xp, SX_LOCK_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, sx);
 				return (true);
 			}
 			continue;
 		}
 		break;
 	}
 	return (false);
 }
 
 static void __noinline
 _sx_sunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
 	int wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
 
 	for (;;) {
 		if (_sx_sunlock_try(sx, &x))
 			break;
 
 		/*
 		 * At this point, there should just be one sharer with
 		 * exclusive waiters.
 		 */
 		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
 
 		sleepq_lock(&sx->lock_object);
 
 		/*
 		 * Wake up semantic here is quite simple:
 		 * Just wake up all the exclusive waiters.
 		 * Note that the state of the lock could have changed,
 		 * so if it fails loop back and retry.
 		 */
 		if (!atomic_cmpset_rel_ptr(&sx->sx_lock,
 		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
 		    SX_LOCK_UNLOCKED)) {
 			sleepq_release(&sx->lock_object);
 			x = SX_READ_VALUE(sx);
 			continue;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p waking up all thread on"
 			    "exclusive queue", __func__, sx);
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, SQ_EXCLUSIVE_QUEUE);
 		sleepq_release(&sx->lock_object);
 		if (wakeup_swapper)
 			kick_proc0();
 		break;
 	}
 }
 
 void
 _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
 	uintptr_t x;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
 
 	x = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(sx__release) ||
 	    !_sx_sunlock_try(sx, &x)))
 		_sx_sunlock_hard(sx, x LOCK_FILE_LINE_ARG);
 
 	TD_LOCKS_DEC(curthread);
 }
 
 void
 _sx_sunlock(struct sx *sx, const char *file, int line)
 {
 
 	_sx_sunlock_int(sx LOCK_FILE_LINE_ARG);
 }
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef	_sx_assert
 #endif
 
 /*
  * In the non-WITNESS case, sx_assert() can only detect that at least
  * *some* thread owns an slock, but it cannot guarantee that *this*
  * thread owns an slock.
  */
 void
 _sx_assert(const struct sx *sx, int what, const char *file, int line)
 {
 #ifndef WITNESS
 	int slocked = 0;
 #endif
 
 	if (panicstr != NULL)
 		return;
 	switch (what) {
 	case SA_SLOCKED:
 	case SA_SLOCKED | SA_NOTRECURSED:
 	case SA_SLOCKED | SA_RECURSED:
 #ifndef WITNESS
 		slocked = 1;
 		/* FALLTHROUGH */
 #endif
 	case SA_LOCKED:
 	case SA_LOCKED | SA_NOTRECURSED:
 	case SA_LOCKED | SA_RECURSED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has an exclusive lock or we
 		 * have one and are asserting a shared lock, fail.
 		 * Also, if no one has a lock at all, fail.
 		 */
 		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
 		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
 		    sx_xholder(sx) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    sx->lock_object.lo_name, slocked ? "share " : "",
 			    file, line);
 
 		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
 			if (sx_recursed(sx)) {
 				if (what & SA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    sx->lock_object.lo_name, file,
 					    line);
 			} else if (what & SA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case SA_XLOCKED:
 	case SA_XLOCKED | SA_NOTRECURSED:
 	case SA_XLOCKED | SA_RECURSED:
 		if (sx_xholder(sx) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		if (sx_recursed(sx)) {
 			if (what & SA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		} else if (what & SA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		break;
 	case SA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold an exclusve lock fail.  We can't
 		 * reliably check to see if we hold a shared lock or
 		 * not.
 		 */
 		if (sx_xholder(sx) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif	/* INVARIANT_SUPPORT */
 
 #ifdef DDB
 static void
 db_show_sx(const struct lock_object *lock)
 {
 	struct thread *td;
 	const struct sx *sx;
 
 	sx = (const struct sx *)lock;
 
 	db_printf(" state: ");
 	if (sx->sx_lock == SX_LOCK_UNLOCKED)
 		db_printf("UNLOCKED\n");
 	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (sx->sx_lock & SX_LOCK_SHARED)
 		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
 	else {
 		td = sx_xholder(sx);
 		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (sx_recursed(sx))
 			db_printf(" recursed: %d\n", sx->sx_recurse);
 	}
 
 	db_printf(" waiters: ");
 	switch(sx->sx_lock &
 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
 	case SX_LOCK_SHARED_WAITERS:
 		db_printf("shared\n");
 		break;
 	case SX_LOCK_EXCLUSIVE_WAITERS:
 		db_printf("exclusive\n");
 		break;
 	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
 		db_printf("exclusive and shared\n");
 		break;
 	default:
 		db_printf("none\n");
 	}
 }
 
 /*
  * Check to see if a thread that is blocked on a sleep queue is actually
  * blocked on an sx lock.  If so, output some details and return true.
  * If the lock has an exclusive owner, return that in *ownerp.
  */
 int
 sx_chain(struct thread *td, struct thread **ownerp)
 {
 	struct sx *sx;
 
 	/*
 	 * Check to see if this thread is blocked on an sx lock.
 	 * First, we check the lock class.  If that is ok, then we
 	 * compare the lock name against the wait message.
 	 */
 	sx = td->td_wchan;
 	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
 	    sx->lock_object.lo_name != td->td_wmesg)
 		return (0);
 
 	/* We think we have an sx lock, so output some details. */
 	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
 	*ownerp = sx_xholder(sx);
 	if (sx->sx_lock & SX_LOCK_SHARED)
 		db_printf("SLOCK (count %ju)\n",
 		    (uintmax_t)SX_SHARERS(sx->sx_lock));
 	else
 		db_printf("XLOCK\n");
 	return (1);
 }
 #endif
Index: head/sys/sys/mutex.h
===================================================================
--- head/sys/sys/mutex.h	(revision 326106)
+++ head/sys/sys/mutex.h	(revision 326107)
@@ -1,543 +1,546 @@
 /*-
  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
  * $FreeBSD$
  */
 
 #ifndef _SYS_MUTEX_H_
 #define _SYS_MUTEX_H_
 
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
 #ifdef _KERNEL
 #include <sys/pcpu.h>
 #include <sys/lock_profile.h>
 #include <sys/lockstat.h>
 #include <machine/atomic.h>
 #include <machine/cpufunc.h>
 
 /*
  * Mutex types and options passed to mtx_init().  MTX_QUIET and MTX_DUPOK
  * can also be passed in.
  */
 #define	MTX_DEF		0x00000000	/* DEFAULT (sleep) lock */ 
 #define MTX_SPIN	0x00000001	/* Spin lock (disables interrupts) */
 #define MTX_RECURSE	0x00000004	/* Option: lock allowed to recurse */
 #define	MTX_NOWITNESS	0x00000008	/* Don't do any witness checking. */
 #define MTX_NOPROFILE   0x00000020	/* Don't profile this lock */
 #define	MTX_NEW		0x00000040	/* Don't check for double-init */
 
 /*
  * Option flags passed to certain lock/unlock routines, through the use
  * of corresponding mtx_{lock,unlock}_flags() interface macros.
  */
 #define	MTX_QUIET	LOP_QUIET	/* Don't log a mutex event */
 #define	MTX_DUPOK	LOP_DUPOK	/* Don't log a duplicate acquire */
 
 /*
  * State bits kept in mutex->mtx_lock, for the DEFAULT lock type. None of this,
  * with the exception of MTX_UNOWNED, applies to spin locks.
  */
 #define	MTX_UNOWNED	0x00000000	/* Cookie for free mutex */
 #define	MTX_RECURSED	0x00000001	/* lock recursed (for MTX_DEF only) */
 #define	MTX_CONTESTED	0x00000002	/* lock contested (for MTX_DEF only) */
 #define	MTX_DESTROYED	0x00000004	/* lock destroyed */
 #define	MTX_FLAGMASK	(MTX_RECURSED | MTX_CONTESTED | MTX_DESTROYED)
 
 /*
  * Prototypes
  *
  * NOTE: Functions prepended with `_' (underscore) are exported to other parts
  *	 of the kernel via macros, thus allowing us to use the cpp LOCK_FILE
  *	 and LOCK_LINE or for hiding the lock cookie crunching to the
  *	 consumers. These functions should not be called directly by any
  *	 code using the API. Their macros cover their functionality.
  *	 Functions with a `_' suffix are the entrypoint for the common
  *	 KPI covering both compat shims and fast path case.  These can be
  *	 used by consumers willing to pass options, file and line
  *	 informations, in an option-independent way.
  *
  * [See below for descriptions]
  *
  */
 void	_mtx_init(volatile uintptr_t *c, const char *name, const char *type,
 	    int opts);
 void	_mtx_destroy(volatile uintptr_t *c);
 void	mtx_sysinit(void *arg);
 int	_mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF);
 int	_mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
 void	mutex_init(void);
 #if LOCK_DEBUG > 0
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
 	    const char *file, int line);
-void	__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
-	    int line);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+	    const char *file, int line);
 #else
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v);
-void	__mtx_unlock_sleep(volatile uintptr_t *c);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v);
 #endif
 
 #ifdef SMP
 #if LOCK_DEBUG > 0
 void	_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, int opts,
 	    const char *file, int line);
 #else
 void	_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v);
 #endif
 #endif
 void	__mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
 void	__mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
 void	__mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
 	     int line);
 int	__mtx_trylock_spin_flags(volatile uintptr_t *c, int opts,
 	     const char *file, int line);
 void	__mtx_unlock_spin_flags(volatile uintptr_t *c, int opts,
 	    const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	__mtx_assert(const volatile uintptr_t *c, int what, const char *file,
 	    int line);
 #endif
 void	thread_lock_flags_(struct thread *, int, const char *, int);
 #if LOCK_DEBUG > 0
 void	_thread_lock(struct thread *td, int opts, const char *file, int line);
 #else
 void	_thread_lock(struct thread *);
 #endif
 
 #if defined(LOCK_PROFILING) || defined(KLD_MODULE)
 #define	thread_lock(tdp)						\
 	thread_lock_flags_((tdp), 0, __FILE__, __LINE__)
 #elif LOCK_DEBUG > 0
 #define	thread_lock(tdp)						\
 	_thread_lock((tdp), 0, __FILE__, __LINE__)
 #else
 #define	thread_lock(tdp)						\
 	_thread_lock((tdp))
 #endif
 
 #define	thread_lock_flags(tdp, opt)					\
 	thread_lock_flags_((tdp), (opt), __FILE__, __LINE__)
 #define	thread_unlock(tdp)						\
        mtx_unlock_spin((tdp)->td_lock)
 
 /*
  * Top-level macros to provide lock cookie once the actual mtx is passed.
  * They will also prevent passing a malformed object to the mtx KPI by
  * failing compilation as the mtx_lock reserved member will not be found.
  */
 #define	mtx_init(m, n, t, o)						\
 	_mtx_init(&(m)->mtx_lock, n, t, o)
 #define	mtx_destroy(m)							\
 	_mtx_destroy(&(m)->mtx_lock)
 #define	mtx_trylock_flags_(m, o, f, l)					\
 	_mtx_trylock_flags_(&(m)->mtx_lock, o, f, l)
 #if LOCK_DEBUG > 0
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
 	__mtx_lock_sleep(&(m)->mtx_lock, v, o, f, l)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v, o, f, l)
 #else
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
 	__mtx_lock_sleep(&(m)->mtx_lock, v)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v)
 #endif
 #ifdef SMP
 #if LOCK_DEBUG > 0
 #define	_mtx_lock_spin(m, v, o, f, l)					\
 	_mtx_lock_spin_cookie(&(m)->mtx_lock, v, o, f, l)
 #else
 #define	_mtx_lock_spin(m, v, o, f, l)					\
 	_mtx_lock_spin_cookie(&(m)->mtx_lock, v)
 #endif
 #endif
 #define	_mtx_lock_flags(m, o, f, l)					\
 	__mtx_lock_flags(&(m)->mtx_lock, o, f, l)
 #define	_mtx_unlock_flags(m, o, f, l)					\
 	__mtx_unlock_flags(&(m)->mtx_lock, o, f, l)
 #define	_mtx_lock_spin_flags(m, o, f, l)				\
 	__mtx_lock_spin_flags(&(m)->mtx_lock, o, f, l)
 #define	_mtx_trylock_spin_flags(m, o, f, l)				\
 	__mtx_trylock_spin_flags(&(m)->mtx_lock, o, f, l)
 #define	_mtx_unlock_spin_flags(m, o, f, l)				\
 	__mtx_unlock_spin_flags(&(m)->mtx_lock, o, f, l)
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define	_mtx_assert(m, w, f, l)						\
 	__mtx_assert(&(m)->mtx_lock, w, f, l)
 #endif
 
 #define	mtx_recurse	lock_object.lo_data
 
 /* Very simple operations on mtx_lock. */
 
 /* Try to obtain mtx_lock once. */
 #define _mtx_obtain_lock(mp, tid)					\
 	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
 
 #define _mtx_obtain_lock_fetch(mp, vp, tid)				\
 	atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid))
 
 /* Try to release mtx_lock if it is unrecursed and uncontested. */
 #define _mtx_release_lock(mp, tid)					\
 	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED)
 
 /* Release mtx_lock quickly, assuming we own it. */
 #define _mtx_release_lock_quick(mp)					\
 	atomic_store_rel_ptr(&(mp)->mtx_lock, MTX_UNOWNED)
 
+#define	_mtx_release_lock_fetch(mp, vp)					\
+	atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, (vp), MTX_UNOWNED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels.  If the lock cannot be acquired or released trivially then
  * the work is deferred to another function.
  */
 
 /* Lock a normal mutex. */
 #define __mtx_lock(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 	uintptr_t _v = MTX_UNOWNED;					\
 									\
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\
 	    !_mtx_obtain_lock_fetch((mp), &_v, _tid)))			\
 		_mtx_lock_sleep((mp), _v, (opts), (file), (line));	\
 } while (0)
 
 /*
  * Lock a spin mutex.  For spinlocks, we handle recursion inline (it
  * turns out that function calls can be significantly expensive on
  * some architectures).  Since spin locks are not _too_ common,
  * inlining this code is not too big a deal.
  */
 #ifdef SMP
 #define __mtx_lock_spin(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 	uintptr_t _v = MTX_UNOWNED;					\
 									\
 	spinlock_enter();						\
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(spin__acquire) ||	\
 	    !_mtx_obtain_lock_fetch((mp), &_v, _tid))) 			\
 		_mtx_lock_spin((mp), _v, (opts), (file), (line)); 	\
 } while (0)
 #define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__  ({	\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 	int _ret;							\
 									\
 	spinlock_enter();						\
 	if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
 		spinlock_exit();					\
 		_ret = 0;						\
 	} else {							\
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire,	\
 		    mp, 0, 0, file, line);				\
 		_ret = 1;						\
 	}								\
 	_ret;								\
 })
 #else /* SMP */
 #define __mtx_lock_spin(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 									\
 	spinlock_enter();						\
 	if ((mp)->mtx_lock == _tid)					\
 		(mp)->mtx_recurse++;					\
 	else {								\
 		KASSERT((mp)->mtx_lock == MTX_UNOWNED, ("corrupt spinlock")); \
 		(mp)->mtx_lock = _tid;					\
 	}								\
 } while (0)
 #define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__  ({	\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 	int _ret;							\
 									\
 	spinlock_enter();						\
 	if ((mp)->mtx_lock != MTX_UNOWNED) {				\
 		spinlock_exit();					\
 		_ret = 0;						\
 	} else {							\
 		(mp)->mtx_lock = _tid;					\
 		_ret = 1;						\
 	}								\
 	_ret;								\
 })
 #endif /* SMP */
 
 /* Unlock a normal mutex. */
 #define __mtx_unlock(mp, tid, opts, file, line) do {			\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
 									\
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\
-	    !_mtx_release_lock((mp), _tid)))				\
-		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
+	    !_mtx_release_lock_fetch((mp), &_v)))			\
+		_mtx_unlock_sleep((mp), _v, (opts), (file), (line));	\
 } while (0)
 
 /*
  * Unlock a spin mutex.  For spinlocks, we can handle everything
  * inline, as it's pretty simple and a function call would be too
  * expensive (at least on some architectures).  Since spin locks are
  * not _too_ common, inlining this code is not too big a deal.
  *
  * Since we always perform a spinlock_enter() when attempting to acquire a
  * spin lock, we need to always perform a matching spinlock_exit() when
  * releasing a spin lock.  This includes the recursion cases.
  */
 #ifdef SMP
 #define __mtx_unlock_spin(mp) do {					\
 	if (mtx_recursed((mp)))						\
 		(mp)->mtx_recurse--;					\
 	else {								\
 		LOCKSTAT_PROFILE_RELEASE_LOCK(spin__release, mp);	\
 		_mtx_release_lock_quick((mp));				\
 	}								\
 	spinlock_exit();						\
 } while (0)
 #else /* SMP */
 #define __mtx_unlock_spin(mp) do {					\
 	if (mtx_recursed((mp)))						\
 		(mp)->mtx_recurse--;					\
 	else {								\
 		LOCKSTAT_PROFILE_RELEASE_LOCK(spin__release, mp);	\
 		(mp)->mtx_lock = MTX_UNOWNED;				\
 	}								\
 	spinlock_exit();						\
 } while (0)
 #endif /* SMP */
 
 /*
  * Exported lock manipulation interface.
  *
  * mtx_lock(m) locks MTX_DEF mutex `m'
  *
  * mtx_lock_spin(m) locks MTX_SPIN mutex `m'
  *
  * mtx_unlock(m) unlocks MTX_DEF mutex `m'
  *
  * mtx_unlock_spin(m) unlocks MTX_SPIN mutex `m'
  *
  * mtx_lock_spin_flags(m, opts) and mtx_lock_flags(m, opts) locks mutex `m'
  *     and passes option flags `opts' to the "hard" function, if required.
  *     With these routines, it is possible to pass flags such as MTX_QUIET
  *     to the appropriate lock manipulation routines.
  *
  * mtx_trylock(m) attempts to acquire MTX_DEF mutex `m' but doesn't sleep if
  *     it cannot. Rather, it returns 0 on failure and non-zero on success.
  *     It does NOT handle recursion as we assume that if a caller is properly
  *     using this part of the interface, he will know that the lock in question
  *     is _not_ recursed.
  *
  * mtx_trylock_flags(m, opts) is used the same way as mtx_trylock() but accepts
  *     relevant option flags `opts.'
  *
  * mtx_trylock_spin(m) attempts to acquire MTX_SPIN mutex `m' but doesn't
  *     spin if it cannot.  Rather, it returns 0 on failure and non-zero on
  *     success.  It always returns failure for recursed lock attempts.
  *
  * mtx_initialized(m) returns non-zero if the lock `m' has been initialized.
  *
  * mtx_owned(m) returns non-zero if the current thread owns the lock `m'
  *
  * mtx_recursed(m) returns non-zero if the lock `m' is presently recursed.
  */ 
 #define mtx_lock(m)		mtx_lock_flags((m), 0)
 #define mtx_lock_spin(m)	mtx_lock_spin_flags((m), 0)
 #define mtx_trylock(m)		mtx_trylock_flags((m), 0)
 #define mtx_trylock_spin(m)	mtx_trylock_spin_flags((m), 0)
 #define mtx_unlock(m)		mtx_unlock_flags((m), 0)
 #define mtx_unlock_spin(m)	mtx_unlock_spin_flags((m), 0)
 
 struct mtx_pool;
 
 struct mtx_pool *mtx_pool_create(const char *mtx_name, int pool_size, int opts);
 void mtx_pool_destroy(struct mtx_pool **poolp);
 struct mtx *mtx_pool_find(struct mtx_pool *pool, void *ptr);
 struct mtx *mtx_pool_alloc(struct mtx_pool *pool);
 #define mtx_pool_lock(pool, ptr)					\
 	mtx_lock(mtx_pool_find((pool), (ptr)))
 #define mtx_pool_lock_spin(pool, ptr)					\
 	mtx_lock_spin(mtx_pool_find((pool), (ptr)))
 #define mtx_pool_unlock(pool, ptr)					\
 	mtx_unlock(mtx_pool_find((pool), (ptr)))
 #define mtx_pool_unlock_spin(pool, ptr)					\
 	mtx_unlock_spin(mtx_pool_find((pool), (ptr)))
 
 /*
  * mtxpool_sleep is a general purpose pool of sleep mutexes.
  */
 extern struct mtx_pool *mtxpool_sleep;
 
 #ifndef LOCK_DEBUG
 #error LOCK_DEBUG not defined, include <sys/lock.h> before <sys/mutex.h>
 #endif
 #if LOCK_DEBUG > 0 || defined(MUTEX_NOINLINE)
 #define	mtx_lock_flags_(m, opts, file, line)				\
 	_mtx_lock_flags((m), (opts), (file), (line))
 #define	mtx_unlock_flags_(m, opts, file, line)				\
 	_mtx_unlock_flags((m), (opts), (file), (line))
 #define	mtx_lock_spin_flags_(m, opts, file, line)			\
 	_mtx_lock_spin_flags((m), (opts), (file), (line))
 #define	mtx_trylock_spin_flags_(m, opts, file, line)			\
 	_mtx_trylock_spin_flags((m), (opts), (file), (line))
 #define	mtx_unlock_spin_flags_(m, opts, file, line)			\
 	_mtx_unlock_spin_flags((m), (opts), (file), (line))
 #else	/* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */
 #define	mtx_lock_flags_(m, opts, file, line)				\
 	__mtx_lock((m), curthread, (opts), (file), (line))
 #define	mtx_unlock_flags_(m, opts, file, line)				\
 	__mtx_unlock((m), curthread, (opts), (file), (line))
 #define	mtx_lock_spin_flags_(m, opts, file, line)			\
 	__mtx_lock_spin((m), curthread, (opts), (file), (line))
 #define	mtx_trylock_spin_flags_(m, opts, file, line)			\
 	__mtx_trylock_spin((m), curthread, (opts), (file), (line))
 #define	mtx_unlock_spin_flags_(m, opts, file, line)			\
 	__mtx_unlock_spin((m))
 #endif	/* LOCK_DEBUG > 0 || MUTEX_NOINLINE */
 
 #ifdef INVARIANTS
 #define	mtx_assert_(m, what, file, line)				\
 	_mtx_assert((m), (what), (file), (line))
 
 #define GIANT_REQUIRED	mtx_assert_(&Giant, MA_OWNED, __FILE__, __LINE__)
 
 #else	/* INVARIANTS */
 #define mtx_assert_(m, what, file, line)	(void)0
 #define GIANT_REQUIRED
 #endif	/* INVARIANTS */
 
 #define	mtx_lock_flags(m, opts)						\
 	mtx_lock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define	mtx_unlock_flags(m, opts)					\
 	mtx_unlock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define	mtx_lock_spin_flags(m, opts)					\
 	mtx_lock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define	mtx_unlock_spin_flags(m, opts)					\
 	mtx_unlock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define mtx_trylock_flags(m, opts)					\
 	mtx_trylock_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define mtx_trylock_spin_flags(m, opts)					\
 	mtx_trylock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE)
 #define	mtx_assert(m, what)						\
 	mtx_assert_((m), (what), __FILE__, __LINE__)
 
 #define	mtx_sleep(chan, mtx, pri, wmesg, timo)				\
 	_sleep((chan), &(mtx)->lock_object, (pri), (wmesg),		\
 	    tick_sbt * (timo), 0, C_HARDCLOCK)
 
 #define	MTX_READ_VALUE(m)	((m)->mtx_lock)
 
 #define	mtx_initialized(m)	lock_initialized(&(m)->lock_object)
 
 #define lv_mtx_owner(v)	((struct thread *)((v) & ~MTX_FLAGMASK))
 
 #define mtx_owner(m)	lv_mtx_owner(MTX_READ_VALUE(m))
 
 #define mtx_owned(m)	(mtx_owner(m) == curthread)
 
 #define mtx_recursed(m)	((m)->mtx_recurse != 0)
 
 #define mtx_name(m)	((m)->lock_object.lo_name)
 
 /*
  * Global locks.
  */
 extern struct mtx Giant;
 extern struct mtx blocked_lock;
 
 /*
  * Giant lock manipulation and clean exit macros.
  * Used to replace return with an exit Giant and return.
  *
  * Note that DROP_GIANT*() needs to be paired with PICKUP_GIANT() 
  * The #ifndef is to allow lint-like tools to redefine DROP_GIANT.
  */
 #ifndef DROP_GIANT
 #define DROP_GIANT()							\
 do {									\
 	int _giantcnt = 0;						\
 	WITNESS_SAVE_DECL(Giant);					\
 									\
 	if (mtx_owned(&Giant)) {					\
 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
 		for (_giantcnt = 0; mtx_owned(&Giant) &&		\
 		    !SCHEDULER_STOPPED(); _giantcnt++)			\
 			mtx_unlock(&Giant);				\
 	}
 
 #define PICKUP_GIANT()							\
 	PARTIAL_PICKUP_GIANT();						\
 } while (0)
 
 #define PARTIAL_PICKUP_GIANT()						\
 	mtx_assert(&Giant, MA_NOTOWNED);				\
 	if (_giantcnt > 0) {						\
 		while (_giantcnt--)					\
 			mtx_lock(&Giant);				\
 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
 	}
 #endif
 
 struct mtx_args {
 	void		*ma_mtx;
 	const char 	*ma_desc;
 	int		 ma_opts;
 };
 
 #define	MTX_SYSINIT(name, mtx, desc, opts)				\
 	static struct mtx_args name##_args = {				\
 		(mtx),							\
 		(desc),							\
 		(opts)							\
 	};								\
 	SYSINIT(name##_mtx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    mtx_sysinit, &name##_args);					\
 	SYSUNINIT(name##_mtx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    _mtx_destroy, __DEVOLATILE(void *, &(mtx)->mtx_lock))
 
 /*
  * The INVARIANTS-enabled mtx_assert() functionality.
  *
  * The constants need to be defined for INVARIANT_SUPPORT infrastructure
  * support as _mtx_assert() itself uses them and the latter implies that
  * _mtx_assert() must build.
  */
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define MA_OWNED	LA_XLOCKED
 #define MA_NOTOWNED	LA_UNLOCKED
 #define MA_RECURSED	LA_RECURSED
 #define MA_NOTRECURSED	LA_NOTRECURSED
 #endif
 
 /*
  * Common lock type names.
  */
 #define	MTX_NETWORK_LOCK	"network driver"
 
 #endif	/* _KERNEL */
 #endif	/* _SYS_MUTEX_H_ */
Index: head/sys/sys/rwlock.h
===================================================================
--- head/sys/sys/rwlock.h	(revision 326106)
+++ head/sys/sys/rwlock.h	(revision 326107)
@@ -1,296 +1,299 @@
 /*-
  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_RWLOCK_H_
 #define _SYS_RWLOCK_H_
 
 #include <sys/_lock.h>
 #include <sys/_rwlock.h>
 #include <sys/lock_profile.h>
 #include <sys/lockstat.h>
 
 #ifdef _KERNEL
 #include <sys/pcpu.h>
 #include <machine/atomic.h>
 #endif
 
 /*
  * The rw_lock field consists of several fields.  The low bit indicates
  * if the lock is locked with a read (shared) or write (exclusive) lock.
  * A value of 0 indicates a write lock, and a value of 1 indicates a read
  * lock.  Bit 1 is a boolean indicating if there are any threads waiting
  * for a read lock.  Bit 2 is a boolean indicating if there are any threads
  * waiting for a write lock.  The rest of the variable's definition is
  * dependent on the value of the first bit.  For a write lock, it is a
  * pointer to the thread holding the lock, similar to the mtx_lock field of
  * mutexes.  For read locks, it is a count of read locks that are held.
  *
  * When the lock is not locked by any thread, it is encoded as a read lock
  * with zero waiters.
  */
 
 #define	RW_LOCK_READ		0x01
 #define	RW_LOCK_READ_WAITERS	0x02
 #define	RW_LOCK_WRITE_WAITERS	0x04
 #define	RW_LOCK_WRITE_SPINNER	0x08
 #define	RW_LOCK_WRITER_RECURSED	0x10
 #define	RW_LOCK_FLAGMASK						\
 	(RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS |	\
 	RW_LOCK_WRITE_SPINNER | RW_LOCK_WRITER_RECURSED)
 #define	RW_LOCK_WAITERS		(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)
 
 #define	RW_OWNER(x)		((x) & ~RW_LOCK_FLAGMASK)
 #define	RW_READERS_SHIFT	5
 #define	RW_READERS(x)		(RW_OWNER((x)) >> RW_READERS_SHIFT)
 #define	RW_READERS_LOCK(x)	((x) << RW_READERS_SHIFT | RW_LOCK_READ)
 #define	RW_ONE_READER		(1 << RW_READERS_SHIFT)
 
 #define	RW_UNLOCKED		RW_READERS_LOCK(0)
 #define	RW_DESTROYED		(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)
 
 #ifdef _KERNEL
 
 #define	rw_recurse	lock_object.lo_data
 
 #define	RW_READ_VALUE(x)	((x)->rw_lock)
 
 /* Very simple operations on rw_lock. */
 
 /* Try to obtain a write lock once. */
 #define	_rw_write_lock(rw, tid)						\
 	atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid))
 
 #define	_rw_write_lock_fetch(rw, vp, tid)				\
 	atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid))
 
 /* Release a write lock quickly if there are no waiters. */
 #define	_rw_write_unlock(rw, tid)					\
 	atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
 
+#define	_rw_write_unlock_fetch(rw, tid)					\
+	atomic_fcmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels.  If the lock cannot be acquired or released trivially then
  * the work is deferred to another function.
  */
 
 /* Acquire a write lock. */
 #define	__rw_wlock(rw, tid, file, line) do {				\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 	uintptr_t _v = RW_UNLOCKED;					\
 									\
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||	\
 	    !_rw_write_lock_fetch((rw), &_v, _tid)))			\
 		_rw_wlock_hard((rw), _v, (file), (line));		\
 } while (0)
 
 /* Release a write lock. */
 #define	__rw_wunlock(rw, tid, file, line) do {				\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
 									\
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||	\
-	    !_rw_write_unlock((rw), _tid)))				\
-		_rw_wunlock_hard((rw), _tid, (file), (line));		\
+	    !_rw_write_unlock_fetch((rw), &_v)))			\
+		_rw_wunlock_hard((rw), _v, (file), (line));		\
 } while (0)
 
 /*
  * Function prototypes.  Routines that start with _ are not part of the
  * external API and should not be called directly.  Wrapper macros should
  * be used instead.
  */
 void	_rw_init_flags(volatile uintptr_t *c, const char *name, int opts);
 void	_rw_destroy(volatile uintptr_t *c);
 void	rw_sysinit(void *arg);
 void	rw_sysinit_flags(void *arg);
 int	_rw_wowned(const volatile uintptr_t *c);
 void	_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line);
 int	__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 int	__rw_try_wlock(volatile uintptr_t *c, const char *file, int line);
 void	_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line);
 void	__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 void	__rw_rlock(volatile uintptr_t *c, const char *file, int line);
 int	__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 int	__rw_try_rlock(volatile uintptr_t *c, const char *file, int line);
 void	_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 void	_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line);
 void	__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v
 	    LOCK_FILE_LINE_ARG_DEF);
 void	__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v
 	    LOCK_FILE_LINE_ARG_DEF);
 int	__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 int	__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line);
 void	__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF);
 void	__rw_downgrade(volatile uintptr_t *c, const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	__rw_assert(const volatile uintptr_t *c, int what, const char *file,
 	    int line);
 #endif
 
 /*
  * Top-level macros to provide lock cookie once the actual rwlock is passed.
  * They will also prevent passing a malformed object to the rwlock KPI by
  * failing compilation as the rw_lock reserved member will not be found.
  */
 #define	rw_init(rw, n)							\
 	_rw_init_flags(&(rw)->rw_lock, n, 0)
 #define	rw_init_flags(rw, n, o)						\
 	_rw_init_flags(&(rw)->rw_lock, n, o)
 #define	rw_destroy(rw)							\
 	_rw_destroy(&(rw)->rw_lock)
 #define	rw_wowned(rw)							\
 	_rw_wowned(&(rw)->rw_lock)
 #define	_rw_wlock(rw, f, l)						\
 	_rw_wlock_cookie(&(rw)->rw_lock, f, l)
 #define	_rw_try_wlock(rw, f, l)						\
 	__rw_try_wlock(&(rw)->rw_lock, f, l)
 #define	_rw_wunlock(rw, f, l)						\
 	_rw_wunlock_cookie(&(rw)->rw_lock, f, l)
 #define	_rw_try_rlock(rw, f, l)						\
 	__rw_try_rlock(&(rw)->rw_lock, f, l)
 #if LOCK_DEBUG > 0
 #define	_rw_rlock(rw, f, l)						\
 	__rw_rlock(&(rw)->rw_lock, f, l)
 #define	_rw_runlock(rw, f, l)						\
 	_rw_runlock_cookie(&(rw)->rw_lock, f, l)
 #else
 #define	_rw_rlock(rw, f, l)						\
 	__rw_rlock_int((struct rwlock *)rw)
 #define	_rw_runlock(rw, f, l)						\
 	_rw_runlock_cookie_int((struct rwlock *)rw)
 #endif
 #if LOCK_DEBUG > 0
 #define	_rw_wlock_hard(rw, v, f, l)					\
 	__rw_wlock_hard(&(rw)->rw_lock, v, f, l)
 #define	_rw_wunlock_hard(rw, v, f, l)					\
 	__rw_wunlock_hard(&(rw)->rw_lock, v, f, l)
 #define	_rw_try_upgrade(rw, f, l)					\
 	__rw_try_upgrade(&(rw)->rw_lock, f, l)
 #define	_rw_downgrade(rw, f, l)						\
 	__rw_downgrade(&(rw)->rw_lock, f, l)
 #else
 #define	_rw_wlock_hard(rw, v, f, l)					\
 	__rw_wlock_hard(&(rw)->rw_lock, v)
 #define	_rw_wunlock_hard(rw, v, f, l)					\
 	__rw_wunlock_hard(&(rw)->rw_lock, v)
 #define	_rw_try_upgrade(rw, f, l)					\
 	__rw_try_upgrade_int(rw)
 #define	_rw_downgrade(rw, f, l)						\
 	__rw_downgrade_int(rw)
 #endif
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define	_rw_assert(rw, w, f, l)						\
 	__rw_assert(&(rw)->rw_lock, w, f, l)
 #endif
 
 
 /*
  * Public interface for lock operations.
  */
 
 #ifndef LOCK_DEBUG
 #error LOCK_DEBUG not defined, include <sys/lock.h> before <sys/rwlock.h>
 #endif
 #if LOCK_DEBUG > 0 || defined(RWLOCK_NOINLINE)
 #define	rw_wlock(rw)		_rw_wlock((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_wunlock(rw)		_rw_wunlock((rw), LOCK_FILE, LOCK_LINE)
 #else
 #define	rw_wlock(rw)							\
 	__rw_wlock((rw), curthread, LOCK_FILE, LOCK_LINE)
 #define	rw_wunlock(rw)							\
 	__rw_wunlock((rw), curthread, LOCK_FILE, LOCK_LINE)
 #endif
 #define	rw_rlock(rw)		_rw_rlock((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_runlock(rw)		_rw_runlock((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_try_rlock(rw)	_rw_try_rlock((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_try_upgrade(rw)	_rw_try_upgrade((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_try_wlock(rw)	_rw_try_wlock((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_downgrade(rw)	_rw_downgrade((rw), LOCK_FILE, LOCK_LINE)
 #define	rw_unlock(rw)	do {						\
 	if (rw_wowned(rw))						\
 		rw_wunlock(rw);						\
 	else								\
 		rw_runlock(rw);						\
 } while (0)
 #define	rw_sleep(chan, rw, pri, wmesg, timo)				\
 	_sleep((chan), &(rw)->lock_object, (pri), (wmesg),		\
 	    tick_sbt * (timo), 0, C_HARDCLOCK)
 
 #define	rw_initialized(rw)	lock_initialized(&(rw)->lock_object)
 
 struct rw_args {
 	void		*ra_rw;
 	const char 	*ra_desc;
 	int		ra_flags;
 };
 
 #define	RW_SYSINIT_FLAGS(name, rw, desc, flags)				\
 	static struct rw_args name##_args = {				\
 		(rw),							\
 		(desc),							\
 		(flags),						\
 	};								\
 	SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    rw_sysinit, &name##_args);					\
 	SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock))
 
 #define	RW_SYSINIT(name, rw, desc)	RW_SYSINIT_FLAGS(name, rw, desc, 0)
 
 /*
  * Options passed to rw_init_flags().
  */
 #define	RW_DUPOK	0x01
 #define	RW_NOPROFILE	0x02
 #define	RW_NOWITNESS	0x04
 #define	RW_QUIET	0x08
 #define	RW_RECURSE	0x10
 #define	RW_NEW		0x20
 
 /*
  * The INVARIANTS-enabled rw_assert() functionality.
  *
  * The constants need to be defined for INVARIANT_SUPPORT infrastructure
  * support as _rw_assert() itself uses them and the latter implies that
  * _rw_assert() must build.
  */
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define	RA_LOCKED		LA_LOCKED
 #define	RA_RLOCKED		LA_SLOCKED
 #define	RA_WLOCKED		LA_XLOCKED
 #define	RA_UNLOCKED		LA_UNLOCKED
 #define	RA_RECURSED		LA_RECURSED
 #define	RA_NOTRECURSED		LA_NOTRECURSED
 #endif
 
 #ifdef INVARIANTS
 #define	rw_assert(rw, what)	_rw_assert((rw), (what), LOCK_FILE, LOCK_LINE)
 #else
 #define	rw_assert(rw, what)
 #endif
 
 #endif /* _KERNEL */
 #endif /* !_SYS_RWLOCK_H_ */
Index: head/sys/sys/sx.h
===================================================================
--- head/sys/sys/sx.h	(revision 326106)
+++ head/sys/sys/sx.h	(revision 326107)
@@ -1,300 +1,300 @@
 /*-
  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible 
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_SYS_SX_H_
 #define	_SYS_SX_H_
 
 #include <sys/_lock.h>
 #include <sys/_sx.h>
 
 #ifdef	_KERNEL
 #include <sys/pcpu.h>
 #include <sys/lock_profile.h>
 #include <sys/lockstat.h>
 #include <machine/atomic.h>
 #endif
 
 /*
  * In general, the sx locks and rwlocks use very similar algorithms.
  * The main difference in the implementations is how threads are
  * blocked when a lock is unavailable.  For this, sx locks use sleep
  * queues which do not support priority propagation, and rwlocks use
  * turnstiles which do.
  *
  * The sx_lock field consists of several fields.  The low bit
  * indicates if the lock is locked with a shared or exclusive lock.  A
  * value of 0 indicates an exclusive lock, and a value of 1 indicates
  * a shared lock.  Bit 1 is a boolean indicating if there are any
  * threads waiting for a shared lock.  Bit 2 is a boolean indicating
  * if there are any threads waiting for an exclusive lock.  Bit 3 is a
  * boolean indicating if an exclusive lock is recursively held.  The
  * rest of the variable's definition is dependent on the value of the
  * first bit.  For an exclusive lock, it is a pointer to the thread
  * holding the lock, similar to the mtx_lock field of mutexes.  For
  * shared locks, it is a count of read locks that are held.
  *
  * When the lock is not locked by any thread, it is encoded as a
  * shared lock with zero waiters.
  */
 
 #define	SX_LOCK_SHARED			0x01
 #define	SX_LOCK_SHARED_WAITERS		0x02
 #define	SX_LOCK_EXCLUSIVE_WAITERS	0x04
 #define	SX_LOCK_RECURSED		0x08
 #define	SX_LOCK_FLAGMASK						\
 	(SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS |			\
 	SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
 
 #define	SX_OWNER(x)			((x) & ~SX_LOCK_FLAGMASK)
 #define	SX_SHARERS_SHIFT		4
 #define	SX_SHARERS(x)			(SX_OWNER(x) >> SX_SHARERS_SHIFT)
 #define	SX_SHARERS_LOCK(x)						\
 	((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
 #define	SX_ONE_SHARER			(1 << SX_SHARERS_SHIFT)
 
 #define	SX_LOCK_UNLOCKED		SX_SHARERS_LOCK(0)
 #define	SX_LOCK_DESTROYED						\
 	(SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)
 
 #ifdef _KERNEL
 
 #define	sx_recurse	lock_object.lo_data
 
 #define	SX_READ_VALUE(sx)	((sx)->sx_lock)
 
 #define	lv_sx_owner(v) \
 	((v & SX_LOCK_SHARED) ? NULL : (struct thread *)SX_OWNER(v))
 
 /*
  * Function prototipes.  Routines that start with an underscore are not part
  * of the public interface and are wrappered with a macro.
  */
 void	sx_sysinit(void *arg);
 #define	sx_init(sx, desc)	sx_init_flags((sx), (desc), 0)
 void	sx_init_flags(struct sx *sx, const char *description, int opts);
 void	sx_destroy(struct sx *sx);
 int	sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 int	sx_try_slock_(struct sx *sx, const char *file, int line);
 int	sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 int	sx_try_xlock_(struct sx *sx, const char *file, int line);
 int	sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 int	sx_try_upgrade_(struct sx *sx, const char *file, int line);
 void	sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 void	sx_downgrade_(struct sx *sx, const char *file, int line);
 int	_sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF);
 int	_sx_slock(struct sx *sx, int opts, const char *file, int line);
 int	_sx_xlock(struct sx *sx, int opts, const char *file, int line);
 void	_sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 void	_sx_sunlock(struct sx *sx, const char *file, int line);
 void	_sx_xunlock(struct sx *sx, const char *file, int line);
 int	_sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF);
-void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF);
+void	_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	_sx_assert(const struct sx *sx, int what, const char *file, int line);
 #endif
 #ifdef DDB
 int	sx_chain(struct thread *td, struct thread **ownerp);
 #endif
 
 struct sx_args {
 	struct sx 	*sa_sx;
 	const char	*sa_desc;
 	int		sa_flags;
 };
 
 #define	SX_SYSINIT_FLAGS(name, sxa, desc, flags)			\
 	static struct sx_args name##_args = {				\
 		(sxa),							\
 		(desc),							\
 		(flags)							\
 	};								\
 	SYSINIT(name##_sx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    sx_sysinit, &name##_args);					\
 	SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE,	\
 	    sx_destroy, (sxa))
 
 #define	SX_SYSINIT(name, sxa, desc)	SX_SYSINIT_FLAGS(name, sxa, desc, 0)
 
 /*
  * Full lock operations that are suitable to be inlined in non-debug kernels.
  * If the lock can't be acquired or released trivially then the work is
  * deferred to 'tougher' functions.
  */
 
 #if	(LOCK_DEBUG == 0)
 /* Acquire an exclusive lock. */
 static __inline int
 __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
     int line)
 {
 	uintptr_t tid = (uintptr_t)td;
 	uintptr_t v = SX_LOCK_UNLOCKED;
 	int error = 0;
 
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
 	    !atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid)))
 		error = _sx_xlock_hard(sx, v, opts);
 
 	return (error);
 }
 
 /* Release an exclusive lock. */
 static __inline void
 __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
 {
-	uintptr_t tid = (uintptr_t)td;
+	uintptr_t x = (uintptr_t)td;
 
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
-	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)))
-		_sx_xunlock_hard(sx, tid);
+	    !atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, SX_LOCK_UNLOCKED)))
+		_sx_xunlock_hard(sx, x);
 }
 #endif
 
 /*
  * Public interface for lock operations.
  */
 #ifndef LOCK_DEBUG
 #error	"LOCK_DEBUG not defined, include <sys/lock.h> before <sys/sx.h>"
 #endif
 #if	(LOCK_DEBUG > 0) || defined(SX_NOINLINE)
 #define	sx_xlock_(sx, file, line)					\
 	(void)_sx_xlock((sx), 0, (file), (line))
 #define	sx_xlock_sig_(sx, file, line)					\
 	_sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line))
 #define	sx_xunlock_(sx, file, line)					\
 	_sx_xunlock((sx), (file), (line))
 #else
 #define	sx_xlock_(sx, file, line)					\
 	(void)__sx_xlock((sx), curthread, 0, (file), (line))
 #define	sx_xlock_sig_(sx, file, line)					\
 	__sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line))
 #define	sx_xunlock_(sx, file, line)					\
 	__sx_xunlock((sx), curthread, (file), (line))
 #endif	/* LOCK_DEBUG > 0 || SX_NOINLINE */
 #if	(LOCK_DEBUG > 0)
 #define	sx_slock_(sx, file, line)					\
 	(void)_sx_slock((sx), 0, (file), (line))
 #define	sx_slock_sig_(sx, file, line)					\
 	_sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
 #define	sx_sunlock_(sx, file, line)					\
 	_sx_sunlock((sx), (file), (line))
 #define	sx_try_slock(sx)	sx_try_slock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_xlock(sx)	sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_upgrade(sx)	sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_downgrade(sx)	sx_downgrade_((sx), LOCK_FILE, LOCK_LINE)
 #else
 #define	sx_slock_(sx, file, line)					\
 	(void)_sx_slock_int((sx), 0)
 #define	sx_slock_sig_(sx, file, line)					\
 	_sx_slock_int((sx), SX_INTERRUPTIBLE)
 #define	sx_sunlock_(sx, file, line)					\
 	_sx_sunlock_int((sx))
 #define	sx_try_slock(sx)	sx_try_slock_int((sx))
 #define	sx_try_xlock(sx)	sx_try_xlock_int((sx))
 #define	sx_try_upgrade(sx)	sx_try_upgrade_int((sx))
 #define	sx_downgrade(sx)	sx_downgrade_int((sx))
 #endif
 #ifdef INVARIANTS
 #define	sx_assert_(sx, what, file, line)				\
 	_sx_assert((sx), (what), (file), (line))
 #else
 #define	sx_assert_(sx, what, file, line)	(void)0
 #endif
 
 #define	sx_xlock(sx)		sx_xlock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_xlock_sig(sx)	sx_xlock_sig_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_xunlock(sx)		sx_xunlock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_slock(sx)		sx_slock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_slock_sig(sx)	sx_slock_sig_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_sunlock(sx)		sx_sunlock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_assert(sx, what)	sx_assert_((sx), (what), __FILE__, __LINE__)
 
 /*
  * Return a pointer to the owning thread if the lock is exclusively
  * locked.
  */
 #define	sx_xholder(sx)							\
 	((sx)->sx_lock & SX_LOCK_SHARED ? NULL :			\
 	(struct thread *)SX_OWNER((sx)->sx_lock))
 
 #define	sx_xlocked(sx)							\
 	(((sx)->sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)) ==	\
 	    (uintptr_t)curthread)
 
 #define	sx_unlock_(sx, file, line) do {					\
 	if (sx_xlocked(sx))						\
 		sx_xunlock_(sx, file, line);				\
 	else								\
 		sx_sunlock_(sx, file, line);				\
 } while (0)
 
 #define	sx_unlock(sx)	sx_unlock_((sx), LOCK_FILE, LOCK_LINE)
 
 #define	sx_sleep(chan, sx, pri, wmesg, timo)				\
 	_sleep((chan), &(sx)->lock_object, (pri), (wmesg),		\
 	    tick_sbt * (timo), 0,  C_HARDCLOCK)
 
 /*
  * Options passed to sx_init_flags().
  */
 #define	SX_DUPOK		0x01
 #define	SX_NOPROFILE		0x02
 #define	SX_NOWITNESS		0x04
 #define	SX_QUIET		0x08
 #define	SX_NOADAPTIVE		0x10
 #define	SX_RECURSE		0x20
 #define	SX_NEW			0x40
 
 /*
  * Options passed to sx_*lock_hard().
  */
 #define	SX_INTERRUPTIBLE	0x40
 
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 #define	SA_LOCKED		LA_LOCKED
 #define	SA_SLOCKED		LA_SLOCKED
 #define	SA_XLOCKED		LA_XLOCKED
 #define	SA_UNLOCKED		LA_UNLOCKED
 #define	SA_RECURSED		LA_RECURSED
 #define	SA_NOTRECURSED		LA_NOTRECURSED
 
 /* Backwards compatibility. */
 #define	SX_LOCKED		LA_LOCKED
 #define	SX_SLOCKED		LA_SLOCKED
 #define	SX_XLOCKED		LA_XLOCKED
 #define	SX_UNLOCKED		LA_UNLOCKED
 #define	SX_RECURSED		LA_RECURSED
 #define	SX_NOTRECURSED		LA_NOTRECURSED
 #endif
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_SX_H_ */