Index: head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c
===================================================================
--- head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c	(revision 284296)
+++ head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c	(revision 284297)
@@ -1,1921 +1,1933 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <stdio.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
 #include <strings.h>
 #include <ctype.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <limits.h>
 #include <sys/types.h>
 #include <sys/modctl.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <dtrace.h>
 #include <sys/lockstat.h>
 #include <alloca.h>
 #include <signal.h>
 #include <assert.h>
 
 #ifdef illumos
 #define	GETOPT_EOF	EOF
 #else
 #include <sys/time.h>
 #include <sys/resource.h>
 
 #define	mergesort(a, b, c, d)	lsmergesort(a, b, c, d)
 #define	GETOPT_EOF		(-1)
 
 typedef	uintptr_t	pc_t;
 #endif
 
 #define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
 
 #define	LS_MAX_STACK_DEPTH	50
 #define	LS_MAX_EVENTS		64
 
 typedef struct lsrec {
 	struct lsrec	*ls_next;	/* next in hash chain */
 	uintptr_t	ls_lock;	/* lock address */
 	uintptr_t	ls_caller;	/* caller address */
 	uint32_t	ls_count;	/* cumulative event count */
 	uint32_t	ls_event;	/* type of event */
 	uintptr_t	ls_refcnt;	/* cumulative reference count */
 	uint64_t	ls_time;	/* cumulative event duration */
 	uint32_t	ls_hist[64];	/* log2(duration) histogram */
 	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];
 } lsrec_t;
 
 typedef struct lsdata {
 	struct lsrec	*lsd_next;	/* next available */
 	int		lsd_count;	/* number of records */
 } lsdata_t;
 
 /*
  * Definitions for the types of experiments which can be run.  They are
  * listed in increasing order of memory cost and processing time cost.
  * The numerical value of each type is the number of bytes needed per record.
  */
 #define	LS_BASIC	offsetof(lsrec_t, ls_time)
 #define	LS_TIME		offsetof(lsrec_t, ls_hist[0])
 #define	LS_HIST		offsetof(lsrec_t, ls_stack[0])
 #define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])
 
 static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
 static void report_trace(FILE *, lsrec_t **);
 
 extern int symtab_init(void);
 extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
 extern uintptr_t sym_to_addr(char *name);
 extern size_t sym_size(char *name);
 extern char *strtok_r(char *, const char *, char **);
 
 #define	DEFAULT_NRECS	10000
 #define	DEFAULT_HZ	97
 #define	MAX_HZ		1000
 #define	MIN_AGGSIZE	(16 * 1024)
 #define	MAX_AGGSIZE	(32 * 1024 * 1024)
 
 static int g_stkdepth;
 static int g_topn = INT_MAX;
 static hrtime_t g_elapsed;
 static int g_rates = 0;
 static int g_pflag = 0;
 static int g_Pflag = 0;
 static int g_wflag = 0;
 static int g_Wflag = 0;
 static int g_cflag = 0;
 static int g_kflag = 0;
 static int g_gflag = 0;
 static int g_Vflag = 0;
 static int g_tracing = 0;
 static size_t g_recsize;
 static size_t g_nrecs;
 static int g_nrecs_used;
 static uchar_t g_enabled[LS_MAX_EVENTS];
 static hrtime_t g_min_duration[LS_MAX_EVENTS];
 static dtrace_hdl_t *g_dtp;
 static char *g_predicate;
 static char *g_ipredicate;
 static char *g_prog;
 static int g_proglen;
 static int g_dropped;
 
 typedef struct ls_event_info {
 	char	ev_type;
 	char	ev_lhdr[20];
 	char	ev_desc[80];
 	char	ev_units[10];
 	char	ev_name[DTRACE_NAMELEN];
 	char	*ev_predicate;
 	char	*ev_acquire;
 } ls_event_info_t;
 
 static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
 	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
 	    "lockstat:::adaptive-spin" },
 	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
 	    "lockstat:::adaptive-block" },
 	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
 	    "lockstat:::spin-spin" },
 	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
 	    "lockstat:::thread-spin" },
 	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
 	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
 	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
 	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
-	{ 'C',	"Lock",	"Unknown event (type 8)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 9)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 10)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 11)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 12)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 13)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 14)",		"units"	},
-	{ 'C',	"Lock",	"Unknown event (type 15)",		"units"	},
+	{ 'C',	"Lock",	"R/W writer spin on writer",		"nsec",
+	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W writer spin on readers",		"nsec",
+	    "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"R/W reader spin on writer",		"nsec",
+	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" },
+	{ 'C',	"Lock",	"R/W reader spin on write wanted",	"nsec",
+	    "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" },
+	{ 'C',	"Lock",	"SX exclusive block",			"nsec",
+	    "lockstat:::sx-block", "arg2 == 0" },
+	{ 'C',	"Lock",	"SX shared block",			"nsec",
+	    "lockstat:::sx-block", "arg2 != 0" },
+	{ 'C',	"Lock",	"SX exclusive spin",			"nsec",
+	    "lockstat:::sx-spin", "arg2 == 0" },
+	{ 'C',	"Lock",	"SX shared spin",			"nsec",
+	    "lockstat:::sx-spin", "arg2 != 0" },
 	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 19)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
 	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
 	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
 	    "lockstat:::adaptive-release", NULL,
 	    "lockstat:::adaptive-acquire" },
 	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
 	    "lockstat:::spin-release", NULL,
 	    "lockstat:::spin-acquire" },
 	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
-	    "lockstat:::rw-release", "arg1 == 0",
-	    "lockstat:::rw-acquire" },
+	    "lockstat::rw_wunlock:rw-release", NULL,
+	    "lockstat::rw_wlock:rw-acquire" },
 	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
-	    "lockstat:::rw-release", "arg1 != 0",
-	    "lockstat:::rw-acquire" },
-	{ 'H',	"Lock",	"Unknown event (type 36)",		"units"	},
-	{ 'H',	"Lock",	"Unknown event (type 37)",		"units"	},
+	    "lockstat::rw_runlock:rw-release", NULL,
+	    "lockstat::rw_rlock:rw-acquire" },
+	{ 'H',	"Lock",	"SX shared hold",			"nsec",
+	    "lockstat::sx_sunlock:sx-release", NULL,
+	    "lockstat::sx_slock:sx-acquire" },
+	{ 'H',	"Lock",	"SX exclusive hold",			"nsec",
+	    "lockstat::sx_xunlock:sx-release", NULL,
+	    "lockstat::sx_xlock:sx-acquire" },
 	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
 	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
 #ifdef illumos
 	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
 #else
 	{ 'I',	"CPU+Pri_Class", "Profiling interrupt",		"nsec",
 #endif
 	    "profile:::profile-97", NULL },
 	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
 	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
 	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
 	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
 	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
 	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
 	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
 	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
 };
 
 #ifndef illumos
 static char *g_pri_class[] = {
 	"",
 	"Intr",
 	"RealT",
 	"TShar",
 	"Idle"
 };
 #endif
 
 static void
 fail(int do_perror, const char *message, ...)
 {
 	va_list args;
 	int save_errno = errno;
 
 	va_start(args, message);
 	(void) fprintf(stderr, "lockstat: ");
 	(void) vfprintf(stderr, message, args);
 	va_end(args);
 	if (do_perror)
 		(void) fprintf(stderr, ": %s", strerror(save_errno));
 	(void) fprintf(stderr, "\n");
 	exit(2);
 }
 
 static void
 dfail(const char *message, ...)
 {
 	va_list args;
 
 	va_start(args, message);
 	(void) fprintf(stderr, "lockstat: ");
 	(void) vfprintf(stderr, message, args);
 	va_end(args);
 	(void) fprintf(stderr, ": %s\n",
 	    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
 
 	exit(2);
 }
 
 static void
 show_events(char event_type, char *desc)
 {
 	int i, first = -1, last;
 
 	for (i = 0; i < LS_MAX_EVENTS; i++) {
 		ls_event_info_t *evp = &g_event_info[i];
 		if (evp->ev_type != event_type ||
 		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
 			continue;
 		if (first == -1)
 			first = i;
 		last = i;
 	}
 
 	(void) fprintf(stderr,
 	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
 	    desc, event_type, first, last);
 
 	for (i = first; i <= last; i++)
 		(void) fprintf(stderr,
 		    "%4d = %s\n", i, g_event_info[i].ev_desc);
 }
 
 static void
 usage(void)
 {
 	(void) fprintf(stderr,
 	    "Usage: lockstat [options] command [args]\n"
 	    "\nGeneral options:\n\n"
 	    "  -V              print the corresponding D program\n"
 	    "\nEvent selection options:\n\n"
 	    "  -C              watch contention events [on by default]\n"
 	    "  -E              watch error events [off by default]\n"
 	    "  -H              watch hold events [off by default]\n"
 	    "  -I              watch interrupt events [off by default]\n"
 	    "  -A              watch all lock events [equivalent to -CH]\n"
 	    "  -e event_list   only watch the specified events (shown below);\n"
 	    "                  <event_list> is a comma-separated list of\n"
 	    "                  events or ranges of events, e.g. 1,4-7,35\n"
 	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
 	    "\nData gathering options:\n\n"
 	    "  -b              basic statistics (lock, caller, event count)\n"
 	    "  -t              timing for all events [default]\n"
 	    "  -h              histograms for event times\n"
 	    "  -s depth        stack traces <depth> deep\n"
 	    "  -x opt[=val]    enable or modify DTrace options\n"
 	    "\nData filtering options:\n\n"
 	    "  -n nrecords     maximum number of data records [default: %d]\n"
 	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
 	    "                  symbolic name or hex address; <size> defaults\n"
 	    "                  to the ELF symbol size if available, 1 if not\n"
 	    "  -f func[,size]  only watch events generated by <func>\n"
 	    "  -d duration     only watch events longer than <duration>\n"
 	    "  -T              trace (rather than sample) events\n"
 	    "\nData reporting options:\n\n"
 	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
 	    "  -k              coalesce PCs within functions\n"
 	    "  -g              show total events generated by function\n"
 	    "  -w              wherever: don't distinguish events by caller\n"
 	    "  -W              whichever: don't distinguish events by lock\n"
 	    "  -R              display rates rather than counts\n"
 	    "  -p              parsable output format (awk(1)-friendly)\n"
 	    "  -P              sort lock data by (count * avg_time) product\n"
 	    "  -D n            only display top <n> events of each type\n"
 	    "  -o filename     send output to <filename>\n",
 	    DEFAULT_HZ, DEFAULT_NRECS);
 
 	show_events('C', "Contention");
 	show_events('H', "Hold-time");
 	show_events('I', "Interrupt");
 	show_events('E', "Error");
 	(void) fprintf(stderr, "\n");
 
 	exit(1);
 }
 
 static int
 lockcmp(lsrec_t *a, lsrec_t *b)
 {
 	int i;
 
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	for (i = g_stkdepth - 1; i >= 0; i--) {
 		if (a->ls_stack[i] < b->ls_stack[i])
 			return (-1);
 		if (a->ls_stack[i] > b->ls_stack[i])
 			return (1);
 	}
 
 	if (a->ls_caller < b->ls_caller)
 		return (-1);
 	if (a->ls_caller > b->ls_caller)
 		return (1);
 
 	if (a->ls_lock < b->ls_lock)
 		return (-1);
 	if (a->ls_lock > b->ls_lock)
 		return (1);
 
 	return (0);
 }
 
 static int
 countcmp(lsrec_t *a, lsrec_t *b)
 {
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	return (b->ls_count - a->ls_count);
 }
 
 static int
 timecmp(lsrec_t *a, lsrec_t *b)
 {
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	if (a->ls_time < b->ls_time)
 		return (1);
 	if (a->ls_time > b->ls_time)
 		return (-1);
 
 	return (0);
 }
 
 static int
 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
 {
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	if (a->ls_lock < b->ls_lock)
 		return (-1);
 	if (a->ls_lock > b->ls_lock)
 		return (1);
 
 	return (0);
 }
 
 static int
 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
 {
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	if (a->ls_lock < b->ls_lock)
 		return (-1);
 	if (a->ls_lock > b->ls_lock)
 		return (1);
 
 	return (b->ls_count - a->ls_count);
 }
 
 static int
 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
 {
 	int i;
 
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	for (i = g_stkdepth - 1; i >= 0; i--) {
 		if (a->ls_stack[i] < b->ls_stack[i])
 			return (-1);
 		if (a->ls_stack[i] > b->ls_stack[i])
 			return (1);
 	}
 
 	if (a->ls_caller < b->ls_caller)
 		return (-1);
 	if (a->ls_caller > b->ls_caller)
 		return (1);
 
 	return (0);
 }
 
 static int
 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
 {
 	int i;
 
 	if (a->ls_event < b->ls_event)
 		return (-1);
 	if (a->ls_event > b->ls_event)
 		return (1);
 
 	for (i = g_stkdepth - 1; i >= 0; i--) {
 		if (a->ls_stack[i] < b->ls_stack[i])
 			return (-1);
 		if (a->ls_stack[i] > b->ls_stack[i])
 			return (1);
 	}
 
 	if (a->ls_caller < b->ls_caller)
 		return (-1);
 	if (a->ls_caller > b->ls_caller)
 		return (1);
 
 	return (b->ls_count - a->ls_count);
 }
 
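 /*
  * Classic top-down merge sort, with one twist: the first half of a[] is
  * copied into the scratch buffer b[] in order and the second half in
  * reverse, so the merge below can walk b[] inward from both ends without
  * bounds checks -- each run's far end acts as a sentinel for the other.
  */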
 static void
 lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
 {
 	int m = n / 2;
 	int i, j;
 
 	if (m > 1)
 		lsmergesort(cmp, a, b, m);
 	if (n - m > 1)
 		lsmergesort(cmp, a + m, b + m, n - m);
 	for (i = m; i > 0; i--)
 		b[i - 1] = a[i - 1];
 	for (j = m - 1; j < n - 1; j++)
 		b[n + m - j - 2] = a[j + 1];
 	while (i < j)
 		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
 	*a = b[i];
 }
 
 static void
 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
 {
 	int i, j;
 	lsrec_t *target, *current;
 
 	target = lock[0];
 
 	for (i = 1; i < n; i++) {
 		current = lock[i];
 		if (cmp(current, target) != 0) {
 			target = current;
 			continue;
 		}
 		current->ls_event = LS_MAX_EVENTS;
 		target->ls_count += current->ls_count;
 		target->ls_refcnt += current->ls_refcnt;
 		if (g_recsize < LS_TIME)
 			continue;
 		target->ls_time += current->ls_time;
 		if (g_recsize < LS_HIST)
 			continue;
 		for (j = 0; j < 64; j++)
 			target->ls_hist[j] += current->ls_hist[j];
 	}
 }
 
 static void
 coalesce_symbol(uintptr_t *addrp)
 {
 	uintptr_t symoff;
 	size_t symsize;
 
 	if (addr_to_sym(*addrp, &symoff, &symsize) != NULL && symoff < symsize)
 		*addrp -= symoff;
 }
 
 static void
 predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
 {
 	char *new;
 	int len, newlen;
 
 	if (what == NULL)
 		return;
 
 	if (*pred == NULL) {
 		*pred = malloc(1);
 		*pred[0] = '\0';
 	}
 
 	len = strlen(*pred);
 	newlen = len + strlen(what) + 32 + strlen("( && )");
 	new = malloc(newlen);
 
 	if (*pred[0] != '\0') {
 		if (cmp != NULL) {
 			(void) sprintf(new, "(%s) && (%s %s 0x%p)",
 			    *pred, what, cmp, (void *)value);
 		} else {
 			(void) sprintf(new, "(%s) && (%s)", *pred, what);
 		}
 	} else {
 		if (cmp != NULL) {
 			(void) sprintf(new, "%s %s 0x%p",
 			    what, cmp, (void *)value);
 		} else {
 			(void) sprintf(new, "%s", what);
 		}
 	}
 
 	free(*pred);
 	*pred = new;
 }
 
 static void
 predicate_destroy(char **pred)
 {
 	free(*pred);
 	*pred = NULL;
 }
 
 static void
 filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
 {
 	char buf[256], *c = buf, *new;
 	int len, newlen;
 
 	if (*filt == NULL) {
 		*filt = malloc(1);
 		*filt[0] = '\0';
 	}
 
 #ifdef illumos
 	(void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ?
 	    " || " : "", what, (void *)base, what, (void *)(base + size));
 #else
 	(void) sprintf(c, "%s(%s >= %p && %s < %p)", *filt[0] != '\0' ?
 	    " || " : "", what, (void *)base, what, (void *)(base + size));
 #endif
 
 	newlen = (len = strlen(*filt) + 1) + strlen(c);
 	new = malloc(newlen);
 	bcopy(*filt, new, len);
 	(void) strcat(new, c);
 	free(*filt);
 	*filt = new;
 }
 
 static void
 filter_destroy(char **filt)
 {
 	free(*filt);
 	*filt = NULL;
 }
 
 static void
 dprog_add(const char *fmt, ...)
 {
 	va_list args;
 	int size, offs;
 	char c;
 
 	va_start(args, fmt);
 	size = vsnprintf(&c, 1, fmt, args) + 1;
 	va_end(args);
 
 	if (g_proglen == 0) {
 		offs = 0;
 	} else {
 		offs = g_proglen - 1;
 	}
 
 	g_proglen = offs + size;
 
 	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
 		fail(1, "failed to reallocate program text");
 
 	va_start(args, fmt);
 	(void) vsnprintf(&g_prog[offs], size, fmt, args);
 	va_end(args);
 }
 
 /*
  * This function may read like an open sewer, but keep in mind that programs
  * that generate other programs are rarely pretty.  If one has the unenviable
  * task of maintaining or -- worse -- extending this code, use the -V option
  * to examine the D program as generated by this function.
  */
 static void
 dprog_addevent(int event)
 {
 	ls_event_info_t *info = &g_event_info[event];
 	char *pred = NULL;
 	char stack[20];
 	const char *arg0, *caller;
 	char *arg1 = "arg1";
 	char buf[80];
 	hrtime_t dur;
 	int depth;
 
 	if (info->ev_name[0] == '\0')
 		return;
 
 	if (info->ev_type == 'I') {
 		/*
 		 * For interrupt events, arg0 (normally the lock pointer) is
 		 * the CPU address plus the current pil, and arg1 (normally
 		 * the number of nanoseconds) is the number of nanoseconds
 		 * late -- and it's stored in arg2.
 		 */
 #ifdef illumos
 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
 		    "\t    curthread->t_cpu->cpu_profile_pil";
 #else
 		arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
 		    "\t    0x01000000 + curthread->td_pri_class";
 #endif
 		caller = "(uintptr_t)arg0";
 		arg1 = "arg2";
 	} else {
 		arg0 = "(uintptr_t)arg0";
 		caller = "caller";
 	}
 
 	if (g_recsize > LS_HIST) {
 		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
 			continue;
 
 		if (g_tracing) {
 			(void) sprintf(stack, "\tstack(%d);\n", depth);
 		} else {
 			(void) sprintf(stack, ", stack(%d)", depth);
 		}
 	} else {
 		(void) sprintf(stack, "");
 	}
 
 	if (info->ev_acquire != NULL) {
 		/*
 		 * If this is a hold event, we need to generate an additional
 		 * clause for the acquire; the clause for the release will be
 		 * generated with the aggregating statement, below.
 		 */
 		dprog_add("%s\n", info->ev_acquire);
 		predicate_add(&pred, info->ev_predicate, NULL, 0);
 		predicate_add(&pred, g_predicate, NULL, 0);
 		if (pred != NULL)
 			dprog_add("/%s/\n", pred);
 
 		dprog_add("{\n");
 		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
 
 		if (info->ev_type == 'H') {
 			dprog_add("\t%s = timestamp;\n", buf);
 		} else {
 			/*
 			 * If this isn't a hold event, it's the recursive
 			 * error event.  For this, we simply bump the
 			 * thread-local, per-lock count.
 			 */
 			dprog_add("\t%s++;\n", buf);
 		}
 
 		dprog_add("}\n\n");
 		predicate_destroy(&pred);
 		pred = NULL;
 
 		if (info->ev_type == 'E') {
 			/*
 			 * If this is the recursive lock error event, we need
 			 * to generate an additional clause to decrement the
 			 * thread-local, per-lock count.  This assures that we
 			 * only execute the aggregating clause if we have
 			 * recursive entry.
 			 */
 			dprog_add("%s\n", info->ev_name);
 			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
 		}
 
 		predicate_add(&pred, buf, NULL, 0);
 
 		if (info->ev_type == 'H') {
 			(void) sprintf(buf, "timestamp -\n\t    "
 			    "self->ev%d[(uintptr_t)arg0]", event);
 		}
 
 		arg1 = buf;
 	} else {
 		predicate_add(&pred, info->ev_predicate, NULL, 0);
 		if (info->ev_type != 'I')
 			predicate_add(&pred, g_predicate, NULL, 0);
 		else
 			predicate_add(&pred, g_ipredicate, NULL, 0);
 	}
 
 	if ((dur = g_min_duration[event]) != 0)
 		predicate_add(&pred, arg1, ">=", dur);
 
 	dprog_add("%s\n", info->ev_name);
 
 	if (pred != NULL)
 		dprog_add("/%s/\n", pred);
 	predicate_destroy(&pred);
 
 	dprog_add("{\n");
 
 	if (g_tracing) {
 		dprog_add("\ttrace(%dULL);\n", event);
 		dprog_add("\ttrace(%s);\n", arg0);
 		dprog_add("\ttrace(%s);\n", caller);
 		dprog_add(stack);
 	} else {
 		/*
 		 * The ordering here is important:  when we process the
 		 * aggregate, we count on the fact that @avg appears before
 		 * @hist in program order to assure that @avg is assigned the
 		 * first aggregation variable ID and @hist assigned the
 		 * second; see the comment in process_aggregate() for details.
 		 */
 		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
 		    event, arg0, caller, stack, arg1);
 
 		if (g_recsize >= LS_HIST) {
 			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
 			    "(%s);\n", event, arg0, caller, stack, arg1);
 		}
 	}
 
 	if (info->ev_acquire != NULL)
 		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
 
 	dprog_add("}\n\n");
 }
 
 static void
 dprog_compile()
 {
 	dtrace_prog_t *prog;
 	dtrace_proginfo_t info;
 
 	if (g_Vflag) {
 		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
 		(void) fputs(g_prog, stderr);
 		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
 	}
 
 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
 		dfail("failed to compile program");
 
 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
 		dfail("failed to enable probes");
 
 	if (dtrace_go(g_dtp) != 0)
 		dfail("couldn't start tracing");
 }
 
 static void
 #ifdef illumos
 status_fire(void)
 #else
 status_fire(int i)
 #endif
 {}
 
 static void
 status_init(void)
 {
 	dtrace_optval_t val, status, agg;
 	struct sigaction act;
 	struct itimerspec ts;
 	struct sigevent ev;
 	timer_t tid;
 
 	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
 		dfail("failed to get 'statusrate'");
 
 	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
 		dfail("failed to get 'statusrate'");
 
 	/*
 	 * We would want to awaken at a rate that is the GCD of the statusrate
 	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
 	 * simply awaken at a rate that is the more frequent of the two, which
 	 * assures that we're never later than the interval implied by the
 	 * more frequent rate.
 	 */
 	val = status < agg ? status : agg;
 
 	(void) sigemptyset(&act.sa_mask);
 	act.sa_flags = 0;
 	act.sa_handler = status_fire;
 	(void) sigaction(SIGUSR1, &act, NULL);
 
 	ev.sigev_notify = SIGEV_SIGNAL;
 	ev.sigev_signo = SIGUSR1;
 
 	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
 		dfail("cannot create CLOCK_REALTIME timer");
 
 	ts.it_value.tv_sec = val / NANOSEC;
 	ts.it_value.tv_nsec = val % NANOSEC;
 	ts.it_interval = ts.it_value;
 
 	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
 		dfail("cannot set time on CLOCK_REALTIME timer");
 }
 
 static void
 status_check(void)
 {
 	if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
 		dfail("failed to snap aggregate");
 
 	if (dtrace_status(g_dtp) == -1)
 		dfail("dtrace_status()");
 }
 
 static void
 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
 {
 	bzero(lsrec, g_recsize);
 	lsrec->ls_count = 1;
 
 	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
 		fail(0, "truncated DTrace record");
 
 	if (rec->dtrd_size != sizeof (uint64_t))
 		fail(0, "bad event size in first record");
 
 	/* LINTED - alignment */
 	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
 	rec++;
 
 	if (rec->dtrd_size != sizeof (uintptr_t))
 		fail(0, "bad lock address size in second record");
 
 	/* LINTED - alignment */
 	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
 	rec++;
 
 	if (rec->dtrd_size != sizeof (uintptr_t))
 		fail(0, "bad caller size in third record");
 
 	/* LINTED - alignment */
 	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
 	rec++;
 
 	if (g_recsize > LS_HIST) {
 		int frames, i;
 		pc_t *stack;
 
 		frames = rec->dtrd_size / sizeof (pc_t);
 		/* LINTED - alignment */
 		stack = (pc_t *)(data + rec->dtrd_offset);
 
 		for (i = 1; i < frames; i++)
 			lsrec->ls_stack[i - 1] = stack[i];
 	}
 }
 
 /*ARGSUSED*/
 static int
 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
 {
 	*((size_t *)arg) += 1;
 
 	return (DTRACE_AGGWALK_NEXT);
 }
 
 static int
 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
 {
 	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
 	caddr_t data = agg->dtada_data;
 	lsdata_t *lsdata = arg;
 	lsrec_t *lsrec = lsdata->lsd_next;
 	const dtrace_recdesc_t *rec;
 	uint64_t *avg, *quantized;
 	int i, j;
 
 	assert(lsdata->lsd_count < g_nrecs);
 
 	/*
 	 * Aggregation variable IDs are guaranteed to be generated in program
 	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
 	 * plus one.  As "avg" appears before "hist" in program order, we know
 	 * that "avg" will be allocated the first aggregation variable ID, and
 	 * "hist" will be allocated the second aggregation variable ID -- and
 	 * we therefore use the aggregation variable ID to differentiate the
 	 * cases.
 	 */
 	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
 		/*
 		 * This is the histogram entry.  We'll copy the quantized
 		 * data into ls_hist and jump over the rest.
 		 */
 		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
 
 		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
 			fail(0, "bad variable ID in aggregation record");
 
 		if (rec->dtrd_size !=
 		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
 			fail(0, "bad quantize size in aggregation record");
 
 		/* LINTED - alignment */
 		quantized = (uint64_t *)(data + rec->dtrd_offset);
 
 		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
 			lsrec->ls_hist[j] = quantized[i];
 
 		goto out;
 	}
 
 	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
 	    aggdesc->dtagd_nrecs - 1, data);
 
 	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
 
 	if (rec->dtrd_size != 2 * sizeof (uint64_t))
 		fail(0, "bad avg size in aggregation record");
 
 	/* LINTED - alignment */
 	avg = (uint64_t *)(data + rec->dtrd_offset);
 	lsrec->ls_count = (uint32_t)avg[0];
 	lsrec->ls_time = (uintptr_t)avg[1];
 
 	if (g_recsize >= LS_HIST)
 		return (DTRACE_AGGWALK_NEXT);
 
 out:
 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
 	lsdata->lsd_count++;
 
 	return (DTRACE_AGGWALK_NEXT);
 }
 
 static int
 process_trace(const dtrace_probedata_t *pdata, void *arg)
 {
 	lsdata_t *lsdata = arg;
 	lsrec_t *lsrec = lsdata->lsd_next;
 	dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
 	caddr_t data = pdata->dtpda_data;
 
 	if (lsdata->lsd_count >= g_nrecs)
 		return (DTRACE_CONSUME_NEXT);
 
 	lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
 
 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
 	lsdata->lsd_count++;
 
 	return (DTRACE_CONSUME_NEXT);
 }
 
 static int
 process_data(FILE *out, char *data)
 {
 	lsdata_t lsdata;
 
 	/* LINTED - alignment */
 	lsdata.lsd_next = (lsrec_t *)data;
 	lsdata.lsd_count = 0;
 
 	if (g_tracing) {
 		if (dtrace_consume(g_dtp, out,
 		    process_trace, NULL, &lsdata) != 0)
 			dfail("failed to consume buffer");
 
 		return (lsdata.lsd_count);
 	}
 
 	if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
 	    process_aggregate, &lsdata) != 0)
 		dfail("failed to walk aggregate");
 
 	return (lsdata.lsd_count);
 }
 
 /*ARGSUSED*/
 static int
 drophandler(const dtrace_dropdata_t *data, void *arg)
 {
 	g_dropped++;
 	(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
 	return (DTRACE_HANDLE_OK);
 }
 
 int
 main(int argc, char **argv)
 {
 	char *data_buf;
 	lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
 	FILE *out = stdout;
 	int c;
 	pid_t child;
 	int status;
 	int i, j;
 	hrtime_t duration;
 	char *addrp, *offp, *sizep, *evp, *lastp, *p;
 	uintptr_t addr;
 	size_t size, off;
 	int events_specified = 0;
 	int exec_errno = 0;
 	uint32_t event;
 	char *filt = NULL, *ifilt = NULL;
 	static uint64_t ev_count[LS_MAX_EVENTS + 1];
 	static uint64_t ev_time[LS_MAX_EVENTS + 1];
 	dtrace_optval_t aggsize;
 	char aggstr[10];
 	long ncpus;
 	int dynvar = 0;
 	int err;
 
 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
 		fail(0, "cannot open dtrace library: %s",
 		    dtrace_errmsg(NULL, err));
 	}
 
 	if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
 		dfail("couldn't establish drop handler");
 
 	if (symtab_init() == -1)
 		fail(1, "can't load kernel symbols");
 
 	g_nrecs = DEFAULT_NRECS;
 
 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
 		switch (c) {
 		case 'b':
 			g_recsize = LS_BASIC;
 			break;
 
 		case 't':
 			g_recsize = LS_TIME;
 			break;
 
 		case 'h':
 			g_recsize = LS_HIST;
 			break;
 
 		case 's':
 			if (!isdigit(optarg[0]))
 				usage();
 			g_stkdepth = atoi(optarg);
 			if (g_stkdepth > LS_MAX_STACK_DEPTH)
 				fail(0, "max stack depth is %d",
 				    LS_MAX_STACK_DEPTH);
 			g_recsize = LS_STACK(g_stkdepth);
 			break;
 
 		case 'n':
 			if (!isdigit(optarg[0]))
 				usage();
 			g_nrecs = atoi(optarg);
 			break;
 
 		case 'd':
 			if (!isdigit(optarg[0]))
 				usage();
 			duration = atoll(optarg);
 
 			/*
 			 * XXX -- durations really should be per event
 			 * since the units are different, but it's hard
 			 * to express this nicely in the interface.
 			 * Not clear yet what the cleanest solution is.
 			 */
 			for (i = 0; i < LS_MAX_EVENTS; i++)
 				if (g_event_info[i].ev_type != 'E')
 					g_min_duration[i] = duration;
 
 			break;
 
 		case 'i':
 			if (!isdigit(optarg[0]))
 				usage();
 			i = atoi(optarg);
 			if (i <= 0)
 				usage();
 			if (i > MAX_HZ)
 				fail(0, "max interrupt rate is %d Hz", MAX_HZ);
 
 			for (j = 0; j < LS_MAX_EVENTS; j++)
 				if (strcmp(g_event_info[j].ev_desc,
 				    "Profiling interrupt") == 0)
 					break;
 
 			(void) sprintf(g_event_info[j].ev_name,
 			    "profile:::profile-%d", i);
 			break;
 
 		case 'l':
 		case 'f':
 			addrp = strtok(optarg, ",");
 			sizep = strtok(NULL, ",");
 			addrp = strtok(optarg, ",+");
 			offp = strtok(NULL, ",");
 
 			size = sizep ? strtoul(sizep, NULL, 0) : 1;
 			off = offp ? strtoul(offp, NULL, 0) : 0;
 
 			if (addrp[0] == '0') {
 				addr = strtoul(addrp, NULL, 16) + off;
 			} else {
 				addr = sym_to_addr(addrp) + off;
 				if (sizep == NULL)
 					size = sym_size(addrp) - off;
 				if (addr - off == 0)
 					fail(0, "symbol '%s' not found", addrp);
 				if (size == 0)
 					size = 1;
 			}
 
 
 			if (c == 'l') {
 				filter_add(&filt, "arg0", addr, size);
 			} else {
 				filter_add(&filt, "caller", addr, size);
 				filter_add(&ifilt, "arg0", addr, size);
 			}
 			break;
 
 		case 'e':
 			evp = strtok_r(optarg, ",", &lastp);
 			while (evp) {
 				int ev1, ev2;
 				char *evp2;
 
 				(void) strtok(evp, "-");
 				evp2 = strtok(NULL, "-");
 				ev1 = atoi(evp);
 				ev2 = evp2 ? atoi(evp2) : ev1;
 				if ((uint_t)ev1 >= LS_MAX_EVENTS ||
 				    (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
 					fail(0, "-e events out of range");
 				for (i = ev1; i <= ev2; i++)
 					g_enabled[i] = 1;
 				evp = strtok_r(NULL, ",", &lastp);
 			}
 			events_specified = 1;
 			break;
 
 		case 'c':
 			g_cflag = 1;
 			break;
 
 		case 'k':
 			g_kflag = 1;
 			break;
 
 		case 'w':
 			g_wflag = 1;
 			break;
 
 		case 'W':
 			g_Wflag = 1;
 			break;
 
 		case 'g':
 			g_gflag = 1;
 			break;
 
 		case 'C':
 		case 'E':
 		case 'H':
 		case 'I':
 			for (i = 0; i < LS_MAX_EVENTS; i++)
 				if (g_event_info[i].ev_type == c)
 					g_enabled[i] = 1;
 			events_specified = 1;
 			break;
 
 		case 'A':
 			for (i = 0; i < LS_MAX_EVENTS; i++)
 				if (strchr("CH", g_event_info[i].ev_type))
 					g_enabled[i] = 1;
 			events_specified = 1;
 			break;
 
 		case 'T':
 			g_tracing = 1;
 			break;
 
 		case 'D':
 			if (!isdigit(optarg[0]))
 				usage();
 			g_topn = atoi(optarg);
 			break;
 
 		case 'R':
 			g_rates = 1;
 			break;
 
 		case 'p':
 			g_pflag = 1;
 			break;
 
 		case 'P':
 			g_Pflag = 1;
 			break;
 
 		case 'o':
 			if ((out = fopen(optarg, "w")) == NULL)
 				fail(1, "error opening file");
 			break;
 
 		case 'V':
 			g_Vflag = 1;
 			break;
 
 		default:
 			if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
 				usage();
 		}
 	}
 
 	if (filt != NULL) {
 		predicate_add(&g_predicate, filt, NULL, 0);
 		filter_destroy(&filt);
 	}
 
 	if (ifilt != NULL) {
 		predicate_add(&g_ipredicate, ifilt, NULL, 0);
 		filter_destroy(&ifilt);
 	}
 
 	if (g_recsize == 0) {
 		if (g_gflag) {
 			g_stkdepth = LS_MAX_STACK_DEPTH;
 			g_recsize = LS_STACK(g_stkdepth);
 		} else {
 			g_recsize = LS_TIME;
 		}
 	}
 
 	if (g_gflag && g_recsize <= LS_STACK(0))
 		fail(0, "'-g' requires at least '-s 1' data gathering");
 
 	/*
 	 * Round the record size up to a multiple of sizeof (uint64_t) so
 	 * that records stay 64-bit aligned.
 	 */
 	g_recsize = -(-g_recsize & -sizeof (uint64_t));
 
 	for (i = 0; i < LS_MAX_EVENTS; i++) {
 		/*
 		 * If no events were specified, enable -C.
 		 */
 		if (!events_specified && g_event_info[i].ev_type == 'C')
 			g_enabled[i] = 1;
 	}
 
 	for (i = 0; i < LS_MAX_EVENTS; i++) {
 		if (!g_enabled[i])
 			continue;
 
 		if (g_event_info[i].ev_acquire != NULL) {
 			/*
 			 * If we've enabled a hold event, we must explicitly
 			 * allocate dynamic variable space.
 			 */
 			dynvar = 1;
 		}
 
 		dprog_addevent(i);
 	}
 
 	/*
 	 * Make sure there are remaining arguments to specify a child command
 	 * to execute.
 	 */
 	if (argc <= optind)
 		usage();
 
 	if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
 		dfail("couldn't determine number of online CPUs");
 
 	/*
 	 * By default, we set our data buffer size to be the number of records
 	 * multiplied by the size of the record, doubled to account for some
 	 * DTrace slop and divided by the number of CPUs.  We silently clamp
 	 * the aggregation size at both a minimum and a maximum to prevent
 	 * absurdly low or high values.
 	 */
 	if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
 		aggsize = MIN_AGGSIZE;
 
 	if (aggsize > MAX_AGGSIZE)
 		aggsize = MAX_AGGSIZE;
 
 	(void) sprintf(aggstr, "%lld", (long long)aggsize);
 
 	if (!g_tracing) {
 		if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
 			dfail("failed to set 'bufsize'");
 
 		if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
 			dfail("failed to set 'aggsize'");
 
 		if (dynvar) {
 			/*
 			 * If we're using dynamic variables, we set our
 			 * dynamic variable size to be one megabyte per CPU,
 			 * with a hard-limit of 32 megabytes.  This may still
 			 * be too small in some cases, but it can be tuned
 			 * manually via -x if need be.
 			 */
 			(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
 
 			if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
 				dfail("failed to set 'dynvarsize'");
 		}
 	} else {
 		if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
 			dfail("failed to set 'bufsize'");
 	}
 
 	if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
 		dfail("failed to set 'statusrate'");
 
 	optind = 1;
 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
 		switch (c) {
 		case 'x':
 			if ((p = strchr(optarg, '=')) != NULL)
 				*p++ = '\0';
 
 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
 				dfail("failed to set -x %s", optarg);
 			break;
 		}
 	}
 
 	argc -= optind;
 	argv += optind;
 
 	dprog_compile();
 	status_init();
 
 	g_elapsed = -gethrtime();
 
 	/*
 	 * Spawn the specified command and wait for it to complete.
 	 */
 	child = fork();
 	if (child == -1)
 		fail(1, "cannot fork");
 	if (child == 0) {
 		(void) dtrace_close(g_dtp);
 		(void) execvp(argv[0], &argv[0]);
 		exec_errno = errno;
 		exit(127);
 	}
 
 #ifdef illumos
 	while (waitpid(child, &status, WEXITED) != child)
 #else
 	while (waitpid(child, &status, 0) != child)
 #endif
 		status_check();
 
 	g_elapsed += gethrtime();
 
 	if (WIFEXITED(status)) {
 		if (WEXITSTATUS(status) != 0) {
 			if (exec_errno != 0) {
 				errno = exec_errno;
 				fail(1, "could not execute %s", argv[0]);
 			}
 			(void) fprintf(stderr,
 			    "lockstat: warning: %s exited with code %d\n",
 			    argv[0], WEXITSTATUS(status));
 		}
 	} else {
 		(void) fprintf(stderr,
 		    "lockstat: warning: %s died on signal %d\n",
 		    argv[0], WTERMSIG(status));
 	}
 
 	if (dtrace_stop(g_dtp) == -1)
 		dfail("failed to stop dtrace");
 
 	/*
 	 * Before we read out the results, we need to allocate our buffer.
 	 * If we're tracing, then we'll just use the precalculated size.  If
 	 * we're not, then we'll take a snapshot of the aggregate, and walk
 	 * it to count the number of records.
 	 */
 	if (!g_tracing) {
 		if (dtrace_aggregate_snap(g_dtp) != 0)
 			dfail("failed to snap aggregate");
 
 		g_nrecs = 0;
 
 		if (dtrace_aggregate_walk(g_dtp,
 		    count_aggregate, &g_nrecs) != 0)
 			dfail("failed to walk aggregate");
 	}
 
 #ifdef illumos
 	if ((data_buf = memalign(sizeof (uint64_t),
 	    (g_nrecs + 1) * g_recsize)) == NULL)
 #else
 	if (posix_memalign((void **)&data_buf, sizeof (uint64_t),  
 	    (g_nrecs + 1) * g_recsize) )
 #endif
 		fail(1, "Memory allocation failed");
 
 	/*
 	 * Read out the DTrace data.
 	 */
 	g_nrecs_used = process_data(out, data_buf);
 
 	if (g_nrecs_used > g_nrecs || g_dropped)
 		(void) fprintf(stderr, "lockstat: warning: "
 		    "ran out of data records (use -n for more)\n");
 
 	/* LINTED - alignment */
 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
 	    /* LINTED - alignment */
 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
 		ev_count[lsp->ls_event] += lsp->ls_count;
 		ev_time[lsp->ls_event] += lsp->ls_time;
 	}
 
 	/*
 	 * If -g was specified, convert stacks into individual records.
 	 */
 	if (g_gflag) {
 		lsrec_t *newlsp, *oldlsp;
 
 #ifdef illumos
 		newlsp = memalign(sizeof (uint64_t),
 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
 #else
 		posix_memalign((void **)&newlsp, sizeof (uint64_t), 
 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
 #endif
 		if (newlsp == NULL)
 			fail(1, "Cannot allocate space for -g processing");
 		lsp = newlsp;
 		/* LINTED - alignment */
 		for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
 		    /* LINTED - alignment */
 		    oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
 			int fr;
 			int caller_in_stack = 0;
 
 			if (oldlsp->ls_count == 0)
 				continue;
 
 			for (fr = 0; fr < g_stkdepth; fr++) {
 				if (oldlsp->ls_stack[fr] == 0)
 					break;
 				if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
 					caller_in_stack = 1;
 				bcopy(oldlsp, lsp, LS_TIME);
 				lsp->ls_caller = oldlsp->ls_stack[fr];
 				/* LINTED - alignment */
 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
 			}
 			if (!caller_in_stack) {
 				bcopy(oldlsp, lsp, LS_TIME);
 				/* LINTED - alignment */
 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
 			}
 		}
 		g_nrecs = g_nrecs_used =
 		    ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
 		g_recsize = LS_TIME;
 		g_stkdepth = 0;
 		free(data_buf);
 		data_buf = (char *)newlsp;
 	}
 
 	if ((sort_buf = calloc(2 * (g_nrecs + 1),
 	    sizeof (void *))) == NULL)
 		fail(1, "Sort buffer allocation failed");
 	merge_buf = sort_buf + (g_nrecs + 1);
 
 	/*
 	 * Build the sort buffer, discarding zero-count records along the way.
 	 */
 	/* LINTED - alignment */
 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
 	    /* LINTED - alignment */
 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
 		if (lsp->ls_count == 0)
 			lsp->ls_event = LS_MAX_EVENTS;
 		sort_buf[i] = lsp;
 	}
 
 	if (g_nrecs_used == 0)
 		exit(0);
 
 	/*
 	 * Add a sentinel after the last record
 	 */
 	sort_buf[i] = lsp;
 	lsp->ls_event = LS_MAX_EVENTS;
 
 	if (g_tracing) {
 		report_trace(out, sort_buf);
 		return (0);
 	}
 
 	/*
 	 * Application of -g may have resulted in multiple records
 	 * with the same signature; coalesce them.
 	 */
 	if (g_gflag) {
 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
 		coalesce(lockcmp, sort_buf, g_nrecs_used);
 	}
 
 	/*
 	 * Coalesce locks within the same symbol if -c option specified.
 	 * Coalesce PCs within the same function if -k option specified.
 	 */
 	if (g_cflag || g_kflag) {
 		for (i = 0; i < g_nrecs_used; i++) {
 			int fr;
 			lsp = sort_buf[i];
 			if (g_cflag)
 				coalesce_symbol(&lsp->ls_lock);
 			if (g_kflag) {
 				for (fr = 0; fr < g_stkdepth; fr++)
 					coalesce_symbol(&lsp->ls_stack[fr]);
 				coalesce_symbol(&lsp->ls_caller);
 			}
 		}
 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
 		coalesce(lockcmp, sort_buf, g_nrecs_used);
 	}
 
 	/*
 	 * Coalesce callers if -w option specified
 	 */
 	if (g_wflag) {
 		mergesort(lock_and_count_cmp_anywhere,
 		    sort_buf, merge_buf, g_nrecs_used);
 		coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
 	}
 
 	/*
 	 * Coalesce locks if -W option specified
 	 */
 	if (g_Wflag) {
 		mergesort(site_and_count_cmp_anylock,
 		    sort_buf, merge_buf, g_nrecs_used);
 		coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
 	}
 
 	/*
 	 * Sort data by contention count (ls_count) or total time (ls_time),
 	 * depending on g_Pflag.  Override g_Pflag if time wasn't measured.
 	 */
 	if (g_recsize < LS_TIME)
 		g_Pflag = 0;
 
 	if (g_Pflag)
 		mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
 	else
 		mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
 
 	/*
 	 * Display data by event type
 	 */
 	first = &sort_buf[0];
 	while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
 		current = first;
 		while ((lsp = *current)->ls_event == event)
 			current++;
 		report_stats(out, first, current - first, ev_count[event],
 		    ev_time[event]);
 		first = current;
 	}
 
 	return (0);
 }
 
 static char *
 format_symbol(char *buf, uintptr_t addr, int show_size)
 {
 	uintptr_t symoff;
 	char *symname;
 	size_t symsize;
 
 	symname = addr_to_sym(addr, &symoff, &symsize);
 
 	if (show_size && symoff == 0)
 		(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
 	else if (symoff == 0)
 		(void) sprintf(buf, "%s", symname);
 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
 #ifdef illumos
 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
 #else
 		(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
 #endif
 	else if (symoff <= symsize || (symoff < 256 && addr != symoff))
 		(void) sprintf(buf, "%s+0x%llx", symname,
 		    (unsigned long long)symoff);
 	else
 		(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
 	return (buf);
 }
 
 static void
 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
 	uint64_t total_time)
 {
 	uint32_t event = sort_buf[0]->ls_event;
 	lsrec_t *lsp;
 	double ptotal = 0.0;
 	double percent;
 	int i, j, fr;
 	int displayed;
 	int first_bin, last_bin, max_bin_count, total_bin_count;
 	int rectype;
 	char buf[256];
 	char lhdr[80], chdr[80];
 
 	rectype = g_recsize;
 
 	if (g_topn == 0) {
 		(void) fprintf(out, "%20llu %s\n",
 		    g_rates == 0 ? total_count :
 		    ((unsigned long long)total_count * NANOSEC) / g_elapsed,
 		    g_event_info[event].ev_desc);
 		return;
 	}
 
 	(void) sprintf(lhdr, "%s%s",
 	    g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
 	(void) sprintf(chdr, "%s%s",
 	    g_wflag ? "Hottest " : "", "Caller");
 
 	if (!g_pflag)
 		(void) fprintf(out,
 		    "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
 		    g_event_info[event].ev_desc, (double)total_count,
 		    (double)g_elapsed / NANOSEC,
 		    (double)total_count * NANOSEC / g_elapsed);
 
 	if (!g_pflag && rectype < LS_HIST) {
 		(void) sprintf(buf, "%s", g_event_info[event].ev_units);
 		(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
 		    g_rates ? "ops/s" : "Count",
 		    g_gflag ? "genr" : "indv",
 		    "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
 		(void) fprintf(out, "---------------------------------"
 		    "----------------------------------------------\n");
 	}
 
 	displayed = 0;
 	for (i = 0; i < nrecs; i++) {
 		lsp = sort_buf[i];
 
 		if (displayed++ >= g_topn)
 			break;
 
 		if (g_pflag) {
 			int j;
 
 			(void) fprintf(out, "%u %u",
 			    lsp->ls_event, lsp->ls_count);
 			(void) fprintf(out, " %s",
 			    format_symbol(buf, lsp->ls_lock, g_cflag));
 			(void) fprintf(out, " %s",
 			    format_symbol(buf, lsp->ls_caller, 0));
 			(void) fprintf(out, " %f",
 			    (double)lsp->ls_refcnt / lsp->ls_count);
 			if (rectype >= LS_TIME)
 				(void) fprintf(out, " %llu",
 				    (unsigned long long)lsp->ls_time);
 			if (rectype >= LS_HIST) {
 				for (j = 0; j < 64; j++)
 					(void) fprintf(out, " %u",
 					    lsp->ls_hist[j]);
 			}
 			for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
 				if (rectype <= LS_STACK(j) ||
 				    lsp->ls_stack[j] == 0)
 					break;
 				(void) fprintf(out, " %s",
 				    format_symbol(buf, lsp->ls_stack[j], 0));
 			}
 			(void) fprintf(out, "\n");
 			continue;
 		}
 
 		if (rectype >= LS_HIST) {
 			(void) fprintf(out, "---------------------------------"
 			    "----------------------------------------------\n");
 			(void) sprintf(buf, "%s",
 			    g_event_info[event].ev_units);
 			(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
 			    g_rates ? "ops/s" : "Count",
 			    g_gflag ? "genr" : "indv",
 			    "cuml", "rcnt", buf, lhdr, chdr);
 		}
 
 		if (g_Pflag && total_time != 0)
 			percent = (lsp->ls_time * 100.00) / total_time;
 		else
 			percent = (lsp->ls_count * 100.00) / total_count;
 
 		ptotal += percent;
 
 		if (rectype >= LS_TIME)
 			(void) sprintf(buf, "%llu",
 			    (unsigned long long)(lsp->ls_time / lsp->ls_count));
 		else
 			buf[0] = '\0';
 
 		(void) fprintf(out, "%5llu ",
 		    g_rates == 0 ? lsp->ls_count :
 		    ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
 
 		(void) fprintf(out, "%3.0f%% ", percent);
 
 		if (g_gflag)
 			(void) fprintf(out, "---- ");
 		else
 			(void) fprintf(out, "%3.0f%% ", ptotal);
 
 		(void) fprintf(out, "%4.2f %8s ",
 		    (double)lsp->ls_refcnt / lsp->ls_count, buf);
 
 		(void) fprintf(out, "%-22s ",
 		    format_symbol(buf, lsp->ls_lock, g_cflag));
 
 		(void) fprintf(out, "%-24s\n",
 		    format_symbol(buf, lsp->ls_caller, 0));
 
 		if (rectype < LS_HIST)
 			continue;
 
 		(void) fprintf(out, "\n");
 		(void) fprintf(out, "%10s %31s %-9s %-24s\n",
 		    g_event_info[event].ev_units,
 		    "------ Time Distribution ------",
 		    g_rates ? "ops/s" : "count",
 		    rectype > LS_STACK(0) ? "Stack" : "");
 
 		first_bin = 0;
 		while (lsp->ls_hist[first_bin] == 0)
 			first_bin++;
 
 		last_bin = 63;
 		while (lsp->ls_hist[last_bin] == 0)
 			last_bin--;
 
 		max_bin_count = 0;
 		total_bin_count = 0;
 		for (j = first_bin; j <= last_bin; j++) {
 			total_bin_count += lsp->ls_hist[j];
 			if (lsp->ls_hist[j] > max_bin_count)
 				max_bin_count = lsp->ls_hist[j];
 		}
 
 		/*
 		 * If we went a few frames below the caller, ignore them
 		 */
 		for (fr = 3; fr > 0; fr--)
 			if (lsp->ls_stack[fr] == lsp->ls_caller)
 				break;
 
 		for (j = first_bin; j <= last_bin; j++) {
 			uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
 			(void) fprintf(out, "%10llu |%s%s %-9u ",
 			    1ULL << j,
 			    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
 			    "                              " + depth,
 			    g_rates == 0 ? lsp->ls_hist[j] :
 			    (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
 			    g_elapsed));
 			if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
 				(void) fprintf(out, "\n");
 				continue;
 			}
 			(void) fprintf(out, "%-24s\n",
 			    format_symbol(buf, lsp->ls_stack[fr], 0));
 			fr++;
 		}
 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
 			(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
 			    format_symbol(buf, lsp->ls_stack[fr], 0));
 			fr++;
 		}
 	}
 
 	if (!g_pflag)
 		(void) fprintf(out, "---------------------------------"
 		    "----------------------------------------------\n");
 
 	(void) fflush(out);
 }
 
 static void
 report_trace(FILE *out, lsrec_t **sort_buf)
 {
 	lsrec_t *lsp;
 	int i, fr;
 	int rectype;
 	char buf[256], buf2[256];
 
 	rectype = g_recsize;
 
 	if (!g_pflag) {
 		(void) fprintf(out, "%5s  %7s  %11s  %-24s  %-24s\n",
 		    "Event", "Time", "Owner", "Lock", "Caller");
 		(void) fprintf(out, "---------------------------------"
 		    "----------------------------------------------\n");
 	}
 
 	for (i = 0; i < g_nrecs_used; i++) {
 
 		lsp = sort_buf[i];
 
 		if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
 			continue;
 
 		(void) fprintf(out, "%2d  %10llu  %11p  %-24s  %-24s\n",
 		    lsp->ls_event, (unsigned long long)lsp->ls_time,
 		    (void *)lsp->ls_next,
 		    format_symbol(buf, lsp->ls_lock, 0),
 		    format_symbol(buf2, lsp->ls_caller, 0));
 
 		if (rectype <= LS_STACK(0))
 			continue;
 
 		/*
 		 * If we went a few frames below the caller, ignore them
 		 */
 		for (fr = 3; fr > 0; fr--)
 			if (lsp->ls_stack[fr] == lsp->ls_caller)
 				break;
 
 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
 			(void) fprintf(out, "%53s  %-24s\n", "",
 			    format_symbol(buf, lsp->ls_stack[fr], 0));
 			fr++;
 		}
 		(void) fprintf(out, "\n");
 	}
 
 	(void) fflush(out);
 }
Index: head/sys/kern/kern_mutex.c
===================================================================
--- head/sys/kern/kern_mutex.c	(revision 284296)
+++ head/sys/kern/kern_mutex.c	(revision 284297)
@@ -1,1021 +1,1037 @@
 /*-
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
  */
 
 /*
  * Machine independent bits of mutex implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_adaptive_mutexes.h"
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #include <sys/vmmeter.h>
 #include <sys/lock_profile.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/cpu.h>
 
 #include <ddb/ddb.h>
 
 #include <fs/devfs/devfs_int.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 #define	ADAPTIVE_MUTEXES
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DEFINE( , , lock, failed);
 #endif
 
 /*
  * Return the mutex address when the lock cookie address is provided.
  * This functionality assumes that struct mtx has a member named mtx_lock.
  */
 #define	mtxlock2mtx(c)	(__containerof(c, struct mtx, mtx_lock))
 
 /*
  * Internal utility macros.
  */
 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
 
 #define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)
 
 #define	mtx_owner(m)	((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))
 
 static void	assert_mtx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_mtx(const struct lock_object *lock);
 #endif
 static void	lock_mtx(struct lock_object *lock, uintptr_t how);
 static void	lock_spin(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_mtx(const struct lock_object *lock,
 		    struct thread **owner);
 #endif
 static uintptr_t unlock_mtx(struct lock_object *lock);
 static uintptr_t unlock_spin(struct lock_object *lock);
 
 /*
  * Lock classes for sleep and spin mutexes.
  */
 struct lock_class lock_class_mtx_sleep = {
 	.lc_name = "sleep mutex",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_mtx,
 	.lc_unlock = unlock_mtx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 struct lock_class lock_class_mtx_spin = {
 	.lc_name = "spin mutex",
 	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
 	.lc_assert = assert_mtx,
 #ifdef DDB
 	.lc_ddb_show = db_show_mtx,
 #endif
 	.lc_lock = lock_spin,
 	.lc_unlock = unlock_spin,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_mtx,
 #endif
 };
 
 /*
  * System-wide mutexes
  */
 struct mtx blocked_lock;
 struct mtx Giant;
 
 void
 assert_mtx(const struct lock_object *lock, int what)
 {
 
 	mtx_assert((const struct mtx *)lock, what);
 }
 
 void
 lock_mtx(struct lock_object *lock, uintptr_t how)
 {
 
 	mtx_lock((struct mtx *)lock);
 }
 
 void
 lock_spin(struct lock_object *lock, uintptr_t how)
 {
 
 	panic("spin locks can only use msleep_spin");
 }
 
 uintptr_t
 unlock_mtx(struct lock_object *lock)
 {
 	struct mtx *m;
 
 	m = (struct mtx *)lock;
 	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(m);
 	return (0);
 }
 
 uintptr_t
 unlock_spin(struct lock_object *lock)
 {
 
 	panic("spin locks can only use msleep_spin");
 }
 
 #ifdef KDTRACE_HOOKS
 int
 owner_mtx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct mtx *m = (const struct mtx *)lock;
 
 	*owner = mtx_owner(m);
 	return (mtx_unowned(m) == 0);
 }
 #endif
 
 /*
  * Function versions of the inlined __mtx_* macros.  These are used by
  * modules and can also be called from assembly language if needed.
  */
 void
 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
 	__mtx_lock(m, curthread, opts, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
 	    file, line);
 	curthread->td_locks++;
 }
 
 void
 __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 	__mtx_unlock(m, curthread, opts, file, line);
 	curthread->td_locks--;
 }
 
 void
 __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 	opts &= ~MTX_RECURSE;
 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 	__mtx_lock_spin(m, curthread, opts, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 }
 
 void
 __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
     int line)
 {
 	struct mtx *m;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 
 	__mtx_unlock_spin(m);
 }
 
 /*
  * The important part of mtx_trylock{,_flags}()
  * Tries to acquire lock `m.'  If this function is called on a mutex that
  * is already owned, it will recursively acquire the lock.
  */
 int
 _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int rval;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	m = mtxlock2mtx(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
 	    curthread, m->lock_object.lo_name, file, line));
 	KASSERT(m->mtx_lock != MTX_DESTROYED,
 	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 
 	if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 	    (opts & MTX_RECURSE) != 0)) {
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		rval = 1;
 	} else
 		rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
 	opts &= ~MTX_RECURSE;
 
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		curthread->td_locks++;
 		if (m->mtx_recurse == 0)
 			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE,
 			    m, contested, waittime, file, line);
 
 	}
 
 	return (rval);
 }
 
 /*
  * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
  *
  * We call this if the lock is either contested (i.e. we need to go to
  * sleep waiting for it), or if we need to recurse on it.
  */
 void
 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
     const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
 	uintptr_t v;
 #ifdef ADAPTIVE_MUTEXES
 	volatile struct thread *owner;
 #endif
 #ifdef KTR
 	int cont_logged = 0;
 #endif
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
 	uint64_t spin_cnt = 0;
 	uint64_t sleep_cnt = 0;
 	int64_t sleep_time = 0;
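+	/* Total time spent acquiring the lock (spinning plus sleeping). */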
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	if (mtx_owned(m)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
 		    (opts & MTX_RECURSE) != 0,
 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
 		opts &= ~MTX_RECURSE;
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
 		return;
 	}
 	opts &= ~MTX_RECURSE;
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object,
 		    &contested, &waittime);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
+#ifdef KDTRACE_HOOKS
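+	/* Start the total-time clock; stopped once the lock is acquired. */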
+	all_time -= lockstat_nsecs();
+#endif
 
 	while (!_mtx_obtain_lock(m, tid)) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * If the owner is running on another CPU, spin until the
 		 * owner stops running or the state of the lock changes.
 		 */
 		v = m->mtx_lock;
 		if (v != MTX_UNOWNED) {
 			owner = (struct thread *)(v & ~MTX_FLAGMASK);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&m->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, m, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname((struct thread *)tid),
 				    "spinning", "lockname:\"%s\"",
 				    m->lock_object.lo_name);
 				while (mtx_owner(m) == owner &&
 				    TD_IS_RUNNING(owner)) {
 					cpu_spinwait();
 #ifdef KDTRACE_HOOKS
 					spin_cnt++;
 #endif
 				}
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname((struct thread *)tid),
 				    "running");
 				continue;
 			}
 		}
 #endif
 
 		ts = turnstile_trywait(&m->lock_object);
 		v = m->mtx_lock;
 
 		/*
 		 * Check if the lock has been released while spinning for
 		 * the turnstile chain lock.
 		 */
 		if (v == MTX_UNOWNED) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 #ifdef ADAPTIVE_MUTEXES
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		owner = (struct thread *)(v & ~MTX_FLAGMASK);
 		if (TD_IS_RUNNING(owner)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 #endif
 
 		/*
 		 * If the mutex isn't already contested and a failure occurs
 		 * setting the contested bit, the mutex was either released
 		 * or the state of the MTX_RECURSED bit changed.
 		 */
 		if ((v & MTX_CONTESTED) == 0 &&
 		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 		/*
 		 * We definitely must sleep for this lock.
 		 */
 		mtx_assert(m, MA_NOTOWNED);
 
 #ifdef KTR
 		if (!cont_logged) {
 			CTR6(KTR_CONTENTION,
 			    "contention: %p at %s:%d wants %s, taken by %s:%d",
 			    (void *)tid, file, line, m->lock_object.lo_name,
 			    WITNESS_FILE(&m->lock_object),
 			    WITNESS_LINE(&m->lock_object));
 			cont_logged = 1;
 		}
 #endif
 
 		/*
 		 * Block on the turnstile.
 		 */
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs();
 #endif
 		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs();
 		sleep_cnt++;
 #endif
 	}
+#ifdef KDTRACE_HOOKS
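+	/* Lock acquired; stop the total-time clock. */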
+	all_time += lockstat_nsecs();
+#endif
 #ifdef KTR
 	if (cont_logged) {
 		CTR4(KTR_CONTENTION,
 		    "contention end: %s acquired by %p at %s:%d",
 		    m->lock_object.lo_name, (void *)tid, file, line);
 	}
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, m, contested,
 	    waittime, file, line);
 #ifdef KDTRACE_HOOKS
 	if (sleep_time)
 		LOCKSTAT_RECORD1(LS_MTX_LOCK_BLOCK, m, sleep_time);
 
 	/*
 	 * Record only the time spent spinning, not sleeping.
 	 */
 	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt));
+		LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time));
 #endif
 }
 
 static void
 _mtx_lock_spin_failed(struct mtx *m)
 {
 	struct thread *td;
 
 	td = mtx_owner(m);
 
 	/* If the mutex is unlocked, try again. */
 	if (td == NULL)
 		return;
 
 	printf( "spin lock %p (%s) held by %p (tid %d) too long\n",
 	    m, m->lock_object.lo_name, td, td->td_tid);
 #ifdef WITNESS
 	witness_display_spinlock(&m->lock_object, td, printf);
 #endif
 	panic("spin lock held too long");
 }
 
 #ifdef SMP
 /*
  * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
  *
  * This is only called if we need to actually spin for the lock. Recursion
  * is handled inline.
  */
 void
 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
     const char *file, int line)
 {
 	struct mtx *m;
 	int i = 0;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
+#ifdef KDTRACE_HOOKS
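+	/* Time spent busy-waiting for the spin lock. */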
+	int64_t spin_time = 0;
+#endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
+#ifdef KDTRACE_HOOKS
+	spin_time -= lockstat_nsecs();
+#endif
 	while (!_mtx_obtain_lock(m, tid)) {
 
 		/* Give interrupts a chance while we spin. */
 		spinlock_exit();
 		while (m->mtx_lock != MTX_UNOWNED) {
 			if (i++ < 10000000) {
 				cpu_spinwait();
 				continue;
 			}
 			if (i < 60000000 || kdb_active || panicstr != NULL)
 				DELAY(1);
 			else
 				_mtx_lock_spin_failed(m);
 			cpu_spinwait();
 		}
 		spinlock_enter();
 	}
+#ifdef KDTRACE_HOOKS
+	spin_time += lockstat_nsecs();
+#endif
 
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
 	KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
 	    "running");
 
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
 	    contested, waittime, (file), (line));
-	LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i);
+	LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time);
 }
 #endif /* SMP */
 
 void
 thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	uintptr_t tid;
 	int i;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
 #endif
 #ifdef KDTRACE_HOOKS
-	uint64_t spin_cnt = 0;
+	int64_t spin_time = 0;
 #endif
 
 	i = 0;
 	tid = (uintptr_t)curthread;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
+#ifdef KDTRACE_HOOKS
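+	/* Time the whole loop, including retries when td_lock changes. */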
+	spin_time -= lockstat_nsecs();
+#endif
 	for (;;) {
 retry:
 		spinlock_enter();
 		m = td->td_lock;
 		KASSERT(m->mtx_lock != MTX_DESTROYED,
 		    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
 		KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
 		    ("thread_lock() of sleep mutex %s @ %s:%d",
 		    m->lock_object.lo_name, file, line));
 		if (mtx_owned(m))
 			KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
 			    m->lock_object.lo_name, file, line));
 		WITNESS_CHECKORDER(&m->lock_object,
 		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 		while (!_mtx_obtain_lock(m, tid)) {
-#ifdef KDTRACE_HOOKS
-			spin_cnt++;
-#endif
 			if (m->mtx_lock == tid) {
 				m->mtx_recurse++;
 				break;
 			}
 #ifdef HWPMC_HOOKS
 			PMC_SOFT_CALL( , , lock, failed);
 #endif
 			lock_profile_obtain_lock_failed(&m->lock_object,
 			    &contested, &waittime);
 			/* Give interrupts a chance while we spin. */
 			spinlock_exit();
 			while (m->mtx_lock != MTX_UNOWNED) {
 				if (i++ < 10000000)
 					cpu_spinwait();
 				else if (i < 60000000 ||
 				    kdb_active || panicstr != NULL)
 					DELAY(1);
 				else
 					_mtx_lock_spin_failed(m);
 				cpu_spinwait();
 				if (m != td->td_lock)
 					goto retry;
 			}
 			spinlock_enter();
 		}
 		if (m == td->td_lock)
 			break;
 		__mtx_unlock_spin(m);	/* does spinlock_exit() */
+	}
 #ifdef KDTRACE_HOOKS
-		spin_cnt++;
+	spin_time += lockstat_nsecs();
 #endif
-	}
 	if (m->mtx_recurse == 0)
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
 		    m, contested, waittime, (file), (line));
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
-	LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt);
+	LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_time);
 }
 
 struct mtx *
 thread_lock_block(struct thread *td)
 {
 	struct mtx *lock;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
 	td->td_lock = &blocked_lock;
 	mtx_unlock_spin(lock);
 
 	return (lock);
 }
 
 void
 thread_lock_unblock(struct thread *td, struct mtx *new)
 {
 	mtx_assert(new, MA_OWNED);
 	MPASS(td->td_lock == &blocked_lock);
 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
 }
 
 void
 thread_lock_set(struct thread *td, struct mtx *new)
 {
 	struct mtx *lock;
 
 	mtx_assert(new, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
 	td->td_lock = new;
 	mtx_unlock_spin(lock);
 }
 
 /*
  * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
  *
  * We are only called here if the lock is recursed or contested (i.e. we
  * need to wake up a blocked thread).
  */
 void
 __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
 
 	if (mtx_recursed(m)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
 		return;
 	}
 
 	/*
 	 * We have to lock the chain before the turnstile so this turnstile
 	 * can be removed from the hash list if it is empty.
 	 */
 	turnstile_chain_lock(&m->lock_object);
 	ts = turnstile_lookup(&m->lock_object);
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
 	MPASS(ts != NULL);
 	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
 	_mtx_release_lock_quick(m);
 
 	/*
 	 * This turnstile is now no longer associated with the mutex.  We can
 	 * unlock the chain lock so a new turnstile may take its place.
 	 */
 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	turnstile_chain_unlock(&m->lock_object);
 }
 
 /*
  * All the unlocking of MTX_SPIN locks is done inline.
  * See the __mtx_unlock_spin() macro for the details.
  */
 
 /*
  * The backing function for the INVARIANTS-enabled mtx_assert()
  */
 #ifdef INVARIANT_SUPPORT
 void
 __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct mtx *m;
 
 	if (panicstr != NULL || dumping)
 		return;
 
 	m = mtxlock2mtx(c);
 
 	switch (what) {
 	case MA_OWNED:
 	case MA_OWNED | MA_RECURSED:
 	case MA_OWNED | MA_NOTRECURSED:
 		if (!mtx_owned(m))
 			panic("mutex %s not owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		if (mtx_recursed(m)) {
 			if ((what & MA_NOTRECURSED) != 0)
 				panic("mutex %s recursed at %s:%d",
 				    m->lock_object.lo_name, file, line);
 		} else if ((what & MA_RECURSED) != 0) {
 			panic("mutex %s unrecursed at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		}
 		break;
 	case MA_NOTOWNED:
 		if (mtx_owned(m))
 			panic("mutex %s owned at %s:%d",
 			    m->lock_object.lo_name, file, line);
 		break;
 	default:
 		panic("unknown mtx_assert at %s:%d", file, line);
 	}
 }
 #endif
 
 /*
  * The MUTEX_DEBUG-enabled mtx_validate()
  *
  * Most of these checks have been moved off into the LO_INITIALIZED flag
  * maintained by the witness code.
  */
 #ifdef MUTEX_DEBUG
 
 void	mtx_validate(struct mtx *);
 
 void
 mtx_validate(struct mtx *m)
 {
 
 /*
  * XXX: When kernacc() does not require Giant we can reenable this check
  */
 #ifdef notyet
 	/*
 	 * Can't call kernacc() from early init386(), especially when
 	 * initializing Giant mutex, because some stuff in kernacc()
 	 * requires Giant itself.
 	 */
 	if (!cold)
 		if (!kernacc((caddr_t)m, sizeof(m),
 		    VM_PROT_READ | VM_PROT_WRITE))
 			panic("Can't read and write to mutex %p", m);
 #endif
 }
 #endif
 
 /*
  * General init routine used by the MTX_SYSINIT() macro.
  */
 void
 mtx_sysinit(void *arg)
 {
 	struct mtx_args *margs = arg;
 
 	mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
 	    margs->ma_opts);
 }
 
 /*
  * Mutex initialization routine; initialize lock `m' with the type and
  * options contained in `opts' and name `name.'  The optional
  * lock type `type' is used as a general lock category name for use with
  * witness.
  */
 void
 _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
 {
 	struct mtx *m;
 	struct lock_class *class;
 	int flags;
 
 	m = mtxlock2mtx(c);
 
 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
 	    MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
 	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
 	    &m->mtx_lock));
 
 #ifdef MUTEX_DEBUG
 	/* Diagnostic and error correction */
 	mtx_validate(m);
 #endif
 
 	/* Determine lock class and lock flags. */
 	if (opts & MTX_SPIN)
 		class = &lock_class_mtx_spin;
 	else
 		class = &lock_class_mtx_sleep;
 	flags = 0;
 	if (opts & MTX_QUIET)
 		flags |= LO_QUIET;
 	if (opts & MTX_RECURSE)
 		flags |= LO_RECURSABLE;
 	if ((opts & MTX_NOWITNESS) == 0)
 		flags |= LO_WITNESS;
 	if (opts & MTX_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & MTX_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (opts & MTX_NEW)
 		flags |= LO_NEW;
 
 	/* Initialize mutex. */
 	lock_init(&m->lock_object, class, name, type, flags);
 
 	m->mtx_lock = MTX_UNOWNED;
 	m->mtx_recurse = 0;
 }
 
 /*
  * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
  * passed in as a flag here because if the corresponding mtx_init() was
  * called with MTX_QUIET set, then it will already be set in the mutex's
  * flags.
  */
 void
 _mtx_destroy(volatile uintptr_t *c)
 {
 	struct mtx *m;
 
 	m = mtxlock2mtx(c);
 
 	if (!mtx_owned(m))
 		MPASS(mtx_unowned(m));
 	else {
 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
 
 		/* Perform the non-mtx related part of mtx_unlock_spin(). */
 		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
 			spinlock_exit();
 		else
 			curthread->td_locks--;
 
 		lock_profile_release_lock(&m->lock_object);
 		/* Tell witness this isn't locked to make it happy. */
 		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
 		    __LINE__);
 	}
 
 	m->mtx_lock = MTX_DESTROYED;
 	lock_destroy(&m->lock_object);
 }
 
 /*
  * Initialize the mutex code and system mutexes.  This is called from the MD
  * startup code prior to mi_startup().  The per-CPU data space needs to be
  * set up before this is called.
  */
 void
 mutex_init(void)
 {
 
 	/* Setup turnstiles so that sleep mutexes work. */
 	init_turnstiles();
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
 	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
 	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
 	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
 	mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
 	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
 	mtx_lock(&Giant);
 }
 
 #ifdef DDB
 void
 db_show_mtx(const struct lock_object *lock)
 {
 	struct thread *td;
 	const struct mtx *m;
 
 	m = (const struct mtx *)lock;
 
 	db_printf(" flags: {");
 	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
 		db_printf("SPIN");
 	else
 		db_printf("DEF");
 	if (m->lock_object.lo_flags & LO_RECURSABLE)
 		db_printf(", RECURSE");
 	if (m->lock_object.lo_flags & LO_DUPOK)
 		db_printf(", DUPOK");
 	db_printf("}\n");
 	db_printf(" state: {");
 	if (mtx_unowned(m))
 		db_printf("UNOWNED");
 	else if (mtx_destroyed(m))
 		db_printf("DESTROYED");
 	else {
 		db_printf("OWNED");
 		if (m->mtx_lock & MTX_CONTESTED)
 			db_printf(", CONTESTED");
 		if (m->mtx_lock & MTX_RECURSED)
 			db_printf(", RECURSED");
 	}
 	db_printf("}\n");
 	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
 		td = mtx_owner(m);
 		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (mtx_recursed(m))
 			db_printf(" recursed: %d\n", m->mtx_recurse);
 	}
 }
 #endif
Index: head/sys/kern/kern_rwlock.c
===================================================================
--- head/sys/kern/kern_rwlock.c	(revision 284296)
+++ head/sys/kern/kern_rwlock.c	(revision 284297)
@@ -1,1252 +1,1274 @@
 /*-
  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Machine independent bits of reader/writer lock implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_rwlocks.h"
 
 #include <sys/param.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/turnstile.h>
 
 #include <machine/cpu.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
 #define	ADAPTIVE_RWLOCKS
 #endif
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /*
  * Return the rwlock address when the lock cookie address is provided.
  * This functionality assumes that struct rwlock has a member named rw_lock.
  */
 #define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
 
 #ifdef ADAPTIVE_RWLOCKS
 static int rowner_retries = 10;
 static int rowner_loops = 10000;
 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
     "rwlock debugging");
 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 static void	db_show_rwlock(const struct lock_object *lock);
 #endif
 static void	assert_rw(const struct lock_object *lock, int what);
 static void	lock_rw(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_rw(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_rw(struct lock_object *lock);
 
 struct lock_class lock_class_rw = {
 	.lc_name = "rw",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_rw,
 #ifdef DDB
 	.lc_ddb_show = db_show_rwlock,
 #endif
 	.lc_lock = lock_rw,
 	.lc_unlock = unlock_rw,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_rw,
 #endif
 };
 
 /*
  * Return a pointer to the owning thread if the lock is write-locked or
  * NULL if the lock is unlocked or read-locked.
  */
 #define	rw_wowner(rw)							\
 	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
 	    (struct thread *)RW_OWNER((rw)->rw_lock))
 
 /*
  * Returns whether a write owner is recursed.  Write ownership is not assured
  * here and should be checked beforehand.
  */
 #define	rw_recursed(rw)		((rw)->rw_recurse != 0)
 
 /*
  * Return true if curthread holds the lock.
  */
 #define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
 
 /*
  * Return a pointer to the owning thread for this lock who should receive
  * any priority lent by threads that block on this lock.  Currently this
  * is identical to rw_wowner().
  */
 #define	rw_owner(rw)		rw_wowner(rw)
 
 #ifndef INVARIANTS
 #define	__rw_assert(c, what, file, line)
 #endif
 
 void
 assert_rw(const struct lock_object *lock, int what)
 {
 
 	rw_assert((const struct rwlock *)lock, what);
 }
 
 void
 lock_rw(struct lock_object *lock, uintptr_t how)
 {
 	struct rwlock *rw;
 
 	rw = (struct rwlock *)lock;
 	if (how)
 		rw_rlock(rw);
 	else
 		rw_wlock(rw);
 }
 
 uintptr_t
 unlock_rw(struct lock_object *lock)
 {
 	struct rwlock *rw;
 
 	rw = (struct rwlock *)lock;
 	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
 	if (rw->rw_lock & RW_LOCK_READ) {
 		rw_runlock(rw);
 		return (1);
 	} else {
 		rw_wunlock(rw);
 		return (0);
 	}
 }
 
 #ifdef KDTRACE_HOOKS
 int
 owner_rw(const struct lock_object *lock, struct thread **owner)
 {
 	const struct rwlock *rw = (const struct rwlock *)lock;
 	uintptr_t x = rw->rw_lock;
 
 	*owner = rw_wowner(rw);
 	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
 	    (*owner != NULL));
 }
 #endif
 
 void
 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
 {
 	struct rwlock *rw;
 	int flags;
 
 	rw = rwlock2rw(c);
 
 	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
 	    RW_RECURSE | RW_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
 	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
 	    &rw->rw_lock));
 
 	flags = LO_UPGRADABLE;
 	if (opts & RW_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & RW_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (!(opts & RW_NOWITNESS))
 		flags |= LO_WITNESS;
 	if (opts & RW_RECURSE)
 		flags |= LO_RECURSABLE;
 	if (opts & RW_QUIET)
 		flags |= LO_QUIET;
 	if (opts & RW_NEW)
 		flags |= LO_NEW;
 
 	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
 	rw->rw_lock = RW_UNLOCKED;
 	rw->rw_recurse = 0;
 }
 
 void
 _rw_destroy(volatile uintptr_t *c)
 {
 	struct rwlock *rw;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
 	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
 	rw->rw_lock = RW_DESTROYED;
 	lock_destroy(&rw->lock_object);
 }
 
 void
 rw_sysinit(void *arg)
 {
 	struct rw_args *args = arg;
 
 	rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
 }
 
 void
 rw_sysinit_flags(void *arg)
 {
 	struct rw_args_flags *args = arg;
 
 	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
 	    args->ra_flags);
 }
 
 int
 _rw_wowned(const volatile uintptr_t *c)
 {
 
 	return (rw_wowner(rwlock2rw(c)) == curthread);
 }
 
 void
 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	__rw_wlock(rw, curthread, file, line);
 	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
 	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	curthread->td_locks++;
 }
 
 int
 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	int rval;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
 
 	if (rw_wlocked(rw) &&
 	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		rw->rw_recurse++;
 		rval = 1;
 	} else
 		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
 		    (uintptr_t)curthread);
 
 	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
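+		/* Record a successful non-recursive acquisition for lockstat. */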
+		if (!rw_recursed(rw))
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE,
+			    rw, 0, 0, file, line);
 		curthread->td_locks++;
 	}
 	return (rval);
 }
 
 void
 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_WLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
 	    line);
 	__rw_wunlock(rw, curthread, file, line);
 	curthread->td_locks--;
 }
 /*
  * Determines whether a new reader can acquire a lock.  Succeeds if the
  * reader already owns a read lock and the lock is locked for read, to
  * prevent deadlock from reader recursion.  Also succeeds if the lock
  * is unlocked and has no writer waiters or spinners.  Otherwise it fails,
  * which prioritizes writers over readers.
  */
 #define	RW_CAN_READ(_rw)						\
     ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
     (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
     RW_LOCK_READ)
 
 void
 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
 	int spintries = 0;
 	int i;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	uintptr_t v;
 #ifdef KDTRACE_HOOKS
+	uintptr_t state;
 	uint64_t spin_cnt = 0;
 	uint64_t sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
 	KASSERT(rw_wowner(rw) != curthread,
 	    ("rw_rlock: wlock already held for %s @ %s:%d",
 	    rw->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
 
+#ifdef KDTRACE_HOOKS
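+	/* Snapshot the initial lock state and start the acquisition timer. */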
+	all_time -= lockstat_nsecs();
+	state = rw->rw_lock;
+#endif
 	for (;;) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
 		/*
 		 * Handle the easy case.  If no other thread has a write
 		 * lock, then try to bump up the count of read locks.  Note
 		 * that we have to preserve the current state of the
 		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
 		 * read lock, then rw_lock must have changed, so restart
 		 * the loop.  Note that this handles the case of a
 		 * completely unlocked rwlock since such a lock is encoded
 		 * as a read lock with no waiters.
 		 */
 		v = rw->rw_lock;
 		if (RW_CAN_READ(v)) {
 			/*
 			 * The RW_LOCK_READ_WAITERS flag should only be set
 			 * if the lock has been unlocked and write waiters
 			 * were present.
 			 */
 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
 			    v + RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeed %p -> %p", __func__,
 					    rw, (void *)v,
 					    (void *)(v + RW_ONE_READER));
 				break;
 			}
 			continue;
 		}
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
 		lock_profile_obtain_lock_failed(&rw->lock_object,
 		    &contested, &waittime);
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((v & RW_LOCK_READ) == 0) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, rw, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", rw->lock_object.lo_name);
 				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
 				    owner && TD_IS_RUNNING(owner)) {
 					cpu_spinwait();
 #ifdef KDTRACE_HOOKS
 					spin_cnt++;
 #endif
 				}
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		} else if (spintries < rowner_retries) {
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			for (i = 0; i < rowner_loops; i++) {
 				v = rw->rw_lock;
 				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
 					break;
 				cpu_spinwait();
 			}
 #ifdef KDTRACE_HOOKS
 			spin_cnt += rowner_loops - i;
 #endif
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			if (i != rowner_loops)
 				continue;
 		}
 #endif
 
 		/*
 		 * Okay, now it's the hard case.  Some other thread already
 		 * has a write lock or there are write waiters present,
 		 * acquire the turnstile lock so we can begin the process
 		 * of blocking.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 
 		/*
 		 * The lock might have been released while we spun, so
 		 * recheck its state and restart the loop if needed.
 		 */
 		v = rw->rw_lock;
 		if (RW_CAN_READ(v)) {
 			turnstile_cancel(ts);
 			continue;
 		}
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if ((v & RW_LOCK_READ) == 0) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * The lock is held in write mode or it already has waiters.
 		 */
 		MPASS(!RW_CAN_READ(v));
 
 		/*
 		 * If the RW_LOCK_READ_WAITERS flag is already set, then
 		 * we can go ahead and block.  If it is not set then try
 		 * to set it.  If we fail to set it drop the turnstile
 		 * lock and restart the loop.
 		 */
 		if (!(v & RW_LOCK_READ_WAITERS)) {
 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 			    v | RW_LOCK_READ_WAITERS)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
 				    __func__, rw);
 		}
 
 		/*
 		 * We were unable to acquire the lock and the read waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs();
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs();
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 	}
+#ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs();
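+	/*
+	 * The extra arguments record our waiter class, whether the lock was
+	 * write-owned on entry, and the reader count observed on entry.
+	 */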
+	if (sleep_time)
+		LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
+		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
+	/* Record only the time spent spinning, not sleeping. */
+	if (spin_cnt > sleep_cnt)
+		LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
+		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+#endif
 	/*
 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
 	 * however.  turnstiles don't like owners changing between calls to
 	 * turnstile_wait() currently.
 	 */
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
 	    waittime, file, line);
 	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
 	curthread->td_locks++;
 	curthread->td_rw_rlocks++;
-#ifdef KDTRACE_HOOKS
-	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);
-
-	/*
-	 * Record only the loops spinning and not sleeping. 
-	 */
-	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
-#endif
 }
 
 int
 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	rw = rwlock2rw(c);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
 	    curthread, rw->lock_object.lo_name, file, line));
 
 	for (;;) {
 		x = rw->rw_lock;
 		KASSERT(rw->rw_lock != RW_DESTROYED,
 		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
 		if (!(x & RW_LOCK_READ))
 			break;
 		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
 			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
 			    line);
 			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
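+			/* Record the successful try-rlock for lockstat. */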
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE,
+			    rw, 0, 0, file, line);
 			curthread->td_locks++;
 			curthread->td_rw_rlocks++;
 			return (1);
 		}
 	}
 
 	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 void
 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t x, v, queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
 	/* TODO: drop "owner of record" here. */
 
 	for (;;) {
 		/*
 		 * See if there is more than one read lock held.  If so,
 		 * just drop one and return.
 		 */
 		x = rw->rw_lock;
 		if (RW_READERS(x) > 1) {
 			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
 			    x - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
 					    __func__, rw, (void *)x,
 					    (void *)(x - RW_ONE_READER));
 				break;
 			}
 			continue;
 		}
 		/*
 		 * If there aren't any waiters for a write lock, then try
 		 * to drop it quickly.
 		 */
 		if (!(x & RW_LOCK_WAITERS)) {
 			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
 			    RW_READERS_LOCK(1));
 			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
 			    RW_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, rw);
 				break;
 			}
 			continue;
 		}
 		/*
 		 * Ok, we know we have waiters and we think we are the
 		 * last reader, so grab the turnstile lock.
 		 */
 		turnstile_chain_lock(&rw->lock_object);
 		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		MPASS(v & RW_LOCK_WAITERS);
 
 		/*
 		 * Try to drop our lock leaving the lock in an unlocked
 		 * state.
 		 *
 		 * If you wanted to do explicit lock handoff you'd have to
 		 * do it here.  You'd also want to use turnstile_signal()
 		 * and you'd have to handle the race where a higher
 		 * priority thread blocks on the write lock before the
 		 * thread you wakeup actually runs and have the new thread
 		 * "steal" the lock.  For now it's a lot simpler to just
 		 * wake up all of the waiters.
 		 *
 		 * As above, if we fail, then another thread might have
 		 * acquired a read lock, so drop the turnstile lock and
 		 * restart.
 		 */
 		x = RW_UNLOCKED;
 		if (v & RW_LOCK_WRITE_WAITERS) {
 			queue = TS_EXCLUSIVE_QUEUE;
 			x |= (v & RW_LOCK_READ_WAITERS);
 		} else
 			queue = TS_SHARED_QUEUE;
 		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
 		    x)) {
 			turnstile_chain_unlock(&rw->lock_object);
 			continue;
 		}
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
 			    __func__, rw);
 
 		/*
 		 * Ok.  The lock is released and all that's left is to
 		 * wake up the waiters.  Note that the lock might not be
 		 * free anymore, but in that case the writers will just
 		 * block again if they run before the new lock holder(s)
 		 * release the lock.
 		 */
 		ts = turnstile_lookup(&rw->lock_object);
 		MPASS(ts != NULL);
 		turnstile_broadcast(ts, queue);
 		turnstile_unpend(ts, TS_SHARED_LOCK);
 		turnstile_chain_unlock(&rw->lock_object);
 		break;
 	}
 	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
 	curthread->td_locks--;
 	curthread->td_rw_rlocks--;
 }
 
 /*
  * This function is called when we are unable to obtain a write lock on the
  * first try.  This means that at least one other thread holds either a
  * read or write lock.
  */
 void
 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
     int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
 	int spintries = 0;
 	int i;
 #endif
 	uintptr_t v, x;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 #ifdef KDTRACE_HOOKS
+	uintptr_t state;
 	uint64_t spin_cnt = 0;
 	uint64_t sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	if (rw_wlocked(rw)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
 		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
 		    __func__, rw->lock_object.lo_name, file, line));
 		rw->rw_recurse++;
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
 		return;
 	}
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
 
+#ifdef KDTRACE_HOOKS
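+	/* Snapshot the initial lock state and start timing the hard path. */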
+	all_time -= lockstat_nsecs();
+	state = rw->rw_lock;
+#endif
 	while (!_rw_write_lock(rw, tid)) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
 		lock_profile_obtain_lock_failed(&rw->lock_object,
 		    &contested, &waittime);
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		v = rw->rw_lock;
 		owner = (struct thread *)RW_OWNER(v);
 		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 				    __func__, rw, owner);
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
 			    TD_IS_RUNNING(owner)) {
 				cpu_spinwait();
 #ifdef KDTRACE_HOOKS
 				spin_cnt++;
 #endif
 			}
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 			continue;
 		}
 		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
 		    spintries < rowner_retries) {
 			if (!(v & RW_LOCK_WRITE_SPINNER)) {
 				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 				    v | RW_LOCK_WRITE_SPINNER)) {
 					continue;
 				}
 			}
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
 			for (i = 0; i < rowner_loops; i++) {
 				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
 					break;
 				cpu_spinwait();
 			}
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
 #ifdef KDTRACE_HOOKS
 			spin_cnt += rowner_loops - i;
 #endif
 			if (i != rowner_loops)
 				continue;
 		}
 #endif
 		ts = turnstile_trywait(&rw->lock_object);
 		v = rw->rw_lock;
 
 #ifdef ADAPTIVE_RWLOCKS
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the turnstile
 		 * chain lock.  If so, drop the turnstile lock and try
 		 * again.
 		 */
 		if (!(v & RW_LOCK_READ)) {
 			owner = (struct thread *)RW_OWNER(v);
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 		}
 #endif
 		/*
 		 * Check the waiters flags on this rwlock.
 		 * If the lock was released without leaving any pending
 		 * waiters queue, simply try to acquire it.
 		 * If a pending waiters queue is present, claim the lock
 		 * ownership and preserve the pending queue.
 		 */
 		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		if ((v & ~x) == RW_UNLOCKED) {
 			x &= ~RW_LOCK_WRITE_SPINNER;
 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
 				if (x)
 					turnstile_claim(ts);
 				else
 					turnstile_cancel(ts);
 				break;
 			}
 			turnstile_cancel(ts);
 			continue;
 		}
 		/*
 		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 		 * set it.  If we fail to set it, then loop back and try
 		 * again.
 		 */
 		if (!(v & RW_LOCK_WRITE_WAITERS)) {
 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 			    v | RW_LOCK_WRITE_WAITERS)) {
 				turnstile_cancel(ts);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 				    __func__, rw);
 		}
 		/*
 		 * We were unable to acquire the lock and the write waiters
 		 * flag is set, so we must block on the turnstile.
 		 */
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs();
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs();
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 #ifdef ADAPTIVE_RWLOCKS
 		spintries = 0;
 #endif
 	}
-	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
-	    waittime, file, line);
 #ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs();
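+	/*
+	 * As in the read path, the extra arguments record whether the lock
+	 * was write-owned on entry and the reader count observed at that
+	 * point.
+	 */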
 	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);
+		LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
+		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
-	/*
-	 * Record only the loops spinning and not sleeping.
-	 */ 
+	/* Record only the time spent spinning, not sleeping. */
 	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
+		LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
+		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 #endif
+	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
+	    waittime, file, line);
 }
 
 /*
  * This function is called if the first try at releasing a write lock failed.
  * This means that one of the 2 waiter bits must be set indicating that at
  * least one thread is waiting on this lock.
  */
 void
 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
     int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t v;
 	int queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	if (rw_wlocked(rw) && rw_recursed(rw)) {
 		rw->rw_recurse--;
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 		return;
 	}
 
 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 	    ("%s: neither of the waiter flags are set", __func__));
 
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 
 	turnstile_chain_lock(&rw->lock_object);
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 
 	/*
 	 * Use the same algo as sx locks for now.  Prefer waking up shared
 	 * waiters if we have any over writers.  This is probably not ideal.
 	 *
 	 * 'v' is the value we are going to write back to rw_lock.  If we
 	 * have waiters on both queues, we need to preserve the state of
 	 * the waiter flag for the queue we don't wake up.  For now this is
 	 * hardcoded for the algorithm mentioned above.
 	 *
 	 * In the case of both readers and writers waiting, we wake up the
 	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
 	 * new writer comes in before a reader it will claim the lock up
 	 * above.  There is probably a potential priority inversion in
 	 * there that could be worked around either by waking both queues
 	 * of waiters or doing some complicated lock handoff gymnastics.
 	 */
 	v = RW_UNLOCKED;
 	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
 		queue = TS_EXCLUSIVE_QUEUE;
 		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
 	} else
 		queue = TS_SHARED_QUEUE;
 
 	/* Wake up all waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&rw->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 		    queue == TS_SHARED_QUEUE ? "read" : "write");
 	turnstile_broadcast(ts, queue);
 	atomic_store_rel_ptr(&rw->rw_lock, v);
 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	turnstile_chain_unlock(&rw->lock_object);
 }
 
 /*
  * Attempt to do a non-blocking upgrade from a read lock to a write
  * lock.  This will only succeed if this thread holds a single read
  * lock.  Returns true if the upgrade succeeded and false otherwise.
  */
 int
 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	uintptr_t v, x, tid;
 	struct turnstile *ts;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_RLOCKED, file, line);
 
 	/*
 	 * Attempt to switch from one reader to a writer.  If there
 	 * are any write waiters, then we will have to lock the
 	 * turnstile first to prevent races with another writer
 	 * calling turnstile_wait() before we have claimed this
 	 * turnstile.  So, do the simple case of no waiters first.
 	 */
 	tid = (uintptr_t)curthread;
 	success = 0;
 	for (;;) {
 		v = rw->rw_lock;
 		if (RW_READERS(v) > 1)
 			break;
 		if (!(v & RW_LOCK_WAITERS)) {
 			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
 			if (!success)
 				continue;
 			break;
 		}
 
 		/*
 		 * Ok, we think we have waiters, so lock the turnstile.
 		 */
 		ts = turnstile_trywait(&rw->lock_object);
 		v = rw->rw_lock;
 		if (RW_READERS(v) > 1) {
 			turnstile_cancel(ts);
 			break;
 		}
 		/*
 		 * Try to switch from one reader to a writer again.  This time
 		 * we honor the current state of the waiters flags.
 		 * If we obtain the lock with the flags set, then claim
 		 * ownership of the turnstile.
 		 */
 		x = rw->rw_lock & RW_LOCK_WAITERS;
 		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
 		if (success) {
 			if (x)
 				turnstile_claim(ts);
 			else
 				turnstile_cancel(ts);
 			break;
 		}
 		turnstile_cancel(ts);
 	}
 	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 	if (success) {
 		curthread->td_rw_rlocks--;
 		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
 	}
 	return (success);
 }
 
 /*
  * Downgrade a write lock into a single read lock.
  */
 void
 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t tid, v;
 	int rwait, wwait;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
 
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 	__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (rw_recursed(rw))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 
 	/*
 	 * Convert from a writer to a single reader.  First we handle
 	 * the easy case with no waiters.  If there are any waiters, we
 	 * lock the turnstile and "disown" the lock.
 	 */
 	tid = (uintptr_t)curthread;
 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 		goto out;
 
 	/*
 	 * Ok, we think we have waiters, so lock the turnstile so we can
 	 * read the waiter flags without any races.
 	 */
 	turnstile_chain_lock(&rw->lock_object);
 	v = rw->rw_lock & RW_LOCK_WAITERS;
 	rwait = v & RW_LOCK_READ_WAITERS;
 	wwait = v & RW_LOCK_WRITE_WAITERS;
 	MPASS(rwait | wwait);
 
 	/*
 	 * Downgrade from a write lock while preserving waiters flag
 	 * and give up ownership of the turnstile.
 	 */
 	ts = turnstile_lookup(&rw->lock_object);
 	MPASS(ts != NULL);
 	if (!wwait)
 		v &= ~RW_LOCK_READ_WAITERS;
 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 	/*
 	 * Wake other readers if there are no writers pending.  Otherwise they
 	 * won't be able to acquire the lock anyway.
 	 */
 	if (rwait && !wwait) {
 		turnstile_broadcast(ts, TS_SHARED_QUEUE);
 		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 	} else
 		turnstile_disown(ts);
 	turnstile_chain_unlock(&rw->lock_object);
 out:
 	curthread->td_rw_rlocks++;
 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
 }
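
The downgrade above rewrites the owner word as "one reader" with a single
release store, carrying the surviving waiter bits across and dropping the
read-waiters bit exactly when those readers are about to be woken.  A rough
sketch of that bit bookkeeping, reusing the hypothetical encoding from the
earlier sketch plus made-up waiter bits; note that the real code holds the
turnstile chain lock here, so the waiter bits cannot change between the load
and the store:

#include <stdatomic.h>
#include <stdint.h>

#define	RL_READ			((uintptr_t)1)
#define	RL_ONE_READER		(RL_READ | ((uintptr_t)1 << 4))
#define	RL_READ_WAITERS		((uintptr_t)1 << 1)	/* hypothetical */
#define	RL_WRITE_WAITERS	((uintptr_t)1 << 2)	/* hypothetical */

/*
 * Downgrade writer -> one reader while keeping only the waiter bits that
 * still matter: if no writers are queued, the read waiters will be woken,
 * so their bit is cleared as part of the same store.
 */
static void
downgrade_store(_Atomic uintptr_t *lockp)
{
	uintptr_t v, waiters;

	v = atomic_load(lockp);
	waiters = v & (RL_READ_WAITERS | RL_WRITE_WAITERS);
	if ((waiters & RL_WRITE_WAITERS) == 0)
		waiters &= ~RL_READ_WAITERS;	/* readers get woken */
	atomic_store_explicit(lockp, RL_ONE_READER | waiters,
	    memory_order_release);
}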
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef __rw_assert
 #endif
 
 /*
  * In the non-WITNESS case, rw_assert() can only detect that at least
  * *some* thread owns an rlock, but it cannot guarantee that *this*
  * thread owns an rlock.
  */
 void
 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
 	const struct rwlock *rw;
 
 	if (panicstr != NULL)
 		return;
 
 	rw = rwlock2rw(c);
 
 	switch (what) {
 	case RA_LOCKED:
 	case RA_LOCKED | RA_RECURSED:
 	case RA_LOCKED | RA_NOTRECURSED:
 	case RA_RLOCKED:
 	case RA_RLOCKED | RA_RECURSED:
 	case RA_RLOCKED | RA_NOTRECURSED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has a write lock or we have one
 		 * and are asserting a read lock, fail.  Also, if no one
 		 * has a lock at all, fail.
 		 */
 		if (rw->rw_lock == RW_UNLOCKED ||
 		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 		    rw_wowner(rw) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 			    "read " : "", file, line);
 
 		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 			if (rw_recursed(rw)) {
 				if (what & RA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    rw->lock_object.lo_name, file,
 					    line);
 			} else if (what & RA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case RA_WLOCKED:
 	case RA_WLOCKED | RA_RECURSED:
 	case RA_WLOCKED | RA_NOTRECURSED:
 		if (rw_wowner(rw) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		if (rw_recursed(rw)) {
 			if (what & RA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    rw->lock_object.lo_name, file, line);
 		} else if (what & RA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 		break;
 	case RA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold a write lock, fail.  We can't reliably check
 		 * to see if we hold a read lock or not.
 		 */
 		if (rw_wowner(rw) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    rw->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif /* INVARIANT_SUPPORT */
 
 #ifdef DDB
 void
 db_show_rwlock(const struct lock_object *lock)
 {
 	const struct rwlock *rw;
 	struct thread *td;
 
 	rw = (const struct rwlock *)lock;
 
 	db_printf(" state: ");
 	if (rw->rw_lock == RW_UNLOCKED)
 		db_printf("UNLOCKED\n");
 	else if (rw->rw_lock == RW_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (rw->rw_lock & RW_LOCK_READ)
 		db_printf("RLOCK: %ju locks\n",
 		    (uintmax_t)(RW_READERS(rw->rw_lock)));
 	else {
 		td = rw_wowner(rw);
 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (rw_recursed(rw))
 			db_printf(" recursed: %u\n", rw->rw_recurse);
 	}
 	db_printf(" waiters: ");
 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 	case RW_LOCK_READ_WAITERS:
 		db_printf("readers\n");
 		break;
 	case RW_LOCK_WRITE_WAITERS:
 		db_printf("writers\n");
 		break;
 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 		db_printf("readers and writers\n");
 		break;
 	default:
 		db_printf("none\n");
 		break;
 	}
 }
 
 #endif
Index: head/sys/kern/kern_sx.c
===================================================================
--- head/sys/kern/kern_sx.c	(revision 284296)
+++ head/sys/kern/kern_sx.c	(revision 284297)
@@ -1,1228 +1,1255 @@
 /*-
  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 /*
  * Shared/exclusive locks.  This implementation attempts to ensure
  * deterministic lock granting behavior, so that slocks and xlocks are
  * interleaved.
  *
  * Priority propagation will not generally raise the priority of lock holders,
  * so should not be relied upon in combination with sx locks.
  */
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_no_adaptive_sx.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #include <machine/cpu.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_SX)
 #define	ADAPTIVE_SX
 #endif
 
 CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
 /* Handy macros for sleep queues. */
 #define	SQ_EXCLUSIVE_QUEUE	0
 #define	SQ_SHARED_QUEUE		1
 
 /*
  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
  * drop Giant anytime we have to sleep or if we adaptively spin.
  */
 #define	GIANT_DECLARE							\
 	int _giantcnt = 0;						\
 	WITNESS_SAVE_DECL(Giant)					\
 
 #define	GIANT_SAVE() do {						\
 	if (mtx_owned(&Giant)) {					\
 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
 		while (mtx_owned(&Giant)) {				\
 			_giantcnt++;					\
 			mtx_unlock(&Giant);				\
 		}							\
 	}								\
 } while (0)
 
 #define GIANT_RESTORE() do {						\
 	if (_giantcnt > 0) {						\
 		mtx_assert(&Giant, MA_NOTOWNED);			\
 		while (_giantcnt--)					\
 			mtx_lock(&Giant);				\
 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
 	}								\
 } while (0)
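
GIANT_SAVE() fully releases a possibly recursed Giant before a sleep or an
adaptive spin, remembering the recursion depth in _giantcnt, and
GIANT_RESTORE() reacquires it that many times afterwards.  The same
save/restore pattern, sketched as a standalone toy with a recursion counter
standing in for Giant (the rec_*() helpers are invented for illustration):

#include <stdio.h>

/* Toy recursive lock, used only to illustrate the save/restore pattern. */
static int rec_depth;

static int	rec_owned(void) { return (rec_depth > 0); }
static void	rec_lock(void) { rec_depth++; }
static void	rec_unlock(void) { rec_depth--; }

static void
blocking_operation(void)
{
	int savedcnt = 0;

	/* Save: drop every recursion level before blocking. */
	while (rec_owned()) {
		savedcnt++;
		rec_unlock();
	}

	/* ... sleep or spin here without holding the lock ... */

	/* Restore: reacquire exactly as many levels as were dropped. */
	while (savedcnt-- > 0)
		rec_lock();
}

int
main(void)
{
	rec_lock();
	rec_lock();		/* recurse twice */
	blocking_operation();
	printf("depth after restore: %d\n", rec_depth);	/* prints 2 */
	return (0);
}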
 
 /*
  * Returns true if an exclusive lock is recursed.  It assumes
  * curthread currently has an exclusive lock.
  */
 #define	sx_recursed(sx)		((sx)->sx_recurse != 0)
 
 static void	assert_sx(const struct lock_object *lock, int what);
 #ifdef DDB
 static void	db_show_sx(const struct lock_object *lock);
 #endif
 static void	lock_sx(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
 static int	owner_sx(const struct lock_object *lock, struct thread **owner);
 #endif
 static uintptr_t unlock_sx(struct lock_object *lock);
 
 struct lock_class lock_class_sx = {
 	.lc_name = "sx",
 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
 	.lc_assert = assert_sx,
 #ifdef DDB
 	.lc_ddb_show = db_show_sx,
 #endif
 	.lc_lock = lock_sx,
 	.lc_unlock = unlock_sx,
 #ifdef KDTRACE_HOOKS
 	.lc_owner = owner_sx,
 #endif
 };
 
 #ifndef INVARIANTS
 #define	_sx_assert(sx, what, file, line)
 #endif
 
 #ifdef ADAPTIVE_SX
 static u_int asx_retries = 10;
 static u_int asx_loops = 10000;
 static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
 SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
 SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
 #endif
 
 void
 assert_sx(const struct lock_object *lock, int what)
 {
 
 	sx_assert((const struct sx *)lock, what);
 }
 
 void
 lock_sx(struct lock_object *lock, uintptr_t how)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	if (how)
 		sx_slock(sx);
 	else
 		sx_xlock(sx);
 }
 
 uintptr_t
 unlock_sx(struct lock_object *lock)
 {
 	struct sx *sx;
 
 	sx = (struct sx *)lock;
 	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
 	if (sx_xlocked(sx)) {
 		sx_xunlock(sx);
 		return (0);
 	} else {
 		sx_sunlock(sx);
 		return (1);
 	}
 }
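
unlock_sx() above encodes how the lock was held in its return value (0 for
exclusive, 1 for shared) and lock_sx() consumes that value to reacquire the
lock in the same mode.  This is the round trip that code such as the sleep
queue and condition variable paths performs through the lock_class
operations when it must drop an arbitrary lock across a sleep; the caller
below is only an illustration of the pattern, not real kernel code:

/*
 * Drop a caller-supplied lock of any class across a sleep and reacquire
 * it in the original mode, using the lc_unlock()/lc_lock() pair.
 */
static void
example_sleep_with_lock_dropped(struct lock_object *lock)
{
	struct lock_class *class;
	uintptr_t how;

	class = LOCK_CLASS(lock);
	how = class->lc_unlock(lock);	/* 0 = was exclusive, 1 = was shared */

	/* ... block on a sleep queue here ... */

	class->lc_lock(lock, how);	/* reacquire in the original mode */
}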
 
 #ifdef KDTRACE_HOOKS
 int
 owner_sx(const struct lock_object *lock, struct thread **owner)
 {
 	const struct sx *sx = (const struct sx *)lock;
 	uintptr_t x = sx->sx_lock;
 
 	*owner = (struct thread *)SX_OWNER(x);
 	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
 	    (*owner != NULL));
 }
 #endif
 
 void
 sx_sysinit(void *arg)
 {
 	struct sx_args *sargs = arg;
 
 	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
 }
 
 void
 sx_init_flags(struct sx *sx, const char *description, int opts)
 {
 	int flags;
 
 	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
 	    SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
 	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
 	    &sx->sx_lock));
 
 	flags = LO_SLEEPABLE | LO_UPGRADABLE;
 	if (opts & SX_DUPOK)
 		flags |= LO_DUPOK;
 	if (opts & SX_NOPROFILE)
 		flags |= LO_NOPROFILE;
 	if (!(opts & SX_NOWITNESS))
 		flags |= LO_WITNESS;
 	if (opts & SX_RECURSE)
 		flags |= LO_RECURSABLE;
 	if (opts & SX_QUIET)
 		flags |= LO_QUIET;
 	if (opts & SX_NEW)
 		flags |= LO_NEW;
 
 	flags |= opts & SX_NOADAPTIVE;
 	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
 	sx->sx_lock = SX_LOCK_UNLOCKED;
 	sx->sx_recurse = 0;
 }
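
For reference, a typical consumer of the initialization path above: declare
an sx lock, initialize it with the desired SX_* options, take it shared or
exclusive, and destroy it on teardown.  A minimal kernel-side sketch, not
taken from any particular subsystem:

/* A hypothetical subsystem lock, shown only to illustrate the sx API. */
static struct sx example_lock;

static void
example_init(void)
{

	/* SX_RECURSE sets LO_RECURSABLE, allowing recursive xlocks. */
	sx_init_flags(&example_lock, "example lock", SX_RECURSE);
}

static void
example_use(void)
{

	sx_slock(&example_lock);	/* shared: many readers allowed */
	/* ... read shared state ... */
	sx_sunlock(&example_lock);

	sx_xlock(&example_lock);	/* exclusive: a single writer */
	/* ... modify shared state ... */
	sx_xunlock(&example_lock);
}

static void
example_fini(void)
{

	sx_destroy(&example_lock);
}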
 
 void
 sx_destroy(struct sx *sx)
 {
 
 	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
 	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
 	sx->sx_lock = SX_LOCK_DESTROYED;
 	lock_destroy(&sx->lock_object);
 }
 
 int
 _sx_slock(struct sx *sx, int opts, const char *file, int line)
 {
 	int error = 0;
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
 	error = __sx_slock(sx, opts, file, line);
 	if (!error) {
 		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
 		WITNESS_LOCK(&sx->lock_object, 0, file, line);
 		curthread->td_locks++;
 	}
 
 	return (error);
 }
 
 int
 sx_try_slock_(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 
 	for (;;) {
 		x = sx->sx_lock;
 		KASSERT(x != SX_LOCK_DESTROYED,
 		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
 		if (!(x & SX_LOCK_SHARED))
 			break;
 		if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) {
 			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
 			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE,
+			    sx, 0, 0, file, line);
 			curthread->td_locks++;
 			return (1);
 		}
 	}
 
 	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
 	return (0);
 }
 
 int
 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
 {
 	int error = 0;
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
 	error = __sx_xlock(sx, curthread, opts, file, line);
 	if (!error) {
 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
 		    file, line);
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 		curthread->td_locks++;
 	}
 
 	return (error);
 }
 
 int
 sx_try_xlock_(struct sx *sx, const char *file, int line)
 {
 	int rval;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
 	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
 	    curthread, sx->lock_object.lo_name, file, line));
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
 
 	if (sx_xlocked(sx) &&
 	    (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) {
 		sx->sx_recurse++;
 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		rval = 1;
 	} else
 		rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
 		    (uintptr_t)curthread);
 	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
 	if (rval) {
 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
+		if (!sx_recursed(sx))
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE,
+			    sx, 0, 0, file, line);
 		curthread->td_locks++;
 	}
 
 	return (rval);
 }
 
 void
 _sx_sunlock(struct sx *sx, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
 	__sx_sunlock(sx, file, line);
 	curthread->td_locks--;
 }
 
 void
 _sx_xunlock(struct sx *sx, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED, file, line);
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
 	    line);
 	__sx_xunlock(sx, curthread, file, line);
 	curthread->td_locks--;
 }
 
 /*
  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
  * This will only succeed if this thread holds a single shared lock.
  * Return 1 if the upgrade succeeded, 0 otherwise.
  */
 int
 sx_try_upgrade_(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 	int success;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_SLOCKED, file, line);
 
 	/*
 	 * Try to switch from one shared lock to an exclusive lock.  We need
 	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
 	 * we will wake up the exclusive waiters when we drop the lock.
 	 */
 	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
 	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
 	    (uintptr_t)curthread | x);
 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
 	if (success) {
 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(LS_SX_TRYUPGRADE_UPGRADE, sx);
 	}
 	return (success);
 }
 
 /*
  * Downgrade an unrecursed exclusive lock into a single shared lock.
  */
 void
 sx_downgrade_(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 	int wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (sx_recursed(sx))
 		panic("downgrade of a recursed lock");
 #endif
 
 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
 
 	/*
 	 * Try to switch from an exclusive lock with no shared waiters
 	 * to one sharer with no shared waiters.  If there are
 	 * exclusive waiters, we don't need to lock the sleep queue so
 	 * long as we preserve the flag.  We do one quick try and if
 	 * that fails we grab the sleepq lock to keep the flags from
 	 * changing and do it the slow way.
 	 *
 	 * We have to lock the sleep queue if there are shared waiters
 	 * so we can wake them up.
 	 */
 	x = sx->sx_lock;
 	if (!(x & SX_LOCK_SHARED_WAITERS) &&
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
 		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 		return;
 	}
 
 	/*
 	 * Lock the sleep queue so we can read the waiters bits
 	 * without any races and wakeup any shared waiters.
 	 */
 	sleepq_lock(&sx->lock_object);
 
 	/*
 	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
 	 * shared lock.  If there are any shared waiters, wake them up.
 	 */
 	wakeup_swapper = 0;
 	x = sx->sx_lock;
 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
 	if (x & SX_LOCK_SHARED_WAITERS)
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, SQ_SHARED_QUEUE);
 	sleepq_release(&sx->lock_object);
 
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(LS_SX_DOWNGRADE_DOWNGRADE, sx);
 
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * This function represents the so-called 'hard case' for sx_xlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 int
 _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
     int line)
 {
 	GIANT_DECLARE;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 	u_int i, spintries = 0;
 #endif
 	uintptr_t x;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	int error = 0;
 #ifdef	KDTRACE_HOOKS
+	uintptr_t state;
 	uint64_t spin_cnt = 0;
 	uint64_t sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
 	/* If we already hold an exclusive lock, then recurse. */
 	if (sx_xlocked(sx)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
 		    sx->lock_object.lo_name, file, line));
 		sx->sx_recurse++;
 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
 		return (0);
 	}
 
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
 
+#ifdef KDTRACE_HOOKS
+	all_time -= lockstat_nsecs();
+	state = sx->sx_lock;
+#endif
 	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 		    &waittime);
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		x = sx->sx_lock;
 		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			if ((x & SX_LOCK_SHARED) == 0) {
 				x = SX_OWNER(x);
 				owner = (struct thread *)x;
 				if (TD_IS_RUNNING(owner)) {
 					if (LOCK_LOG_TEST(&sx->lock_object, 0))
 						CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 						    __func__, sx, owner);
 					KTR_STATE1(KTR_SCHED, "thread",
 					    sched_tdname(curthread), "spinning",
 					    "lockname:\"%s\"",
 					    sx->lock_object.lo_name);
 					GIANT_SAVE();
 					while (SX_OWNER(sx->sx_lock) == x &&
 					    TD_IS_RUNNING(owner)) {
 						cpu_spinwait();
 #ifdef KDTRACE_HOOKS
 						spin_cnt++;
 #endif
 					}
 					KTR_STATE0(KTR_SCHED, "thread",
 					    sched_tdname(curthread), "running");
 					continue;
 				}
 			} else if (SX_SHARERS(x) && spintries < asx_retries) {
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", sx->lock_object.lo_name);
 				GIANT_SAVE();
 				spintries++;
 				for (i = 0; i < asx_loops; i++) {
 					if (LOCK_LOG_TEST(&sx->lock_object, 0))
 						CTR4(KTR_LOCK,
 				    "%s: shared spinning on %p with %u and %u",
 						    __func__, sx, spintries, i);
 					x = sx->sx_lock;
 					if ((x & SX_LOCK_SHARED) == 0 ||
 					    SX_SHARERS(x) == 0)
 						break;
 					cpu_spinwait();
 #ifdef KDTRACE_HOOKS
 					spin_cnt++;
 #endif
 				}
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				if (i != asx_loops)
 					continue;
 			}
 		}
 #endif
 
 		sleepq_lock(&sx->lock_object);
 		x = sx->sx_lock;
 
 		/*
 		 * If the lock was released while spinning on the
 		 * sleep queue chain lock, try again.
 		 */
 		if (x == SX_LOCK_UNLOCKED) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * The current lock owner might have started executing
 		 * on another CPU (or the lock could have changed
 		 * owners) while we were waiting on the sleep queue
 		 * chain lock.  If so, drop the sleep queue lock and try
 		 * again.
 		 */
 		if (!(x & SX_LOCK_SHARED) &&
 		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * If an exclusive lock was released with both shared
 		 * and exclusive waiters and a shared waiter hasn't
 		 * woken up and acquired the lock yet, sx_lock will be
 		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
 		 * If we see that value, try to acquire it once.  Note
 		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
 		 * as there are other exclusive waiters still.  If we
 		 * fail, restart the loop.
 		 */
 		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
 			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
 			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
 			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
 				    __func__, sx);
 				break;
 			}
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 		/*
 		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag.  If we
 		 * fail, then loop back and retry.
 		 */
 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
 			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
 				    __func__, sx);
 		}
 
 		/*
 		 * Since we have been unable to acquire the exclusive
 		 * lock and the exclusive waiters flag is set, we have
 		 * to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs();
 #endif
 		GIANT_SAVE();
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs();
 		sleep_cnt++;
 #endif
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 	}
-
-	GIANT_RESTORE();
-	if (!error)
-		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx,
-		    contested, waittime, file, line);
 #ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs();
 	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time);
+		LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time,
+		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
+		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
+		LOCKSTAT_RECORD4(LS_SX_XLOCK_SPIN, sx, all_time - sleep_time,
+		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
+		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 #endif
+	if (!error)
+		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx,
+		    contested, waittime, file, line);
+	GIANT_RESTORE();
 	return (error);
 }
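
The KDTRACE_HOOKS additions to _sx_xlock_hard() follow a simple interval
accounting scheme: snapshot the lock word into "state" before contending,
charge the whole contention window to all_time, charge blocked intervals to
sleep_time, and report spin time as the difference.  A standalone sketch of
the same bookkeeping, with clock_gettime() standing in for the kernel's
lockstat_nsecs() and nanosleep() standing in for sleepq_wait():

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Userland stand-in for lockstat_nsecs(). */
static int64_t
nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}

int
main(void)
{
	int64_t all_time = 0, sleep_time = 0;
	struct timespec naptime = { .tv_sec = 0, .tv_nsec = 2000000 };

	all_time -= nsecs();		/* contention window opens */

	/* ... adaptive spinning would happen here ... */

	sleep_time -= nsecs();		/* blocked interval opens */
	nanosleep(&naptime, NULL);	/* pretend 2 ms on a sleep queue */
	sleep_time += nsecs();		/* blocked interval closes */

	all_time += nsecs();		/* contention window closes */

	/* Spin time is whatever part of the window was not spent asleep. */
	printf("blocked %jd ns, spinning %jd ns\n",
	    (intmax_t)sleep_time, (intmax_t)(all_time - sleep_time));
	return (0);
}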
 
 /*
  * This function represents the so-called 'hard case' for sx_xunlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 void
 _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
 {
 	uintptr_t x;
 	int queue, wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
 
 	/* If the lock is recursed, then unrecurse one level. */
 	if (sx_xlocked(sx) && sx_recursed(sx)) {
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
 		return;
 	}
 	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
 	    SX_LOCK_EXCLUSIVE_WAITERS));
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
 
 	sleepq_lock(&sx->lock_object);
 	x = SX_LOCK_UNLOCKED;
 
 	/*
 	 * The wake up algorithm here is quite simple and probably not
 	 * ideal.  It gives precedence to shared waiters if they are
 	 * present.  For this condition, we have to preserve the
 	 * state of the exclusive waiters flag.
 	 * If interruptible sleeps left the shared queue empty, avoid
 	 * starving the threads sleeping on the exclusive queue by giving
 	 * them precedence and clearing the shared waiters bit anyway.
 	 */
 	if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 &&
 	    sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
 		queue = SQ_SHARED_QUEUE;
 		x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
 	} else
 		queue = SQ_EXCLUSIVE_QUEUE;
 
 	/* Wake up all the waiters for the specific queue. */
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
 		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
 		    "exclusive");
 	atomic_store_rel_ptr(&sx->sx_lock, x);
 	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
 	    queue);
 	sleepq_release(&sx->lock_object);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * This function represents the so-called 'hard case' for sx_slock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 int
 _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
 {
 	GIANT_DECLARE;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
 	uintptr_t x;
 	int error = 0;
 #ifdef KDTRACE_HOOKS
+	uintptr_t state;
 	uint64_t spin_cnt = 0;
 	uint64_t sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return (0);
 
+#ifdef KDTRACE_HOOKS
+	state = sx->sx_lock;
+	all_time -= lockstat_nsecs();
+#endif
+
 	/*
 	 * As with rwlocks, we make no attempt to block new shared
 	 * acquisitions once there is an exclusive waiter.
 	 */
 	for (;;) {
 #ifdef KDTRACE_HOOKS
 		spin_cnt++;
 #endif
 		x = sx->sx_lock;
 
 		/*
 		 * If no other thread has an exclusive lock then try to bump up
 		 * the count of sharers.  Since we have to preserve the state
 		 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
 		 * shared lock loop back and retry.
 		 */
 		if (x & SX_LOCK_SHARED) {
 			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
 			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
 			    x + SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeed %p -> %p", __func__,
 					    sx, (void *)x,
 					    (void *)(x + SX_ONE_SHARER));
 				break;
 			}
 			continue;
 		}
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 		    &waittime);
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			x = SX_OWNER(x);
 			owner = (struct thread *)x;
 			if (TD_IS_RUNNING(owner)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, sx, owner);
 				KTR_STATE1(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "spinning",
 				    "lockname:\"%s\"", sx->lock_object.lo_name);
 				GIANT_SAVE();
 				while (SX_OWNER(sx->sx_lock) == x &&
 				    TD_IS_RUNNING(owner)) {
 #ifdef KDTRACE_HOOKS
 					spin_cnt++;
 #endif
 					cpu_spinwait();
 				}
 				KTR_STATE0(KTR_SCHED, "thread",
 				    sched_tdname(curthread), "running");
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * Some other thread already has an exclusive lock, so
 		 * start the process of blocking.
 		 */
 		sleepq_lock(&sx->lock_object);
 		x = sx->sx_lock;
 
 		/*
 		 * The lock could have been released while we spun.
 		 * In this case loop back and retry.
 		 */
 		if (x & SX_LOCK_SHARED) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 
 #ifdef ADAPTIVE_SX
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
 		if (!(x & SX_LOCK_SHARED) &&
 		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
 			owner = (struct thread *)SX_OWNER(x);
 			if (TD_IS_RUNNING(owner)) {
 				sleepq_release(&sx->lock_object);
 				continue;
 			}
 		}
 #endif
 
 		/*
 		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
 		 * fail to set it drop the sleep queue lock and loop
 		 * back.
 		 */
 		if (!(x & SX_LOCK_SHARED_WAITERS)) {
 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
 			    x | SX_LOCK_SHARED_WAITERS)) {
 				sleepq_release(&sx->lock_object);
 				continue;
 			}
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
 				    __func__, sx);
 		}
 
 		/*
 		 * Since we have been unable to acquire the shared lock,
 		 * we have to sleep.
 		 */
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
 			    __func__, sx);
 
 #ifdef KDTRACE_HOOKS
 		sleep_time -= lockstat_nsecs();
 #endif
 		GIANT_SAVE();
 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
 			sleepq_wait(&sx->lock_object, 0);
 		else
 			error = sleepq_wait_sig(&sx->lock_object, 0);
 #ifdef KDTRACE_HOOKS
 		sleep_time += lockstat_nsecs();
 		sleep_cnt++;
 #endif
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR2(KTR_LOCK,
 			"%s: interruptible sleep by %p suspended by signal",
 				    __func__, sx);
 			break;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
 			    __func__, sx);
 	}
-	if (error == 0)
-		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx,
-		    contested, waittime, file, line);
 #ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs();
 	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time);
+		LOCKSTAT_RECORD4(LS_SX_SLOCK_BLOCK, sx, sleep_time,
+		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
+		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
+		LOCKSTAT_RECORD4(LS_SX_SLOCK_SPIN, sx, all_time - sleep_time,
+		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
+		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 #endif
+	if (error == 0)
+		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx,
+		    contested, waittime, file, line);
 	GIANT_RESTORE();
 	return (error);
 }
 
 /*
  * This function represents the so-called 'hard case' for sx_sunlock
  * operation.  All 'easy case' failures are redirected to this.  Note
  * that ideally this would be a static function, but it needs to be
  * accessible from at least sx.h.
  */
 void
 _sx_sunlock_hard(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 	int wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	for (;;) {
 		x = sx->sx_lock;
 
 		/*
 		 * We should never have shared waiters while at least one
 		 * thread holds a shared lock.
 		 */
 		KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
 		    ("%s: waiting sharers", __func__));
 
 		/*
 		 * See if there is more than one shared lock held.  If
 		 * so, just drop one and return.
 		 */
 		if (SX_SHARERS(x) > 1) {
 			if (atomic_cmpset_rel_ptr(&sx->sx_lock, x,
 			    x - SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
 					    __func__, sx, (void *)x,
 					    (void *)(x - SX_ONE_SHARER));
 				break;
 			}
 			continue;
 		}
 
 		/*
 		 * If there aren't any waiters for an exclusive lock,
 		 * then try to drop it quickly.
 		 */
 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
 			MPASS(x == SX_SHARERS_LOCK(1));
 			if (atomic_cmpset_rel_ptr(&sx->sx_lock,
 			    SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, sx);
 				break;
 			}
 			continue;
 		}
 
 		/*
 		 * At this point, there should just be one sharer with
 		 * exclusive waiters.
 		 */
 		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
 
 		sleepq_lock(&sx->lock_object);
 
 		/*
 		 * The wakeup semantic here is quite simple: just wake up
 		 * all the exclusive waiters.  Note that the state of the
 		 * lock could have changed, so if the transition fails,
 		 * loop back and retry.
 		 */
 		if (!atomic_cmpset_rel_ptr(&sx->sx_lock,
 		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
 		    SX_LOCK_UNLOCKED)) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p waking up all threads on"
 			    " exclusive queue", __func__, sx);
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
 		    0, SQ_EXCLUSIVE_QUEUE);
 		sleepq_release(&sx->lock_object);
 		if (wakeup_swapper)
 			kick_proc0();
 		break;
 	}
 }
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
 #undef	_sx_assert
 #endif
 
 /*
  * In the non-WITNESS case, sx_assert() can only detect that at least
  * *some* thread owns an slock, but it cannot guarantee that *this*
  * thread owns an slock.
  */
 void
 _sx_assert(const struct sx *sx, int what, const char *file, int line)
 {
 #ifndef WITNESS
 	int slocked = 0;
 #endif
 
 	if (panicstr != NULL)
 		return;
 	switch (what) {
 	case SA_SLOCKED:
 	case SA_SLOCKED | SA_NOTRECURSED:
 	case SA_SLOCKED | SA_RECURSED:
 #ifndef WITNESS
 		slocked = 1;
 		/* FALLTHROUGH */
 #endif
 	case SA_LOCKED:
 	case SA_LOCKED | SA_NOTRECURSED:
 	case SA_LOCKED | SA_RECURSED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If some other thread has an exclusive lock or we
 		 * have one and are asserting a shared lock, fail.
 		 * Also, if no one has a lock at all, fail.
 		 */
 		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
 		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
 		    sx_xholder(sx) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
 			    sx->lock_object.lo_name, slocked ? "share " : "",
 			    file, line);
 
 		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
 			if (sx_recursed(sx)) {
 				if (what & SA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
 					    sx->lock_object.lo_name, file,
 					    line);
 			} else if (what & SA_RECURSED)
 				panic("Lock %s not recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		}
 #endif
 		break;
 	case SA_XLOCKED:
 	case SA_XLOCKED | SA_NOTRECURSED:
 	case SA_XLOCKED | SA_RECURSED:
 		if (sx_xholder(sx) != curthread)
 			panic("Lock %s not exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		if (sx_recursed(sx)) {
 			if (what & SA_NOTRECURSED)
 				panic("Lock %s recursed @ %s:%d\n",
 				    sx->lock_object.lo_name, file, line);
 		} else if (what & SA_RECURSED)
 			panic("Lock %s not recursed @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 		break;
 	case SA_UNLOCKED:
 #ifdef WITNESS
 		witness_assert(&sx->lock_object, what, file, line);
 #else
 		/*
 		 * If we hold an exclusive lock, fail.  We can't
 		 * reliably check to see if we hold a shared lock or
 		 * not.
 		 */
 		if (sx_xholder(sx) == curthread)
 			panic("Lock %s exclusively locked @ %s:%d\n",
 			    sx->lock_object.lo_name, file, line);
 #endif
 		break;
 	default:
 		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
 		    line);
 	}
 }
 #endif	/* INVARIANT_SUPPORT */
 
 #ifdef DDB
 static void
 db_show_sx(const struct lock_object *lock)
 {
 	struct thread *td;
 	const struct sx *sx;
 
 	sx = (const struct sx *)lock;
 
 	db_printf(" state: ");
 	if (sx->sx_lock == SX_LOCK_UNLOCKED)
 		db_printf("UNLOCKED\n");
 	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
 		db_printf("DESTROYED\n");
 		return;
 	} else if (sx->sx_lock & SX_LOCK_SHARED)
 		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
 	else {
 		td = sx_xholder(sx);
 		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 		    td->td_tid, td->td_proc->p_pid, td->td_name);
 		if (sx_recursed(sx))
 			db_printf(" recursed: %d\n", sx->sx_recurse);
 	}
 
 	db_printf(" waiters: ");
 	switch(sx->sx_lock &
 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
 	case SX_LOCK_SHARED_WAITERS:
 		db_printf("shared\n");
 		break;
 	case SX_LOCK_EXCLUSIVE_WAITERS:
 		db_printf("exclusive\n");
 		break;
 	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
 		db_printf("exclusive and shared\n");
 		break;
 	default:
 		db_printf("none\n");
 	}
 }
 
 /*
  * Check to see if a thread that is blocked on a sleep queue is actually
  * blocked on an sx lock.  If so, output some details and return true.
  * If the lock has an exclusive owner, return that in *ownerp.
  */
 int
 sx_chain(struct thread *td, struct thread **ownerp)
 {
 	struct sx *sx;
 
 	/*
 	 * Check to see if this thread is blocked on an sx lock.
 	 * First, we check the lock class.  If that is ok, then we
 	 * compare the lock name against the wait message.
 	 */
 	sx = td->td_wchan;
 	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
 	    sx->lock_object.lo_name != td->td_wmesg)
 		return (0);
 
 	/* We think we have an sx lock, so output some details. */
 	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
 	*ownerp = sx_xholder(sx);
 	if (sx->sx_lock & SX_LOCK_SHARED)
 		db_printf("SLOCK (count %ju)\n",
 		    (uintmax_t)SX_SHARERS(sx->sx_lock));
 	else
 		db_printf("XLOCK\n");
 	return (1);
 }
 #endif
Index: head/sys/sys/lockstat.h
===================================================================
--- head/sys/sys/lockstat.h	(revision 284296)
+++ head/sys/sys/lockstat.h	(revision 284297)
@@ -1,220 +1,223 @@
 /*-
  * Copyright (c) 2008-2009 Stacey Son <sson@FreeBSD.org> 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
  
 /*
  * DTrace lockstat provider definitions
  *
  */
 
 #ifndef	_SYS_LOCKSTAT_H
 #define	_SYS_LOCKSTAT_H
 
 #ifdef	_KERNEL
 
 /*
  * Spin Locks
  */
 #define	LS_MTX_SPIN_LOCK_ACQUIRE	0
 #define	LS_MTX_SPIN_UNLOCK_RELEASE	1
 #define	LS_MTX_SPIN_LOCK_SPIN		2
 
 /*
  * Adaptive Locks
  */
 #define	LS_MTX_LOCK_ACQUIRE		3
 #define	LS_MTX_UNLOCK_RELEASE		4
 #define	LS_MTX_LOCK_SPIN		5
 #define	LS_MTX_LOCK_BLOCK		6
 #define	LS_MTX_TRYLOCK_ACQUIRE		7
 
 /*
  * Reader/Writer Locks
  */
 #define	LS_RW_RLOCK_ACQUIRE		8
 #define	LS_RW_RUNLOCK_RELEASE		9	
 #define	LS_RW_WLOCK_ACQUIRE		10
 #define	LS_RW_WUNLOCK_RELEASE		11
 #define	LS_RW_RLOCK_SPIN		12
 #define	LS_RW_RLOCK_BLOCK		13
 #define	LS_RW_WLOCK_SPIN		14
 #define	LS_RW_WLOCK_BLOCK		15
 #define	LS_RW_TRYUPGRADE_UPGRADE	16
 #define	LS_RW_DOWNGRADE_DOWNGRADE	17
 
 /*
  * Shared/Exclusive Locks
  */
 #define	LS_SX_SLOCK_ACQUIRE		18
 #define	LS_SX_SUNLOCK_RELEASE		19
 #define	LS_SX_XLOCK_ACQUIRE		20
 #define	LS_SX_XUNLOCK_RELEASE		21
 #define	LS_SX_SLOCK_SPIN		22
 #define	LS_SX_SLOCK_BLOCK		23
 #define	LS_SX_XLOCK_SPIN		24
 #define	LS_SX_XLOCK_BLOCK		25
 #define	LS_SX_TRYUPGRADE_UPGRADE	26
 #define	LS_SX_DOWNGRADE_DOWNGRADE	27
 
 /* 
  * Thread Locks
  */
 #define	LS_THREAD_LOCK_SPIN		28
 
 /*
  * Lockmanager Locks 
  *  According to locking(9), lockmgr locks are "largely deprecated",
  *  so no support for them has been added to the lockstat provider.
  */
 
 #define	LS_NPROBES			29
 
 #define	LS_MTX_LOCK			"mtx_lock"
 #define	LS_MTX_UNLOCK			"mtx_unlock"
 #define	LS_MTX_SPIN_LOCK		"mtx_lock_spin"
 #define	LS_MTX_SPIN_UNLOCK		"mtx_unlock_spin"
 #define	LS_MTX_TRYLOCK			"mtx_trylock"
 #define	LS_RW_RLOCK			"rw_rlock"
 #define	LS_RW_WLOCK			"rw_wlock"
 #define	LS_RW_RUNLOCK			"rw_runlock"
 #define	LS_RW_WUNLOCK			"rw_wunlock"
 #define	LS_RW_TRYUPGRADE		"rw_try_upgrade"
 #define	LS_RW_DOWNGRADE			"rw_downgrade"
 #define	LS_SX_SLOCK			"sx_slock"
 #define	LS_SX_XLOCK			"sx_xlock"
 #define	LS_SX_SUNLOCK			"sx_sunlock"
 #define	LS_SX_XUNLOCK			"sx_xunlock"
 #define	LS_SX_TRYUPGRADE		"sx_try_upgrade"
 #define	LS_SX_DOWNGRADE			"sx_downgrade"
 #define	LS_THREAD_LOCK			"thread_lock"
 
 #define	LS_ACQUIRE			"acquire"
 #define	LS_RELEASE			"release"
 #define	LS_SPIN				"spin"
 #define	LS_BLOCK			"block"
 #define	LS_UPGRADE			"upgrade"
 #define	LS_DOWNGRADE			"downgrade"
 
 #define	LS_TYPE_ADAPTIVE		"adaptive"
 #define	LS_TYPE_SPIN			"spin"
 #define	LS_TYPE_THREAD			"thread"
 #define	LS_TYPE_RW			"rw"
 #define	LS_TYPE_SX			"sx"
 
 #define	LSA_ACQUIRE			(LS_TYPE_ADAPTIVE "-" LS_ACQUIRE)
 #define	LSA_RELEASE			(LS_TYPE_ADAPTIVE "-" LS_RELEASE)
 #define	LSA_SPIN			(LS_TYPE_ADAPTIVE "-" LS_SPIN)
 #define	LSA_BLOCK			(LS_TYPE_ADAPTIVE "-" LS_BLOCK)
 #define	LSS_ACQUIRE			(LS_TYPE_SPIN "-" LS_ACQUIRE)
 #define	LSS_RELEASE			(LS_TYPE_SPIN "-" LS_RELEASE)
 #define	LSS_SPIN			(LS_TYPE_SPIN "-" LS_SPIN)
 #define	LSR_ACQUIRE			(LS_TYPE_RW "-" LS_ACQUIRE)
 #define	LSR_RELEASE			(LS_TYPE_RW "-" LS_RELEASE)
 #define	LSR_BLOCK			(LS_TYPE_RW "-" LS_BLOCK)
 #define	LSR_SPIN			(LS_TYPE_RW "-" LS_SPIN)
 #define	LSR_UPGRADE			(LS_TYPE_RW "-" LS_UPGRADE)
 #define	LSR_DOWNGRADE			(LS_TYPE_RW "-" LS_DOWNGRADE)
 #define	LSX_ACQUIRE			(LS_TYPE_SX "-" LS_ACQUIRE)
 #define	LSX_RELEASE			(LS_TYPE_SX "-" LS_RELEASE)
 #define	LSX_BLOCK			(LS_TYPE_SX "-" LS_BLOCK)
 #define	LSX_SPIN			(LS_TYPE_SX "-" LS_SPIN)
 #define	LSX_UPGRADE			(LS_TYPE_SX "-" LS_UPGRADE)
 #define	LSX_DOWNGRADE			(LS_TYPE_SX "-" LS_DOWNGRADE)
 #define	LST_SPIN			(LS_TYPE_THREAD "-" LS_SPIN)
 
 /*
  * The following must match the type definition of dtrace_probe.  It is
  * defined this way to avoid having to rely on CDDL code.
  */
 extern uint32_t lockstat_probemap[LS_NPROBES];
 typedef void (*lockstat_probe_func_t)(uint32_t, uintptr_t arg0, uintptr_t arg1,
     uintptr_t arg2, uintptr_t arg3, uintptr_t arg4);
 extern lockstat_probe_func_t lockstat_probe_func;
 extern uint64_t lockstat_nsecs(void);
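
The typedef above mirrors dtrace_probe(): when the lockstat provider enables
a probe, its id is stored in lockstat_probemap and the LOCKSTAT_* macros
below call through lockstat_probe_func.  A hypothetical function with the
matching signature, shown only to illustrate how the five arguments arrive
(arg0 is always the lock address; for the *_BLOCK and *_SPIN probes arg1 is
a time in nanoseconds and, with this change, arg2..arg4 describe the
waiter's role and the lock state observed when contention began):

static void
example_lockstat_probe(uint32_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
{

	printf("probe %u fired for lock %p (arg1=%ju arg2=%ju)\n",
	    id, (void *)arg0, (uintmax_t)arg1, (uintmax_t)arg2);
}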
 
 #ifdef	KDTRACE_HOOKS
 /*
  * Macros to record lockstat probes.
  */
 #define	LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4)  do {	\
 	uint32_t id;							\
 									\
 	if ((id = lockstat_probemap[(probe)])) 				\
 	    (*lockstat_probe_func)(id, (uintptr_t)(lp), (arg1),	(arg2),	\
 		(arg3), (arg4));					\
 } while (0)
 
 #define	LOCKSTAT_RECORD(probe, lp, arg1) \
 	LOCKSTAT_RECORD4(probe, lp, arg1, 0, 0, 0)
 
 #define	LOCKSTAT_RECORD0(probe, lp)     \
 	LOCKSTAT_RECORD4(probe, lp, 0, 0, 0, 0)
 
 #define	LOCKSTAT_RECORD1(probe, lp, arg1) \
 	LOCKSTAT_RECORD4(probe, lp, arg1, 0, 0, 0)
 
 #define	LOCKSTAT_RECORD2(probe, lp, arg1, arg2) \
 	LOCKSTAT_RECORD4(probe, lp, arg1, arg2, 0, 0)
 
 #define	LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3) \
 	LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, 0)
 
 #define	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l)  do {   \
 	uint32_t id;							     \
 									     \
     	lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l);   \
 	if ((id = lockstat_probemap[(probe)])) 			     	     \
 		(*lockstat_probe_func)(id, (uintptr_t)(lp), 0, 0, 0, 0);     \
 } while (0)
 
 #define	LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp)  do {			     \
 	uint32_t id;							     \
 									     \
 	lock_profile_release_lock(&(lp)->lock_object);			     \
 	if ((id = lockstat_probemap[(probe)])) 			     	     \
 		(*lockstat_probe_func)(id, (uintptr_t)(lp), 0, 0, 0, 0);     \
 } while (0)
 
+#define	LOCKSTAT_WRITER		0
+#define	LOCKSTAT_READER		1
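
A usage sketch tying the new role constants to the four-argument record
macro, mirroring the kern_sx.c slow paths above; it assumes sys/sx.h has
been included for struct sx, SX_LOCK_SHARED and SX_SHARERS(), and the
variable names are the slow path's locals:

/*
 * Record a blocked interval for a would-be writer of "sx", given the lock
 * word observed when contention began ("state") and the nanoseconds spent
 * asleep ("sleep_time").
 */
static __inline void
example_record_xlock_block(struct sx *sx, uintptr_t state, int64_t sleep_time)
{

	LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time,
	    LOCKSTAT_WRITER,			/* arg2: waiter's role */
	    (state & SX_LOCK_SHARED) == 0,	/* arg3: write-locked? */
	    (state & SX_LOCK_SHARED) == 0 ? 0 :
	    SX_SHARERS(state));			/* arg4: sharers observed */
}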
+
 #else	/* !KDTRACE_HOOKS */
 
 #define	LOCKSTAT_RECORD(probe, lp, arg1)
 #define	LOCKSTAT_RECORD0(probe, lp)
 #define	LOCKSTAT_RECORD1(probe, lp, arg1)
 #define	LOCKSTAT_RECORD2(probe, lp, arg1, arg2)
 #define	LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3)
 #define	LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4)
 
 #define	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l)	\
 	lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l)
 
 #define	LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp)  			\
 	lock_profile_release_lock(&(lp)->lock_object)
 
 #endif	/* !KDTRACE_HOOKS */
 
 #endif	/* _KERNEL */
 
 #endif	/* _SYS_LOCKSTAT_H */