Index: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/usdt/tst.enabled2.ksh =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/usdt/tst.enabled2.ksh (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/usdt/tst.enabled2.ksh (revision 313267) @@ -1,113 +1,113 @@ # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" # # This test is primarily intended to verify a fix for SPARC, but there's no # harm in running it on other platforms. Here, we verify that is-enabled # probes don't interfere with return values from previously invoked functions. # if [ $# != 1 ]; then echo expected one argument: '<'dtrace-path'>' exit 2 fi dtrace=$1 DIR=/var/tmp/dtest.$$ mkdir $DIR cd $DIR cat > prov.d < test.c < #include "prov.h" int foo(void) { return (24); } int main(int argc, char **argv) { int a = foo(); if (TEST_PROV_GO_ENABLED()) { TEST_PROV_GO(); } (void) printf("%d %d %d\n", a, a, a); return (0); } EOF -cc -c -xO2 test.c +cc -c -O2 test.c if [ $? -ne 0 ]; then print -u2 "failed to compile test.c" exit 1 fi $dtrace -G -s prov.d test.o if [ $? -ne 0 ]; then print -u2 "failed to create DOF" exit 1 fi cc -o test test.o prov.o if [ $? -ne 0 ]; then print -u2 "failed to link final executable" exit 1 fi script() { ./test $dtrace -c ./test -qs /dev/stdin < defines _DTRACE_VERSION +# Make sure defines _DTRACE_VERSION DIR=/var/tmp/dtest.$$ mkdir $DIR cd $DIR cat > test.c < +#include int main(int argc, char **argv) { #ifdef _DTRACE_VERSION return (0); #else return (1); #endif } EOF -cc -xarch=generic -o test test.c +cc -o test test.c if [ $? -ne 0 ]; then print -u2 "failed to compile test.c" exit 1 fi ./test status=$? cd / /bin/rm -rf $DIR exit $status Index: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_dof.c (revision 313267) @@ -1,986 +1,976 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include #ifdef illumos #include #endif #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #include #include void dt_dof_init(dtrace_hdl_t *dtp) { dt_dof_t *ddo = &dtp->dt_dof; ddo->ddo_hdl = dtp; ddo->ddo_nsecs = 0; ddo->ddo_strsec = DOF_SECIDX_NONE; ddo->ddo_xlimport = NULL; ddo->ddo_xlexport = NULL; dt_buf_create(dtp, &ddo->ddo_secs, "section headers", 0); dt_buf_create(dtp, &ddo->ddo_strs, "string table", 0); dt_buf_create(dtp, &ddo->ddo_ldata, "loadable data", 0); dt_buf_create(dtp, &ddo->ddo_udata, "unloadable data", 0); dt_buf_create(dtp, &ddo->ddo_probes, "probe data", 0); dt_buf_create(dtp, &ddo->ddo_args, "probe args", 0); dt_buf_create(dtp, &ddo->ddo_offs, "probe offs", 0); dt_buf_create(dtp, &ddo->ddo_enoffs, "probe is-enabled offs", 0); dt_buf_create(dtp, &ddo->ddo_rels, "probe rels", 0); dt_buf_create(dtp, &ddo->ddo_xlms, "xlate members", 0); } void dt_dof_fini(dtrace_hdl_t *dtp) { dt_dof_t *ddo = &dtp->dt_dof; dt_free(dtp, ddo->ddo_xlimport); dt_free(dtp, ddo->ddo_xlexport); dt_buf_destroy(dtp, &ddo->ddo_secs); dt_buf_destroy(dtp, &ddo->ddo_strs); dt_buf_destroy(dtp, &ddo->ddo_ldata); dt_buf_destroy(dtp, &ddo->ddo_udata); dt_buf_destroy(dtp, &ddo->ddo_probes); dt_buf_destroy(dtp, &ddo->ddo_args); dt_buf_destroy(dtp, &ddo->ddo_offs); dt_buf_destroy(dtp, &ddo->ddo_enoffs); dt_buf_destroy(dtp, &ddo->ddo_rels); dt_buf_destroy(dtp, &ddo->ddo_xlms); } static int dt_dof_reset(dtrace_hdl_t *dtp, dtrace_prog_t *pgp) { dt_dof_t *ddo = &dtp->dt_dof; uint_t i, nx = dtp->dt_xlatorid; assert(ddo->ddo_hdl == dtp); ddo->ddo_pgp = pgp; ddo->ddo_nsecs = 0; ddo->ddo_strsec = DOF_SECIDX_NONE; dt_free(dtp, ddo->ddo_xlimport); dt_free(dtp, ddo->ddo_xlexport); ddo->ddo_xlimport = dt_alloc(dtp, sizeof (dof_secidx_t) * nx); ddo->ddo_xlexport = dt_alloc(dtp, sizeof (dof_secidx_t) * nx); if (nx != 0 && (ddo->ddo_xlimport == NULL || ddo->ddo_xlexport == NULL)) return (-1); /* errno is set for us */ for (i = 0; i < nx; i++) { ddo->ddo_xlimport[i] = DOF_SECIDX_NONE; ddo->ddo_xlexport[i] = DOF_SECIDX_NONE; } dt_buf_reset(dtp, &ddo->ddo_secs); dt_buf_reset(dtp, &ddo->ddo_strs); dt_buf_reset(dtp, &ddo->ddo_ldata); dt_buf_reset(dtp, &ddo->ddo_udata); dt_buf_reset(dtp, &ddo->ddo_probes); dt_buf_reset(dtp, &ddo->ddo_args); dt_buf_reset(dtp, &ddo->ddo_offs); dt_buf_reset(dtp, &ddo->ddo_enoffs); dt_buf_reset(dtp, &ddo->ddo_rels); dt_buf_reset(dtp, &ddo->ddo_xlms); return (0); } /* * Add a loadable DOF section to the file using the specified data buffer and * the specified DOF section attributes. DOF_SECF_LOAD must be set in flags. * If 'data' is NULL, the caller is responsible for manipulating the ldata buf. 
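 *
 * Purely as an illustration (it mirrors the pattern dof_add_provider()
 * uses later in this file; the local name 'sec' is hypothetical), a
 * caller that stages a section in its own dt_buf_t reserves the header
 * with a NULL data pointer and appends the payload itself afterwards:
 *
 *	sec = dof_add_lsect(ddo, NULL, DOF_SECT_PROBES, sizeof (uint64_t),
 *	    0, sizeof (dof_probe_t), dt_buf_len(&ddo->ddo_probes));
 *	dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_probes,
 *	    sizeof (uint64_t));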
*/ static dof_secidx_t dof_add_lsect(dt_dof_t *ddo, const void *data, uint32_t type, uint32_t align, uint32_t flags, uint32_t entsize, uint64_t size) { dtrace_hdl_t *dtp = ddo->ddo_hdl; dof_sec_t s; s.dofs_type = type; s.dofs_align = align; s.dofs_flags = flags | DOF_SECF_LOAD; s.dofs_entsize = entsize; s.dofs_offset = dt_buf_offset(&ddo->ddo_ldata, align); s.dofs_size = size; dt_buf_write(dtp, &ddo->ddo_secs, &s, sizeof (s), sizeof (uint64_t)); if (data != NULL) dt_buf_write(dtp, &ddo->ddo_ldata, data, size, align); return (ddo->ddo_nsecs++); } /* * Add an unloadable DOF section to the file using the specified data buffer * and DOF section attributes. DOF_SECF_LOAD must *not* be set in flags. * If 'data' is NULL, the caller is responsible for manipulating the udata buf. */ static dof_secidx_t dof_add_usect(dt_dof_t *ddo, const void *data, uint32_t type, uint32_t align, uint32_t flags, uint32_t entsize, uint64_t size) { dtrace_hdl_t *dtp = ddo->ddo_hdl; dof_sec_t s; s.dofs_type = type; s.dofs_align = align; s.dofs_flags = flags & ~DOF_SECF_LOAD; s.dofs_entsize = entsize; s.dofs_offset = dt_buf_offset(&ddo->ddo_udata, align); s.dofs_size = size; dt_buf_write(dtp, &ddo->ddo_secs, &s, sizeof (s), sizeof (uint64_t)); if (data != NULL) dt_buf_write(dtp, &ddo->ddo_udata, data, size, align); return (ddo->ddo_nsecs++); } /* * Add a string to the global string table associated with the DOF. The offset * of the string is returned as an index into the string table. */ static dof_stridx_t dof_add_string(dt_dof_t *ddo, const char *s) { dt_buf_t *bp = &ddo->ddo_strs; dof_stridx_t i = dt_buf_len(bp); if (i != 0 && (s == NULL || *s == '\0')) return (0); /* string table has \0 at offset 0 */ dt_buf_write(ddo->ddo_hdl, bp, s, strlen(s) + 1, sizeof (char)); return (i); } static dof_attr_t dof_attr(const dtrace_attribute_t *ap) { return (DOF_ATTR(ap->dtat_name, ap->dtat_data, ap->dtat_class)); } static dof_secidx_t dof_add_difo(dt_dof_t *ddo, const dtrace_difo_t *dp) { dof_secidx_t dsecs[5]; /* enough for all possible DIFO sections */ uint_t nsecs = 0; dof_difohdr_t *dofd; dof_relohdr_t dofr; dof_secidx_t relsec; dof_secidx_t strsec = DOF_SECIDX_NONE; dof_secidx_t intsec = DOF_SECIDX_NONE; dof_secidx_t hdrsec = DOF_SECIDX_NONE; if (dp->dtdo_buf != NULL) { dsecs[nsecs++] = dof_add_lsect(ddo, dp->dtdo_buf, DOF_SECT_DIF, sizeof (dif_instr_t), 0, sizeof (dif_instr_t), sizeof (dif_instr_t) * dp->dtdo_len); } if (dp->dtdo_inttab != NULL) { dsecs[nsecs++] = intsec = dof_add_lsect(ddo, dp->dtdo_inttab, DOF_SECT_INTTAB, sizeof (uint64_t), 0, sizeof (uint64_t), sizeof (uint64_t) * dp->dtdo_intlen); } if (dp->dtdo_strtab != NULL) { dsecs[nsecs++] = strsec = dof_add_lsect(ddo, dp->dtdo_strtab, DOF_SECT_STRTAB, sizeof (char), 0, 0, dp->dtdo_strlen); } if (dp->dtdo_vartab != NULL) { dsecs[nsecs++] = dof_add_lsect(ddo, dp->dtdo_vartab, DOF_SECT_VARTAB, sizeof (uint_t), 0, sizeof (dtrace_difv_t), sizeof (dtrace_difv_t) * dp->dtdo_varlen); } if (dp->dtdo_xlmtab != NULL) { dof_xlref_t *xlt, *xlp; dt_node_t **pnp; xlt = alloca(sizeof (dof_xlref_t) * dp->dtdo_xlmlen); pnp = dp->dtdo_xlmtab; /* * dtdo_xlmtab contains pointers to the translator members. * The translator itself is in sect ddo_xlimport[dxp->dx_id]. * The XLMEMBERS entries are in order by their dn_membid, so * the member section offset is the population count of bits * in ddo_pgp->dp_xlrefs[] up to and not including dn_membid. 
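 *
 * A small worked example with illustrative values: if the bitmap has
 * members 0, 2 and 5 set and this member's dn_membid is 5, dt_popcb()
 * sees two earlier set bits (0 and 2), so dofxr_member is 2.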
*/ for (xlp = xlt; xlp < xlt + dp->dtdo_xlmlen; xlp++) { dt_node_t *dnp = *pnp++; dt_xlator_t *dxp = dnp->dn_membexpr->dn_xlator; xlp->dofxr_xlator = ddo->ddo_xlimport[dxp->dx_id]; xlp->dofxr_member = dt_popcb( ddo->ddo_pgp->dp_xrefs[dxp->dx_id], dnp->dn_membid); xlp->dofxr_argn = (uint32_t)dxp->dx_arg; } dsecs[nsecs++] = dof_add_lsect(ddo, xlt, DOF_SECT_XLTAB, sizeof (dof_secidx_t), 0, sizeof (dof_xlref_t), sizeof (dof_xlref_t) * dp->dtdo_xlmlen); } /* * Copy the return type and the array of section indices that form the * DIFO into a single dof_difohdr_t and then add DOF_SECT_DIFOHDR. */ assert(nsecs <= sizeof (dsecs) / sizeof (dsecs[0])); dofd = alloca(sizeof (dtrace_diftype_t) + sizeof (dsecs)); bcopy(&dp->dtdo_rtype, &dofd->dofd_rtype, sizeof (dtrace_diftype_t)); bcopy(dsecs, &dofd->dofd_links, sizeof (dof_secidx_t) * nsecs); hdrsec = dof_add_lsect(ddo, dofd, DOF_SECT_DIFOHDR, sizeof (dof_secidx_t), 0, 0, sizeof (dtrace_diftype_t) + sizeof (dof_secidx_t) * nsecs); /* * Add any other sections related to dtrace_difo_t. These are not * referenced in dof_difohdr_t because they are not used by emulation. */ if (dp->dtdo_kreltab != NULL) { relsec = dof_add_lsect(ddo, dp->dtdo_kreltab, DOF_SECT_RELTAB, sizeof (uint64_t), 0, sizeof (dof_relodesc_t), sizeof (dof_relodesc_t) * dp->dtdo_krelen); /* * This code assumes the target of all relocations is the * integer table 'intsec' (DOF_SECT_INTTAB). If other sections * need relocation in the future this will need to change. */ dofr.dofr_strtab = strsec; dofr.dofr_relsec = relsec; dofr.dofr_tgtsec = intsec; (void) dof_add_lsect(ddo, &dofr, DOF_SECT_KRELHDR, sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); } if (dp->dtdo_ureltab != NULL) { relsec = dof_add_lsect(ddo, dp->dtdo_ureltab, DOF_SECT_RELTAB, sizeof (uint64_t), 0, sizeof (dof_relodesc_t), sizeof (dof_relodesc_t) * dp->dtdo_urelen); /* * This code assumes the target of all relocations is the * integer table 'intsec' (DOF_SECT_INTTAB). If other sections * need relocation in the future this will need to change. */ dofr.dofr_strtab = strsec; dofr.dofr_relsec = relsec; dofr.dofr_tgtsec = intsec; (void) dof_add_lsect(ddo, &dofr, DOF_SECT_URELHDR, sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); } return (hdrsec); } static void dof_add_translator(dt_dof_t *ddo, const dt_xlator_t *dxp, uint_t type) { dtrace_hdl_t *dtp = ddo->ddo_hdl; dof_xlmember_t dofxm; dof_xlator_t dofxl; dof_secidx_t *xst; char buf[DT_TYPE_NAMELEN]; dt_node_t *dnp; uint_t i = 0; assert(type == DOF_SECT_XLIMPORT || type == DOF_SECT_XLEXPORT); xst = type == DOF_SECT_XLIMPORT ? ddo->ddo_xlimport : ddo->ddo_xlexport; if (xst[dxp->dx_id] != DOF_SECIDX_NONE) return; /* translator has already been emitted */ dt_buf_reset(dtp, &ddo->ddo_xlms); /* * Generate an array of dof_xlmember_t's into ddo_xlms. If we are * importing the translator, add only those members referenced by the * program and set the dofxm_difo reference of each member to NONE. If * we're exporting the translator, add all members and a DIFO for each. 
*/ for (dnp = dxp->dx_members; dnp != NULL; dnp = dnp->dn_list, i++) { if (type == DOF_SECT_XLIMPORT) { if (!BT_TEST(ddo->ddo_pgp->dp_xrefs[dxp->dx_id], i)) continue; /* member is not referenced */ dofxm.dofxm_difo = DOF_SECIDX_NONE; } else { dofxm.dofxm_difo = dof_add_difo(ddo, dxp->dx_membdif[dnp->dn_membid]); } dofxm.dofxm_name = dof_add_string(ddo, dnp->dn_membname); dt_node_diftype(dtp, dnp, &dofxm.dofxm_type); dt_buf_write(dtp, &ddo->ddo_xlms, &dofxm, sizeof (dofxm), sizeof (uint32_t)); } dofxl.dofxl_members = dof_add_lsect(ddo, NULL, DOF_SECT_XLMEMBERS, sizeof (uint32_t), 0, sizeof (dofxm), dt_buf_len(&ddo->ddo_xlms)); dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_xlms, sizeof (uint32_t)); dofxl.dofxl_strtab = ddo->ddo_strsec; dofxl.dofxl_argv = dof_add_string(ddo, ctf_type_name( dxp->dx_src_ctfp, dxp->dx_src_type, buf, sizeof (buf))); dofxl.dofxl_argc = 1; dofxl.dofxl_type = dof_add_string(ddo, ctf_type_name( dxp->dx_dst_ctfp, dxp->dx_dst_type, buf, sizeof (buf))); dofxl.dofxl_attr = dof_attr(&dxp->dx_souid.di_attr); xst[dxp->dx_id] = dof_add_lsect(ddo, &dofxl, type, sizeof (uint32_t), 0, 0, sizeof (dofxl)); } /*ARGSUSED*/ static int dof_add_probe(dt_idhash_t *dhp, dt_ident_t *idp, void *data) { dt_dof_t *ddo = data; dtrace_hdl_t *dtp = ddo->ddo_hdl; dt_probe_t *prp = idp->di_data; dof_probe_t dofpr; dof_relodesc_t dofr; dt_probe_instance_t *pip; dt_node_t *dnp; char buf[DT_TYPE_NAMELEN]; uint_t i; dofpr.dofpr_addr = 0; dofpr.dofpr_name = dof_add_string(ddo, prp->pr_name); dofpr.dofpr_nargv = dt_buf_len(&ddo->ddo_strs); for (dnp = prp->pr_nargs; dnp != NULL; dnp = dnp->dn_list) { (void) dof_add_string(ddo, ctf_type_name(dnp->dn_ctfp, dnp->dn_type, buf, sizeof (buf))); } dofpr.dofpr_xargv = dt_buf_len(&ddo->ddo_strs); for (dnp = prp->pr_xargs; dnp != NULL; dnp = dnp->dn_list) { (void) dof_add_string(ddo, ctf_type_name(dnp->dn_ctfp, dnp->dn_type, buf, sizeof (buf))); } dofpr.dofpr_argidx = dt_buf_len(&ddo->ddo_args) / sizeof (uint8_t); for (i = 0; i < prp->pr_xargc; i++) { dt_buf_write(dtp, &ddo->ddo_args, &prp->pr_mapping[i], sizeof (uint8_t), sizeof (uint8_t)); } dofpr.dofpr_nargc = prp->pr_nargc; dofpr.dofpr_xargc = prp->pr_xargc; dofpr.dofpr_pad1 = 0; dofpr.dofpr_pad2 = 0; for (pip = prp->pr_inst; pip != NULL; pip = pip->pi_next) { dt_dprintf("adding probe for %s:%s\n", pip->pi_fname, prp->pr_name); dofpr.dofpr_func = dof_add_string(ddo, pip->pi_fname); /* * There should be one probe offset or is-enabled probe offset * or else this probe instance won't have been created. The * kernel will reject DOF which has a probe with no offsets. */ assert(pip->pi_noffs + pip->pi_nenoffs > 0); dofpr.dofpr_offidx = dt_buf_len(&ddo->ddo_offs) / sizeof (uint32_t); dofpr.dofpr_noffs = pip->pi_noffs; dt_buf_write(dtp, &ddo->ddo_offs, pip->pi_offs, pip->pi_noffs * sizeof (uint32_t), sizeof (uint32_t)); dofpr.dofpr_enoffidx = dt_buf_len(&ddo->ddo_enoffs) / sizeof (uint32_t); dofpr.dofpr_nenoffs = pip->pi_nenoffs; dt_buf_write(dtp, &ddo->ddo_enoffs, pip->pi_enoffs, pip->pi_nenoffs * sizeof (uint32_t), sizeof (uint32_t)); - /* - * If pi_rname isn't set, the relocation will be against the - * function name. If it is, the relocation will be against - * pi_rname. This will be used if the function is scoped - * locally so an alternate symbol is added for the purpose - * of this relocation. 
- */ - if (pip->pi_rname == NULL) - dofr.dofr_name = dofpr.dofpr_func; - else - dofr.dofr_name = dof_add_string(ddo, pip->pi_rname); - dofr.dofr_type = DOF_RELO_SETX; + dofr.dofr_name = dof_add_string(ddo, pip->pi_rname); + dofr.dofr_type = DOF_RELO_DOFREL; dofr.dofr_offset = dt_buf_len(&ddo->ddo_probes); dofr.dofr_data = 0; dt_buf_write(dtp, &ddo->ddo_rels, &dofr, sizeof (dofr), sizeof (uint64_t)); dt_buf_write(dtp, &ddo->ddo_probes, &dofpr, sizeof (dofpr), sizeof (uint64_t)); } return (0); } static int dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) { dtrace_hdl_t *dtp = ddo->ddo_hdl; dof_provider_t dofpv; dof_relohdr_t dofr; dof_secidx_t *dofs; ulong_t xr, nxr; size_t sz; id_t i; if (pvp->pv_flags & DT_PROVIDER_IMPL) { /* * ignore providers that are exported by dtrace(7D) */ return (0); } nxr = dt_popcb(pvp->pv_xrefs, pvp->pv_xrmax); dofs = alloca(sizeof (dof_secidx_t) * (nxr + 1)); xr = 1; /* reserve dofs[0] for the provider itself */ /* * For each translator referenced by the provider (pv_xrefs), emit an * exported translator section for it if one hasn't been created yet. */ for (i = 0; i < pvp->pv_xrmax; i++) { if (BT_TEST(pvp->pv_xrefs, i) && dtp->dt_xlatemode == DT_XL_DYNAMIC) { dof_add_translator(ddo, dt_xlator_lookup_id(dtp, i), DOF_SECT_XLEXPORT); dofs[xr++] = ddo->ddo_xlexport[i]; } } dt_buf_reset(dtp, &ddo->ddo_probes); dt_buf_reset(dtp, &ddo->ddo_args); dt_buf_reset(dtp, &ddo->ddo_offs); dt_buf_reset(dtp, &ddo->ddo_enoffs); dt_buf_reset(dtp, &ddo->ddo_rels); (void) dt_idhash_iter(pvp->pv_probes, dof_add_probe, ddo); if (dt_buf_len(&ddo->ddo_probes) == 0) return (dt_set_errno(dtp, EDT_NOPROBES)); dofpv.dofpv_probes = dof_add_lsect(ddo, NULL, DOF_SECT_PROBES, sizeof (uint64_t), 0, sizeof (dof_probe_t), dt_buf_len(&ddo->ddo_probes)); dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_probes, sizeof (uint64_t)); dofpv.dofpv_prargs = dof_add_lsect(ddo, NULL, DOF_SECT_PRARGS, sizeof (uint8_t), 0, sizeof (uint8_t), dt_buf_len(&ddo->ddo_args)); dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_args, sizeof (uint8_t)); dofpv.dofpv_proffs = dof_add_lsect(ddo, NULL, DOF_SECT_PROFFS, sizeof (uint_t), 0, sizeof (uint_t), dt_buf_len(&ddo->ddo_offs)); dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_offs, sizeof (uint_t)); if ((sz = dt_buf_len(&ddo->ddo_enoffs)) != 0) { dofpv.dofpv_prenoffs = dof_add_lsect(ddo, NULL, DOF_SECT_PRENOFFS, sizeof (uint_t), 0, sizeof (uint_t), sz); } else { dofpv.dofpv_prenoffs = DOF_SECT_NONE; } dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_enoffs, sizeof (uint_t)); dofpv.dofpv_strtab = ddo->ddo_strsec; dofpv.dofpv_name = dof_add_string(ddo, pvp->pv_desc.dtvd_name); dofpv.dofpv_provattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_provider); dofpv.dofpv_modattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_mod); dofpv.dofpv_funcattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_func); dofpv.dofpv_nameattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_name); dofpv.dofpv_argsattr = dof_attr(&pvp->pv_desc.dtvd_attr.dtpa_args); dofs[0] = dof_add_lsect(ddo, &dofpv, DOF_SECT_PROVIDER, sizeof (dof_secidx_t), 0, 0, sizeof (dof_provider_t)); dofr.dofr_strtab = dofpv.dofpv_strtab; dofr.dofr_tgtsec = dofpv.dofpv_probes; dofr.dofr_relsec = dof_add_lsect(ddo, NULL, DOF_SECT_RELTAB, sizeof (uint64_t), 0, sizeof (dof_relodesc_t), dt_buf_len(&ddo->ddo_rels)); dt_buf_concat(dtp, &ddo->ddo_ldata, &ddo->ddo_rels, sizeof (uint64_t)); (void) dof_add_lsect(ddo, &dofr, DOF_SECT_URELHDR, sizeof (dof_secidx_t), 0, 0, sizeof (dof_relohdr_t)); if (nxr != 0 && dtp->dt_xlatemode == DT_XL_DYNAMIC) { (void) 
dof_add_lsect(ddo, dofs, DOF_SECT_PREXPORT, sizeof (dof_secidx_t), 0, sizeof (dof_secidx_t), sizeof (dof_secidx_t) * (nxr + 1)); } return (0); } static int dof_hdr(dtrace_hdl_t *dtp, uint8_t dofversion, dof_hdr_t *hp) { /* * If our config values cannot fit in a uint8_t, we can't generate a * DOF header since the values won't fit. This can only happen if the * user forcibly compiles a program with an artificial configuration. */ if (dtp->dt_conf.dtc_difversion > UINT8_MAX || dtp->dt_conf.dtc_difintregs > UINT8_MAX || dtp->dt_conf.dtc_diftupregs > UINT8_MAX) return (dt_set_errno(dtp, EOVERFLOW)); bzero(hp, sizeof (dof_hdr_t)); hp->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; hp->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; hp->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; hp->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; if (dtp->dt_conf.dtc_ctfmodel == CTF_MODEL_LP64) hp->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_LP64; else hp->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_ILP32; hp->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; hp->dofh_ident[DOF_ID_VERSION] = dofversion; hp->dofh_ident[DOF_ID_DIFVERS] = dtp->dt_conf.dtc_difversion; hp->dofh_ident[DOF_ID_DIFIREG] = dtp->dt_conf.dtc_difintregs; hp->dofh_ident[DOF_ID_DIFTREG] = dtp->dt_conf.dtc_diftupregs; hp->dofh_hdrsize = sizeof (dof_hdr_t); hp->dofh_secsize = sizeof (dof_sec_t); hp->dofh_secoff = sizeof (dof_hdr_t); return (0); } void * dtrace_dof_create(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t flags) { dt_dof_t *ddo = &dtp->dt_dof; const dtrace_ecbdesc_t *edp, *last; const dtrace_probedesc_t *pdp; const dtrace_actdesc_t *ap; const dt_stmt_t *stp; uint_t maxacts = 0; uint_t maxfmt = 0; dt_provider_t *pvp; dt_xlator_t *dxp; dof_actdesc_t *dofa; dof_sec_t *sp; size_t ssize, lsize; dof_hdr_t h; dt_buf_t dof; char *fmt; uint_t i; if (flags & ~DTRACE_D_MASK) { (void) dt_set_errno(dtp, EINVAL); return (NULL); } flags |= dtp->dt_dflags; if (dof_hdr(dtp, pgp->dp_dofversion, &h) != 0) return (NULL); if (dt_dof_reset(dtp, pgp) != 0) return (NULL); /* * Iterate through the statement list computing the maximum number of * actions and the maximum format string for allocating local buffers. */ for (last = NULL, stp = dt_list_next(&pgp->dp_stmts); stp != NULL; stp = dt_list_next(stp), last = edp) { dtrace_stmtdesc_t *sdp = stp->ds_desc; dtrace_actdesc_t *ap = sdp->dtsd_action; if (sdp->dtsd_fmtdata != NULL) { i = dtrace_printf_format(dtp, sdp->dtsd_fmtdata, NULL, 0); maxfmt = MAX(maxfmt, i); } if ((edp = sdp->dtsd_ecbdesc) == last) continue; /* same ecb as previous statement */ for (i = 0, ap = edp->dted_action; ap; ap = ap->dtad_next) i++; maxacts = MAX(maxacts, i); } dofa = alloca(sizeof (dof_actdesc_t) * maxacts); fmt = alloca(maxfmt + 1); ddo->ddo_strsec = dof_add_lsect(ddo, NULL, DOF_SECT_STRTAB, 1, 0, 0, 0); (void) dof_add_string(ddo, ""); /* * If there are references to dynamic translators in the program, add * an imported translator table entry for each referenced translator. */ if (pgp->dp_xrefslen != 0) { for (dxp = dt_list_next(&dtp->dt_xlators); dxp != NULL; dxp = dt_list_next(dxp)) { if (dxp->dx_id < pgp->dp_xrefslen && pgp->dp_xrefs[dxp->dx_id] != NULL) dof_add_translator(ddo, dxp, DOF_SECT_XLIMPORT); } } /* * Now iterate through the statement list, creating the DOF section * headers and data for each one and adding them to our buffers. 
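 *
 * As a rough sketch of what the loop emits for each distinct ecbdesc,
 * the final DOF_SECT_ECBDESC simply points at the sub-sections
 * generated immediately before it:
 *
 *	dofe.dofe_probes  = probesec;	(DOF_SECT_PROBEDESC)
 *	dofe.dofe_pred    = prdsec;	(DOF_SECT_DIFOHDR or DOF_SECIDX_NONE)
 *	dofe.dofe_actions = actsec;	(DOF_SECT_ACTDESC or DOF_SECIDX_NONE)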
*/ for (last = NULL, stp = dt_list_next(&pgp->dp_stmts); stp != NULL; stp = dt_list_next(stp), last = edp) { dof_secidx_t probesec = DOF_SECIDX_NONE; dof_secidx_t prdsec = DOF_SECIDX_NONE; dof_secidx_t actsec = DOF_SECIDX_NONE; const dt_stmt_t *next = stp; dtrace_stmtdesc_t *sdp = stp->ds_desc; dof_stridx_t strndx = 0; dof_probedesc_t dofp; dof_ecbdesc_t dofe; uint_t i; if ((edp = stp->ds_desc->dtsd_ecbdesc) == last) continue; /* same ecb as previous statement */ pdp = &edp->dted_probe; /* * Add a DOF_SECT_PROBEDESC for the ECB's probe description, * and copy the probe description strings into the string table. */ dofp.dofp_strtab = ddo->ddo_strsec; dofp.dofp_provider = dof_add_string(ddo, pdp->dtpd_provider); dofp.dofp_mod = dof_add_string(ddo, pdp->dtpd_mod); dofp.dofp_func = dof_add_string(ddo, pdp->dtpd_func); dofp.dofp_name = dof_add_string(ddo, pdp->dtpd_name); dofp.dofp_id = pdp->dtpd_id; probesec = dof_add_lsect(ddo, &dofp, DOF_SECT_PROBEDESC, sizeof (dof_secidx_t), 0, sizeof (dof_probedesc_t), sizeof (dof_probedesc_t)); /* * If there is a predicate DIFO associated with the ecbdesc, * write out the DIFO sections and save the DIFO section index. */ if (edp->dted_pred.dtpdd_difo != NULL) prdsec = dof_add_difo(ddo, edp->dted_pred.dtpdd_difo); /* * Now iterate through the action list generating DIFOs as * referenced therein and adding action descriptions to 'dofa'. */ for (i = 0, ap = edp->dted_action; ap != NULL; ap = ap->dtad_next, i++) { if (ap->dtad_difo != NULL) { dofa[i].dofa_difo = dof_add_difo(ddo, ap->dtad_difo); } else dofa[i].dofa_difo = DOF_SECIDX_NONE; /* * If the first action in a statement has string data, * add the string to the global string table. This can * be due either to a printf() format string * (dtsd_fmtdata) or a print() type string * (dtsd_strdata). */ if (sdp != NULL && ap == sdp->dtsd_action) { if (sdp->dtsd_fmtdata != NULL) { (void) dtrace_printf_format(dtp, sdp->dtsd_fmtdata, fmt, maxfmt + 1); strndx = dof_add_string(ddo, fmt); } else if (sdp->dtsd_strdata != NULL) { strndx = dof_add_string(ddo, sdp->dtsd_strdata); } else { strndx = 0; /* use dtad_arg instead */ } if ((next = dt_list_next(next)) != NULL) sdp = next->ds_desc; else sdp = NULL; } if (strndx != 0) { dofa[i].dofa_arg = strndx; dofa[i].dofa_strtab = ddo->ddo_strsec; } else { dofa[i].dofa_arg = ap->dtad_arg; dofa[i].dofa_strtab = DOF_SECIDX_NONE; } dofa[i].dofa_kind = ap->dtad_kind; dofa[i].dofa_ntuple = ap->dtad_ntuple; dofa[i].dofa_uarg = ap->dtad_uarg; } if (i > 0) { actsec = dof_add_lsect(ddo, dofa, DOF_SECT_ACTDESC, sizeof (uint64_t), 0, sizeof (dof_actdesc_t), sizeof (dof_actdesc_t) * i); } /* * Now finally, add the DOF_SECT_ECBDESC referencing all the * previously created sub-sections. */ dofe.dofe_probes = probesec; dofe.dofe_pred = prdsec; dofe.dofe_actions = actsec; dofe.dofe_pad = 0; dofe.dofe_uarg = edp->dted_uarg; (void) dof_add_lsect(ddo, &dofe, DOF_SECT_ECBDESC, sizeof (uint64_t), 0, 0, sizeof (dof_ecbdesc_t)); } /* * If any providers are user-defined, output DOF sections corresponding * to the providers and the probes and arguments that they define. */ if (flags & DTRACE_D_PROBES) { for (pvp = dt_list_next(&dtp->dt_provlist); pvp != NULL; pvp = dt_list_next(pvp)) { if (dof_add_provider(ddo, pvp) != 0) return (NULL); } } /* * If we're not stripping unloadable sections, generate compiler * comments and any other unloadable miscellany. 
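 *
 * For reference only -- a hedged sketch of the caller's side, not code
 * from this file -- the DTRACE_D_STRIP and DTRACE_D_PROBES flags are
 * supplied by the caller of dtrace_dof_create() (and merged with
 * dtp->dt_dflags earlier in this function):
 *
 *	void *dof = dtrace_dof_create(dtp, pgp, DTRACE_D_STRIP);
 *	if (dof != NULL) {
 *		... consume or write out the DOF buffer ...
 *		dtrace_dof_destroy(dtp, dof);
 *	}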
*/ if (!(flags & DTRACE_D_STRIP)) { (void) dof_add_usect(ddo, _dtrace_version, DOF_SECT_COMMENTS, sizeof (char), 0, 0, strlen(_dtrace_version) + 1); (void) dof_add_usect(ddo, &dtp->dt_uts, DOF_SECT_UTSNAME, sizeof (char), 0, 0, sizeof (struct utsname)); } /* * Compute and fill in the appropriate values for the dof_hdr_t's * dofh_secnum, dofh_loadsz, and dofh_filez values. */ h.dofh_secnum = ddo->ddo_nsecs; ssize = sizeof (h) + dt_buf_len(&ddo->ddo_secs); h.dofh_loadsz = ssize + dt_buf_len(&ddo->ddo_ldata) + dt_buf_len(&ddo->ddo_strs); if (dt_buf_len(&ddo->ddo_udata) != 0) { lsize = roundup(h.dofh_loadsz, sizeof (uint64_t)); h.dofh_filesz = lsize + dt_buf_len(&ddo->ddo_udata); } else { lsize = h.dofh_loadsz; h.dofh_filesz = lsize; } /* * Set the global DOF_SECT_STRTAB's offset to be after the header, * section headers, and other loadable data. Since we're going to * iterate over the buffer data directly, we must check for errors. */ if ((i = dt_buf_error(&ddo->ddo_secs)) != 0) { (void) dt_set_errno(dtp, i); return (NULL); } sp = dt_buf_ptr(&ddo->ddo_secs); assert(sp[ddo->ddo_strsec].dofs_type == DOF_SECT_STRTAB); assert(ssize == sizeof (h) + sizeof (dof_sec_t) * ddo->ddo_nsecs); sp[ddo->ddo_strsec].dofs_offset = ssize + dt_buf_len(&ddo->ddo_ldata); sp[ddo->ddo_strsec].dofs_size = dt_buf_len(&ddo->ddo_strs); /* * Now relocate all the other section headers by adding the appropriate * delta to their respective dofs_offset values. */ for (i = 0; i < ddo->ddo_nsecs; i++, sp++) { if (i == ddo->ddo_strsec) continue; /* already relocated above */ if (sp->dofs_flags & DOF_SECF_LOAD) sp->dofs_offset += ssize; else sp->dofs_offset += lsize; } /* * Finally, assemble the complete in-memory DOF buffer by writing the * header and then concatenating all our buffers. dt_buf_concat() will * propagate any errors and cause dt_buf_claim() to return NULL. */ dt_buf_create(dtp, &dof, "dof", h.dofh_filesz); dt_buf_write(dtp, &dof, &h, sizeof (h), sizeof (uint64_t)); dt_buf_concat(dtp, &dof, &ddo->ddo_secs, sizeof (uint64_t)); dt_buf_concat(dtp, &dof, &ddo->ddo_ldata, sizeof (uint64_t)); dt_buf_concat(dtp, &dof, &ddo->ddo_strs, sizeof (char)); dt_buf_concat(dtp, &dof, &ddo->ddo_udata, sizeof (uint64_t)); return (dt_buf_claim(dtp, &dof)); } void dtrace_dof_destroy(dtrace_hdl_t *dtp, void *dof) { dt_free(dtp, dof); } void * dtrace_getopt_dof(dtrace_hdl_t *dtp) { dof_hdr_t *dof; dof_sec_t *sec; dof_optdesc_t *dofo; int i, nopts = 0, len = sizeof (dof_hdr_t) + roundup(sizeof (dof_sec_t), sizeof (uint64_t)); for (i = 0; i < DTRACEOPT_MAX; i++) { if (dtp->dt_options[i] != DTRACEOPT_UNSET) nopts++; } len += sizeof (dof_optdesc_t) * nopts; if ((dof = dt_zalloc(dtp, len)) == NULL || dof_hdr(dtp, DOF_VERSION, dof) != 0) { dt_free(dtp, dof); return (NULL); } dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ dof->dofh_loadsz = len; dof->dofh_filesz = len; /* * Fill in the option section header... 
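 *
 * The resulting layout is small and fixed (an informal sketch that
 * matches the 'len' computation above):
 *
 *	[ dof_hdr_t ][ dof_sec_t OPTDESC, padded to 8 ][ nopts x dof_optdesc_t ]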
*/ sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); sec->dofs_type = DOF_SECT_OPTDESC; sec->dofs_align = sizeof (uint64_t); sec->dofs_flags = DOF_SECF_LOAD; sec->dofs_entsize = sizeof (dof_optdesc_t); dofo = (dof_optdesc_t *)((uintptr_t)sec + roundup(sizeof (dof_sec_t), sizeof (uint64_t))); sec->dofs_offset = (uintptr_t)dofo - (uintptr_t)dof; sec->dofs_size = sizeof (dof_optdesc_t) * nopts; for (i = 0; i < DTRACEOPT_MAX; i++) { if (dtp->dt_options[i] == DTRACEOPT_UNSET) continue; dofo->dofo_option = i; dofo->dofo_strtab = DOF_SECIDX_NONE; dofo->dofo_value = dtp->dt_options[i]; dofo++; } return (dof); } void * dtrace_geterr_dof(dtrace_hdl_t *dtp) { if (dtp->dt_errprog != NULL) return (dtrace_dof_create(dtp, dtp->dt_errprog, 0)); (void) dt_set_errno(dtp, EDT_BADERROR); return (NULL); } Index: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_link.c (revision 313267) @@ -1,1983 +1,1945 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" #define ELF_TARGET_ALL #include #include #ifdef illumos #include #else #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #endif #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #ifdef illumos #include #else #include #include #include #include #endif #include #include #include #include #include #include #define ESHDR_NULL 0 #define ESHDR_SHSTRTAB 1 #define ESHDR_DOF 2 #define ESHDR_STRTAB 3 #define ESHDR_SYMTAB 4 #define ESHDR_REL 5 #define ESHDR_NUM 6 #define PWRITE_SCN(index, data) \ (lseek64(fd, (off64_t)elf_file.shdr[(index)].sh_offset, SEEK_SET) != \ (off64_t)elf_file.shdr[(index)].sh_offset || \ dt_write(dtp, fd, (data), elf_file.shdr[(index)].sh_size) != \ elf_file.shdr[(index)].sh_size) static const char DTRACE_SHSTRTAB32[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ #ifdef __sparc ".rela.SUNW_dof"; /* 37 */ #else ".rel.SUNW_dof"; /* 37 */ #endif static const char DTRACE_SHSTRTAB64[] = "\0" ".shstrtab\0" /* 1 */ ".SUNW_dof\0" /* 11 */ ".strtab\0" /* 21 */ ".symtab\0" /* 29 */ ".rela.SUNW_dof"; /* 37 */ static const char DOFSTR[] = "__SUNW_dof"; static const char DOFLAZYSTR[] = "___SUNW_dof"; typedef struct dt_link_pair { struct dt_link_pair *dlp_next; /* next pair in linked list */ void *dlp_str; /* buffer for string table */ void *dlp_sym; /* buffer for symbol table */ } dt_link_pair_t; typedef struct dof_elf32 { uint32_t de_nrel; /* relocation count */ #ifdef __sparc Elf32_Rela *de_rel; /* array of relocations for sparc */ #else Elf32_Rel *de_rel; /* array of relocations for x86 */ #endif uint32_t de_nsym; /* symbol count */ Elf32_Sym *de_sym; /* array of symbols */ uint32_t de_strlen; /* size of of string table */ char *de_strtab; /* string table */ uint32_t de_global; /* index of the first global symbol */ } dof_elf32_t; static int prepare_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf32_t *dep) { dof_sec_t *dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; uint32_t count = 0; size_t base; Elf32_Sym *sym; #ifdef __sparc Elf32_Rela *rel; #else Elf32_Rel *rel; #endif /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table and the number of * relocations present in the DOF. 
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf32_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf32_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__arm__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF32_R_INFO(count + dep->de_global, - R_386_32); + R_386_PC32); #elif defined(__mips__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); #elif defined(__powerpc__) /* * Add 4 bytes to hit the low half of this 64-bit * big-endian address. */ rel->r_offset = s->dofs_offset + dofr[j].dofr_offset + 4; rel->r_info = ELF32_R_INFO(count + dep->de_global, R_PPC_REL32); #elif defined(__riscv__) /* XXX */ printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); -#elif defined(__sparc) - /* - * Add 4 bytes to hit the low half of this 64-bit - * big-endian address. - */ - rel->r_offset = s->dofs_offset + - dofr[j].dofr_offset + 4; - rel->r_info = ELF32_R_INFO(count + dep->de_global, - R_SPARC_32); #else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC); - sym->st_other = 0; + sym->st_other = ELF32_ST_VISIBILITY(STV_HIDDEN); sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. 
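 *
 * Concretely, using the DOFSTR and DOFLAZYSTR constants defined above:
 *
 *	dtp->dt_lazyload ? DOFLAZYSTR : DOFSTR
 *	    i.e. "___SUNW_dof" when lazily loaded, "__SUNW_dof" otherwise.
 */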
*/ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_OBJECT); -#ifdef illumos - sym->st_other = 0; -#else sym->st_other = ELF32_ST_VISIBILITY(STV_HIDDEN); -#endif sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } typedef struct dof_elf64 { uint32_t de_nrel; Elf64_Rela *de_rel; uint32_t de_nsym; Elf64_Sym *de_sym; uint32_t de_strlen; char *de_strtab; uint32_t de_global; } dof_elf64_t; static int prepare_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, dof_elf64_t *dep) { dof_sec_t *dofs, *s; dof_relohdr_t *dofrh; dof_relodesc_t *dofr; char *strtab; int i, j, nrel; size_t strtabsz = 1; #ifdef illumos uint32_t count = 0; #else uint64_t count = 0; #endif size_t base; Elf64_Sym *sym; Elf64_Rela *rel; /*LINTED*/ dofs = (dof_sec_t *)((char *)dof + dof->dofh_secoff); /* * First compute the size of the string table and the number of * relocations present in the DOF. */ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; assert(strtab[0] == '\0'); strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); count += s->dofs_size / s->dofs_entsize; } dep->de_strlen = strtabsz; dep->de_nrel = count; dep->de_nsym = count + 1; /* the first symbol is always null */ if (dtp->dt_lazyload) { dep->de_strlen += sizeof (DOFLAZYSTR); dep->de_nsym++; } else { dep->de_strlen += sizeof (DOFSTR); dep->de_nsym++; } if ((dep->de_rel = calloc(dep->de_nrel, sizeof (dep->de_rel[0]))) == NULL) { return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_sym = calloc(dep->de_nsym, sizeof (Elf64_Sym))) == NULL) { free(dep->de_rel); return (dt_set_errno(dtp, EDT_NOMEM)); } if ((dep->de_strtab = calloc(dep->de_strlen, 1)) == NULL) { free(dep->de_rel); free(dep->de_sym); return (dt_set_errno(dtp, EDT_NOMEM)); } count = 0; strtabsz = 1; dep->de_strtab[0] = '\0'; rel = dep->de_rel; sym = dep->de_sym; dep->de_global = 1; /* * The first symbol table entry must be zeroed and is always ignored. */ bzero(sym, sizeof (Elf64_Sym)); sym++; /* * Take a second pass through the DOF sections filling in the * memory we allocated. 
*/ for (i = 0; i < dof->dofh_secnum; i++) { if (dofs[i].dofs_type != DOF_SECT_URELHDR) continue; /*LINTED*/ dofrh = (dof_relohdr_t *)((char *)dof + dofs[i].dofs_offset); s = &dofs[dofrh->dofr_strtab]; strtab = (char *)dof + s->dofs_offset; bcopy(strtab + 1, dep->de_strtab + strtabsz, s->dofs_size); base = strtabsz; strtabsz += s->dofs_size - 1; s = &dofs[dofrh->dofr_relsec]; /*LINTED*/ dofr = (dof_relodesc_t *)((char *)dof + s->dofs_offset); nrel = s->dofs_size / s->dofs_entsize; s = &dofs[dofrh->dofr_tgtsec]; for (j = 0; j < nrel; j++) { #if defined(__aarch64__) /* XXX */ #elif defined(__arm__) /* XXX */ #elif defined(__mips__) /* XXX */ #elif defined(__powerpc__) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; rel->r_info = ELF64_R_INFO(count + dep->de_global, R_PPC64_REL64); #elif defined(__riscv__) /* XXX */ #elif defined(__i386) || defined(__amd64) rel->r_offset = s->dofs_offset + dofr[j].dofr_offset; -#ifdef illumos rel->r_info = ELF64_R_INFO(count + dep->de_global, - R_AMD64_64); + R_X86_64_PC64); #else - rel->r_info = ELF64_R_INFO(count + dep->de_global, - R_X86_64_RELATIVE); -#endif -#elif defined(__sparc) - rel->r_offset = s->dofs_offset + - dofr[j].dofr_offset; - rel->r_info = ELF64_R_INFO(count + dep->de_global, - R_SPARC_64); -#else #error unknown ISA #endif sym->st_name = base + dofr[j].dofr_name - 1; sym->st_value = 0; sym->st_size = 0; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC); - sym->st_other = 0; + sym->st_other = ELF64_ST_VISIBILITY(STV_HIDDEN); sym->st_shndx = SHN_UNDEF; rel++; sym++; count++; } } /* * Add a symbol for the DOF itself. We use a different symbol for * lazily and actively loaded DOF to make them easy to distinguish. */ sym->st_name = strtabsz; sym->st_value = 0; sym->st_size = dof->dofh_filesz; sym->st_info = GELF_ST_INFO(STB_GLOBAL, STT_OBJECT); -#ifdef illumos - sym->st_other = 0; -#else sym->st_other = ELF64_ST_VISIBILITY(STV_HIDDEN); -#endif sym->st_shndx = ESHDR_DOF; sym++; if (dtp->dt_lazyload) { bcopy(DOFLAZYSTR, dep->de_strtab + strtabsz, sizeof (DOFLAZYSTR)); strtabsz += sizeof (DOFLAZYSTR); } else { bcopy(DOFSTR, dep->de_strtab + strtabsz, sizeof (DOFSTR)); strtabsz += sizeof (DOFSTR); } assert(count == dep->de_nrel); assert(strtabsz == dep->de_strlen); return (0); } /* * Write out an ELF32 file prologue consisting of a header, section headers, * and a section header string table. The DOF data will follow this prologue * and complete the contents of the given ELF file. */ static int dump_elf32(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf32_Ehdr ehdr; Elf32_Shdr shdr[ESHDR_NUM]; } elf_file; Elf32_Shdr *shp; Elf32_Off off; dof_elf32_t de; int ret = 0; uint_t nshdr; if (prepare_elf32(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, we only need enough sections for * the shstrtab and the DOF. */ nshdr = de.de_nrel == 0 ? 
ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS32; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc__) elf_file.ehdr.e_machine = EM_PPC; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARC; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_386; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf32_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf32_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf32_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf32_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf32_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB32[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB32); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name = 11; /* DTRACE_SHSTRTAB32[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB32[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB32[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf32_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf32_Sym); shp->sh_addralign = 4; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 4); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB32[37] = ".rel.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; #ifdef __sparc shp->sh_type = SHT_RELA; #else shp->sh_type = SHT_REL; #endif shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 4; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB32) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } /* * Write 
out an ELF64 file prologue consisting of a header, section headers, * and a section header string table. The DOF data will follow this prologue * and complete the contents of the given ELF file. */ static int dump_elf64(dtrace_hdl_t *dtp, const dof_hdr_t *dof, int fd) { struct { Elf64_Ehdr ehdr; Elf64_Shdr shdr[ESHDR_NUM]; } elf_file; Elf64_Shdr *shp; Elf64_Off off; dof_elf64_t de; int ret = 0; uint_t nshdr; if (prepare_elf64(dtp, dof, &de) != 0) return (-1); /* errno is set for us */ /* * If there are no relocations, we only need enough sections for * the shstrtab and the DOF. */ nshdr = de.de_nrel == 0 ? ESHDR_SYMTAB + 1 : ESHDR_NUM; bzero(&elf_file, sizeof (elf_file)); elf_file.ehdr.e_ident[EI_MAG0] = ELFMAG0; elf_file.ehdr.e_ident[EI_MAG1] = ELFMAG1; elf_file.ehdr.e_ident[EI_MAG2] = ELFMAG2; elf_file.ehdr.e_ident[EI_MAG3] = ELFMAG3; elf_file.ehdr.e_ident[EI_VERSION] = EV_CURRENT; elf_file.ehdr.e_ident[EI_CLASS] = ELFCLASS64; #if BYTE_ORDER == _BIG_ENDIAN elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #else elf_file.ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #endif #if defined(__FreeBSD__) elf_file.ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; #endif elf_file.ehdr.e_type = ET_REL; #if defined(__arm__) elf_file.ehdr.e_machine = EM_ARM; #elif defined(__mips__) elf_file.ehdr.e_machine = EM_MIPS; #elif defined(__powerpc64__) elf_file.ehdr.e_machine = EM_PPC64; #elif defined(__sparc) elf_file.ehdr.e_machine = EM_SPARCV9; #elif defined(__i386) || defined(__amd64) elf_file.ehdr.e_machine = EM_AMD64; #endif elf_file.ehdr.e_version = EV_CURRENT; elf_file.ehdr.e_shoff = sizeof (Elf64_Ehdr); elf_file.ehdr.e_ehsize = sizeof (Elf64_Ehdr); elf_file.ehdr.e_phentsize = sizeof (Elf64_Phdr); elf_file.ehdr.e_shentsize = sizeof (Elf64_Shdr); elf_file.ehdr.e_shnum = nshdr; elf_file.ehdr.e_shstrndx = ESHDR_SHSTRTAB; off = sizeof (elf_file) + nshdr * sizeof (Elf64_Shdr); shp = &elf_file.shdr[ESHDR_SHSTRTAB]; shp->sh_name = 1; /* DTRACE_SHSTRTAB64[1] = ".shstrtab" */ shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = sizeof (DTRACE_SHSTRTAB64); shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_DOF]; shp->sh_name = 11; /* DTRACE_SHSTRTAB64[11] = ".SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SUNW_dof; shp->sh_offset = off; shp->sh_size = dof->dofh_filesz; shp->sh_addralign = 8; off = shp->sh_offset + shp->sh_size; shp = &elf_file.shdr[ESHDR_STRTAB]; shp->sh_name = 21; /* DTRACE_SHSTRTAB64[21] = ".strtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_STRTAB; shp->sh_offset = off; shp->sh_size = de.de_strlen; shp->sh_addralign = sizeof (char); off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); shp = &elf_file.shdr[ESHDR_SYMTAB]; shp->sh_name = 29; /* DTRACE_SHSTRTAB64[29] = ".symtab" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_SYMTAB; shp->sh_entsize = sizeof (Elf64_Sym); shp->sh_link = ESHDR_STRTAB; shp->sh_offset = off; shp->sh_info = de.de_global; shp->sh_size = de.de_nsym * sizeof (Elf64_Sym); shp->sh_addralign = 8; off = P2ROUNDUP(shp->sh_offset + shp->sh_size, 8); if (de.de_nrel == 0) { if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } else { shp = &elf_file.shdr[ESHDR_REL]; shp->sh_name = 37; /* DTRACE_SHSTRTAB64[37] = ".rel.SUNW_dof" */ shp->sh_flags = SHF_ALLOC; shp->sh_type = SHT_RELA; 
shp->sh_entsize = sizeof (de.de_rel[0]); shp->sh_link = ESHDR_SYMTAB; shp->sh_info = ESHDR_DOF; shp->sh_offset = off; shp->sh_size = de.de_nrel * sizeof (de.de_rel[0]); shp->sh_addralign = 8; if (dt_write(dtp, fd, &elf_file, sizeof (elf_file)) != sizeof (elf_file) || PWRITE_SCN(ESHDR_SHSTRTAB, DTRACE_SHSTRTAB64) || PWRITE_SCN(ESHDR_STRTAB, de.de_strtab) || PWRITE_SCN(ESHDR_SYMTAB, de.de_sym) || PWRITE_SCN(ESHDR_REL, de.de_rel) || PWRITE_SCN(ESHDR_DOF, dof)) { ret = dt_set_errno(dtp, errno); } } free(de.de_strtab); free(de.de_sym); free(de.de_rel); return (ret); } static int -dt_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint_t shn, - GElf_Sym *sym, int uses_funcdesc, Elf *elf) +dt_symtab_lookup(Elf_Data *data_sym, int start, int end, uintptr_t addr, + uint_t shn, GElf_Sym *sym, int uses_funcdesc, Elf *elf) { - int i, ret = -1; Elf64_Addr symval; Elf_Scn *opd_scn; Elf_Data *opd_desc; - GElf_Sym s; + int i; - for (i = 0; i < nsym && gelf_getsym(data_sym, i, sym) != NULL; i++) { + for (i = start; i < end && gelf_getsym(data_sym, i, sym) != NULL; i++) { if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) { symval = sym->st_value; if (uses_funcdesc) { opd_scn = elf_getscn(elf, sym->st_shndx); opd_desc = elf_rawdata(opd_scn, NULL); symval = *(uint64_t*)((char *)opd_desc->d_buf + symval); } if ((uses_funcdesc || shn == sym->st_shndx) && - symval <= addr && - addr < symval + sym->st_size) { - if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL) - return (0); - - ret = 0; - s = *sym; - } + symval <= addr && addr < symval + sym->st_size) + return (0); } } - if (ret == 0) - *sym = s; - return (ret); + return (-1); } #if defined(__aarch64__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__arm__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__mips__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__powerpc__) /* The sentinel is 'xor r3,r3,r3'. */ #define DT_OP_XOR_R3 0x7c631a78 #define DT_OP_NOP 0x60000000 #define DT_OP_BLR 0x4e800020 /* This captures all forms of branching to address. */ #define DT_IS_BRANCH(inst) ((inst & 0xfc000000) == 0x48000000) #define DT_IS_BL(inst) (DT_IS_BRANCH(inst) && (inst & 0x01)) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_PPC_REL24 && GELF_R_TYPE(rela->r_info) != R_PPC_PLTREL24) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_XOR_R3) { (*off) += sizeof (ip[0]); return (0); } } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } /* * We only expect branch to address instructions. 
*/ if (!DT_IS_BRANCH(ip[0])) { dt_dprintf("found %x instead of a branch instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. */ if (!DT_IS_BL(ip[0])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } ip[0] = DT_OP_XOR_R3; (*off) += sizeof (ip[0]); } else { if (DT_IS_BL(ip[0])) ip[0] = DT_OP_NOP; else ip[0] = DT_OP_BLR; } return (0); } #elif defined(__riscv__) /* XXX */ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { printf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__); return (0); } #elif defined(__sparc) #define DT_OP_RET 0x81c7e008 #define DT_OP_NOP 0x01000000 #define DT_OP_CALL 0x40000000 #define DT_OP_CLR_O0 0x90102000 #define DT_IS_MOV_O7(inst) (((inst) & 0xffffe000) == 0x9e100000) #define DT_IS_RESTORE(inst) (((inst) & 0xc1f80000) == 0x81e80000) #define DT_IS_RETL(inst) (((inst) & 0xfff83fff) == 0x81c02008) #define DT_RS2(inst) ((inst) & 0x1f) #define DT_MAKE_RETL(reg) (0x81c02008 | ((reg) << 14)) /*ARGSUSED*/ static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint32_t *ip; if ((rela->r_offset & (sizeof (uint32_t) - 1)) != 0) return (-1); /*LINTED*/ ip = (uint32_t *)(p + rela->r_offset); /* * We only know about some specific relocation types. */ if (GELF_R_TYPE(rela->r_info) != R_SPARC_WDISP30 && GELF_R_TYPE(rela->r_info) != R_SPARC_WPLT30) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install below. */ if (isenabled) { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } else { if (DT_IS_RESTORE(ip[1])) { if (ip[0] == DT_OP_RET) { (*off) += sizeof (ip[0]); return (0); } } else if (DT_IS_MOV_O7(ip[1])) { if (DT_IS_RETL(ip[0])) return (0); } else { if (ip[0] == DT_OP_NOP) { (*off) += sizeof (ip[0]); return (0); } } } /* * We only expect call instructions with a displacement of 0. */ if (ip[0] != DT_OP_CALL) { dt_dprintf("found %x instead of a call instruction at %llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } if (isenabled) { /* * It would necessarily indicate incorrect usage if an is- * enabled probe were tail-called so flag that as an error. * It's also potentially (very) tricky to handle gracefully, * but could be done if this were a desired use scenario. */ if (DT_IS_RESTORE(ip[1]) || DT_IS_MOV_O7(ip[1])) { dt_dprintf("tail call to is-enabled probe at %llx\n", (u_longlong_t)rela->r_offset); return (-1); } /* * On SPARC, we take advantage of the fact that the first * argument shares the same register as for the return value. * The macro handles the work of zeroing that register so we * don't need to do anything special here. We instrument the * instruction in the delay slot as we'll need to modify the * return register after that instruction has been emulated. */ ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } else { /* * If the call is followed by a restore, it's a tail call so * change the call to a ret. 
If the call if followed by a mov * of a register into %o7, it's a tail call in leaf context * so change the call to a retl-like instruction that returns * to that register value + 8 (rather than the typical %o7 + * 8); the delay slot instruction is left, but should have no * effect. Otherwise we change the call to be a nop. We * identify the subsequent instruction as the probe point in * all but the leaf tail-call case to ensure that arguments to * the probe are complete and consistent. An astute, though * largely hypothetical, observer would note that there is the * possibility of a false-positive probe firing if the function * contained a branch to the instruction in the delay slot of * the call. Fixing this would require significant in-kernel * modifications, and isn't worth doing until we see it in the * wild. */ if (DT_IS_RESTORE(ip[1])) { ip[0] = DT_OP_RET; (*off) += sizeof (ip[0]); } else if (DT_IS_MOV_O7(ip[1])) { ip[0] = DT_MAKE_RETL(DT_RS2(ip[1])); } else { ip[0] = DT_OP_NOP; (*off) += sizeof (ip[0]); } } return (0); } #elif defined(__i386) || defined(__amd64) #define DT_OP_NOP 0x90 #define DT_OP_RET 0xc3 #define DT_OP_CALL 0xe8 #define DT_OP_JMP32 0xe9 #define DT_OP_REX_RAX 0x48 #define DT_OP_XOR_EAX_0 0x33 #define DT_OP_XOR_EAX_1 0xc0 static int dt_modtext(dtrace_hdl_t *dtp, char *p, int isenabled, GElf_Rela *rela, uint32_t *off) { uint8_t *ip = (uint8_t *)(p + rela->r_offset - 1); uint8_t ret; /* * On x86, the first byte of the instruction is the call opcode and * the next four bytes are the 32-bit address; the relocation is for * the address operand. We back up the offset to the first byte of * the instruction. For is-enabled probes, we later advance the offset * so that it hits the first nop in the instruction sequence. */ (*off) -= 1; /* * We only know about some specific relocation types. Luckily * these types have the same values on both 32-bit and 64-bit * x86 architectures. */ if (GELF_R_TYPE(rela->r_info) != R_386_PC32 && GELF_R_TYPE(rela->r_info) != R_386_PLT32) return (-1); /* * We may have already processed this object file in an earlier linker * invocation. Check to see if the present instruction sequence matches * the one we would install. For is-enabled probes, we advance the * offset to the first nop instruction in the sequence to match the * text modification code below. */ if (!isenabled) { if ((ip[0] == DT_OP_NOP || ip[0] == DT_OP_RET) && ip[1] == DT_OP_NOP && ip[2] == DT_OP_NOP && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) return (0); } else if (dtp->dt_oflags & DTRACE_O_LP64) { if (ip[0] == DT_OP_REX_RAX && ip[1] == DT_OP_XOR_EAX_0 && ip[2] == DT_OP_XOR_EAX_1 && (ip[3] == DT_OP_NOP || ip[3] == DT_OP_RET) && ip[4] == DT_OP_NOP) { (*off) += 3; return (0); } } else { if (ip[0] == DT_OP_XOR_EAX_0 && ip[1] == DT_OP_XOR_EAX_1 && (ip[2] == DT_OP_NOP || ip[2] == DT_OP_RET) && ip[3] == DT_OP_NOP && ip[4] == DT_OP_NOP) { (*off) += 2; return (0); } } /* * We expect either a call instrution with a 32-bit displacement or a * jmp instruction with a 32-bit displacement acting as a tail-call. */ if (ip[0] != DT_OP_CALL && ip[0] != DT_OP_JMP32) { dt_dprintf("found %x instead of a call or jmp instruction at " "%llx\n", ip[0], (u_longlong_t)rela->r_offset); return (-1); } ret = (ip[0] == DT_OP_JMP32) ? DT_OP_RET : DT_OP_NOP; /* * Establish the instruction sequence -- all nops for probes, and an * instruction to clear the return value register (%eax/%rax) followed * by nops for is-enabled probes. For is-enabled probes, we advance * the offset to the first nop. 
This isn't stricly necessary but makes * for more readable disassembly when the probe is enabled. */ if (!isenabled) { ip[0] = ret; ip[1] = DT_OP_NOP; ip[2] = DT_OP_NOP; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; } else if (dtp->dt_oflags & DTRACE_O_LP64) { ip[0] = DT_OP_REX_RAX; ip[1] = DT_OP_XOR_EAX_0; ip[2] = DT_OP_XOR_EAX_1; ip[3] = ret; ip[4] = DT_OP_NOP; (*off) += 3; } else { ip[0] = DT_OP_XOR_EAX_0; ip[1] = DT_OP_XOR_EAX_1; ip[2] = ret; ip[3] = DT_OP_NOP; ip[4] = DT_OP_NOP; (*off) += 2; } return (0); } #else #error unknown ISA #endif /*PRINTFLIKE5*/ static int dt_link_error(dtrace_hdl_t *dtp, Elf *elf, int fd, dt_link_pair_t *bufs, const char *format, ...) { va_list ap; dt_link_pair_t *pair; va_start(ap, format); dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap); va_end(ap); if (elf != NULL) (void) elf_end(elf); if (fd >= 0) (void) close(fd); while ((pair = bufs) != NULL) { bufs = pair->dlp_next; dt_free(dtp, pair->dlp_str); dt_free(dtp, pair->dlp_sym); dt_free(dtp, pair); } return (dt_set_errno(dtp, EDT_COMPILER)); } static int process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp) { static const char dt_prefix[] = "__dtrace"; static const char dt_enabled[] = "enabled"; static const char dt_symprefix[] = "$dtrace"; static const char dt_symfmt[] = "%s%ld.%s"; char probename[DTRACE_NAMELEN]; int fd, i, ndx, eprobe, mod = 0; Elf *elf = NULL; GElf_Ehdr ehdr; Elf_Scn *scn_rel, *scn_sym, *scn_str, *scn_tgt; Elf_Data *data_rel, *data_sym, *data_str, *data_tgt; GElf_Shdr shdr_rel, shdr_sym, shdr_str, shdr_tgt; GElf_Sym rsym, fsym, dsym; GElf_Rela rela; char *s, *p, *r; char pname[DTRACE_PROVNAMELEN]; dt_provider_t *pvp; dt_probe_t *prp; uint32_t off, eclass, emachine1, emachine2; - size_t symsize, nsym, isym, istr, len; + size_t symsize, osym, nsym, isym, istr, len; key_t objkey; dt_link_pair_t *pair, *bufs = NULL; dt_strtab_t *strtab; + void *tmp; if ((fd = open64(obj, O_RDWR)) == -1) { return (dt_link_error(dtp, elf, fd, bufs, "failed to open %s: %s", obj, strerror(errno))); } if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "failed to process %s: %s", obj, elf_errmsg(elf_errno()))); } switch (elf_kind(elf)) { case ELF_K_ELF: break; case ELF_K_AR: return (dt_link_error(dtp, elf, fd, bufs, "archives are not " "permitted; use the contents of the archive instead: %s", obj)); default: return (dt_link_error(dtp, elf, fd, bufs, "invalid file type: %s", obj)); } if (gelf_getehdr(elf, &ehdr) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "corrupt file: %s", obj)); } if (dtp->dt_oflags & DTRACE_O_LP64) { eclass = ELFCLASS64; #if defined(__mips__) emachine1 = emachine2 = EM_MIPS; #elif defined(__powerpc__) emachine1 = emachine2 = EM_PPC64; #elif defined(__sparc) emachine1 = emachine2 = EM_SPARCV9; #elif defined(__i386) || defined(__amd64) emachine1 = emachine2 = EM_AMD64; #endif symsize = sizeof (Elf64_Sym); } else { eclass = ELFCLASS32; #if defined(__arm__) emachine1 = emachine2 = EM_ARM; #elif defined(__mips__) emachine1 = emachine2 = EM_MIPS; #elif defined(__powerpc__) emachine1 = emachine2 = EM_PPC; #elif defined(__sparc) emachine1 = EM_SPARC; emachine2 = EM_SPARC32PLUS; #elif defined(__i386) || defined(__amd64) emachine1 = emachine2 = EM_386; #endif symsize = sizeof (Elf32_Sym); } if (ehdr.e_ident[EI_CLASS] != eclass) { return (dt_link_error(dtp, elf, fd, bufs, "incorrect ELF class for object file: %s", obj)); } if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) { return (dt_link_error(dtp, elf, fd, bufs, "incorrect ELF 
machine type for object file: %s", obj)); } /* * We use this token as a relatively unique handle for this file on the * system in order to disambiguate potential conflicts between files of * the same name which contain identially named local symbols. */ if ((objkey = ftok(obj, 0)) == (key_t)-1) { return (dt_link_error(dtp, elf, fd, bufs, "failed to generate unique key for object file: %s", obj)); } scn_rel = NULL; while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) { if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) goto err; /* * Skip any non-relocation sections. */ if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL) continue; if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL) goto err; /* * Grab the section, section header and section data for the * symbol table that this relocation section references. */ if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL || gelf_getshdr(scn_sym, &shdr_sym) == NULL || (data_sym = elf_getdata(scn_sym, NULL)) == NULL) goto err; /* * Ditto for that symbol table's string table. */ if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL || gelf_getshdr(scn_str, &shdr_str) == NULL || (data_str = elf_getdata(scn_str, NULL)) == NULL) goto err; /* * Grab the section, section header and section data for the * target section for the relocations. For the relocations * we're looking for -- this will typically be the text of the * object file. */ if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL || gelf_getshdr(scn_tgt, &shdr_tgt) == NULL || (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL) goto err; /* * We're looking for relocations to symbols matching this form: * * __dtrace[enabled]____ * * For the generated object, we need to record the location * identified by the relocation, and create a new relocation * in the generated object that will be resolved at link time * to the location of the function in which the probe is * embedded. In the target object, we change the matched symbol * so that it will be ignored at link time, and we modify the * target (text) section to replace the call instruction with * one or more nops. * - * If the function containing the probe is locally scoped - * (static), we create an alias used by the relocation in the - * generated object. The alias, a new symbol, will be global - * (so that the relocation from the generated object can be - * resolved), and hidden (so that it is converted to a local - * symbol at link time). Such aliases have this form: + * To avoid runtime overhead, the relocations added to the + * generated object should be resolved at static link time. We + * therefore create aliases for the functions that contain + * probes. An alias is global (so that the relocation from the + * generated object can be resolved), and hidden (so that its + * address is known at static link time). Such aliases have this + * form: * * $dtrace. * * We take a first pass through all the relocations to * populate our string table and count the number of extra * symbols we'll require. 
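 *
 * As a purely illustrative example (the provider, probe and function
 * names here are hypothetical): a call site for probe 'start' of
 * provider 'myprov' inside the function do_work() yields a relocation
 * against the undefined symbol
 *
 *	__dtrace_myprov___start
 *
 * (__dtraceenabled_myprov___start for the corresponding is-enabled
 * check), and the alias emitted for do_work() in an object file whose
 * key happened to be 1234 would be named
 *
 *	$dtrace1234.do_work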
*/ strtab = dt_strtab_create(1); nsym = 0; isym = data_sym->d_size / symsize; istr = data_str->d_size; for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { if (shdr_rel.sh_type == SHT_RELA) { if (gelf_getrela(data_rel, i, &rela) == NULL) continue; } else { GElf_Rel rel; if (gelf_getrel(data_rel, i, &rel) == NULL) continue; rela.r_offset = rel.r_offset; rela.r_info = rel.r_info; rela.r_addend = 0; } if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info), &rsym) == NULL) { dt_strtab_destroy(strtab); goto err; } s = (char *)data_str->d_buf + rsym.st_name; if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) continue; - if (dt_symtab_lookup(data_sym, isym, rela.r_offset, - shdr_rel.sh_info, &fsym, - (emachine1 == EM_PPC64), elf) != 0) { + if (dt_symtab_lookup(data_sym, 0, isym, rela.r_offset, + shdr_rel.sh_info, &fsym, (emachine1 == EM_PPC64), + elf) != 0) { dt_strtab_destroy(strtab); goto err; } - if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL) - continue; - if (fsym.st_name > data_str->d_size) { dt_strtab_destroy(strtab); goto err; } s = (char *)data_str->d_buf + fsym.st_name; /* * If this symbol isn't of type function, we've really * driven off the rails or the object file is corrupt. */ if (GELF_ST_TYPE(fsym.st_info) != STT_FUNC) { dt_strtab_destroy(strtab); return (dt_link_error(dtp, elf, fd, bufs, "expected %s to be of type function", s)); } len = snprintf(NULL, 0, dt_symfmt, dt_symprefix, objkey, s) + 1; if ((p = dt_alloc(dtp, len)) == NULL) { dt_strtab_destroy(strtab); goto err; } (void) snprintf(p, len, dt_symfmt, dt_symprefix, objkey, s); if (dt_strtab_index(strtab, p) == -1) { nsym++; (void) dt_strtab_insert(strtab, p); } dt_free(dtp, p); } /* - * If needed, allocate the additional space for the symbol - * table and string table copying the old data into the new - * buffers, and marking the buffers as dirty. We inject those - * newly allocated buffers into the libelf data structures, but - * are still responsible for freeing them once we're done with - * the elf handle. + * If any probes were found, allocate the additional space for + * the symbol table and string table, copying the old data into + * the new buffers, and marking the buffers as dirty. We inject + * those newly allocated buffers into the libelf data + * structures, but are still responsible for freeing them once + * we're done with the elf handle. */ if (nsym > 0) { /* * The first byte of the string table is reserved for * the \0 entry. 
*/ len = dt_strtab_size(strtab) - 1; assert(len > 0); assert(dt_strtab_index(strtab, "") == 0); dt_strtab_destroy(strtab); if ((pair = dt_alloc(dtp, sizeof (*pair))) == NULL) goto err; if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size + len)) == NULL) { dt_free(dtp, pair); goto err; } if ((pair->dlp_sym = dt_alloc(dtp, data_sym->d_size + nsym * symsize)) == NULL) { dt_free(dtp, pair->dlp_str); dt_free(dtp, pair); goto err; } pair->dlp_next = bufs; bufs = pair; bcopy(data_str->d_buf, pair->dlp_str, data_str->d_size); + tmp = data_str->d_buf; data_str->d_buf = pair->dlp_str; + pair->dlp_str = tmp; data_str->d_size += len; (void) elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY); shdr_str.sh_size += len; (void) gelf_update_shdr(scn_str, &shdr_str); bcopy(data_sym->d_buf, pair->dlp_sym, data_sym->d_size); + tmp = data_sym->d_buf; data_sym->d_buf = pair->dlp_sym; + pair->dlp_sym = tmp; data_sym->d_size += nsym * symsize; (void) elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY); shdr_sym.sh_size += nsym * symsize; (void) gelf_update_shdr(scn_sym, &shdr_sym); + osym = isym; nsym += isym; } else { dt_strtab_destroy(strtab); + continue; } /* * Now that the tables have been allocated, perform the * modifications described above. */ for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) { if (shdr_rel.sh_type == SHT_RELA) { if (gelf_getrela(data_rel, i, &rela) == NULL) continue; } else { GElf_Rel rel; if (gelf_getrel(data_rel, i, &rel) == NULL) continue; rela.r_offset = rel.r_offset; rela.r_info = rel.r_info; rela.r_addend = 0; } ndx = GELF_R_SYM(rela.r_info); if (gelf_getsym(data_sym, ndx, &rsym) == NULL || rsym.st_name > data_str->d_size) goto err; s = (char *)data_str->d_buf + rsym.st_name; if (strncmp(s, dt_prefix, sizeof (dt_prefix) - 1) != 0) continue; s += sizeof (dt_prefix) - 1; /* * Check to see if this is an 'is-enabled' check as * opposed to a normal probe. */ if (strncmp(s, dt_enabled, sizeof (dt_enabled) - 1) == 0) { s += sizeof (dt_enabled) - 1; eprobe = 1; *eprobesp = 1; dt_dprintf("is-enabled probe\n"); } else { eprobe = 0; dt_dprintf("normal probe\n"); } if (*s++ != '_') goto err; if ((p = strstr(s, "___")) == NULL || p - s >= sizeof (pname)) goto err; bcopy(s, pname, p - s); pname[p - s] = '\0'; - if (dt_symtab_lookup(data_sym, isym, rela.r_offset, - shdr_rel.sh_info, &fsym, + if (dt_symtab_lookup(data_sym, osym, isym, + rela.r_offset, shdr_rel.sh_info, &fsym, + (emachine1 == EM_PPC64), elf) != 0 && + dt_symtab_lookup(data_sym, 0, osym, + rela.r_offset, shdr_rel.sh_info, &fsym, (emachine1 == EM_PPC64), elf) != 0) goto err; if (fsym.st_name > data_str->d_size) goto err; assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC); /* - * If a NULL relocation name is passed to - * dt_probe_define(), the function name is used for the - * relocation. The relocation needs to use a mangled - * name if the symbol is locally scoped; the function - * name may need to change if we've found the global - * alias for the locally scoped symbol (we prefer - * global symbols to locals in dt_symtab_lookup()). + * If this is our first time encountering this symbol, + * emit an alias. */ s = (char *)data_str->d_buf + fsym.st_name; - r = NULL; - if (GELF_ST_BIND(fsym.st_info) == STB_LOCAL) { + if (strncmp(s, dt_symprefix, + sizeof (dt_symprefix) - 1) != 0) { + u_int bind = GELF_ST_BIND(fsym.st_info); + dsym = fsym; dsym.st_name = istr; - dsym.st_info = GELF_ST_INFO(STB_GLOBAL, - STT_FUNC); - dsym.st_other = - ELF64_ST_VISIBILITY(STV_ELIMINATE); + dsym.st_info = GELF_ST_INFO(bind == STB_LOCAL ? 
+ STB_GLOBAL : bind, STT_FUNC); + dsym.st_other = GELF_ST_VISIBILITY(STV_HIDDEN); (void) gelf_update_sym(data_sym, isym, &dsym); - - r = (char *)data_str->d_buf + istr; - istr += 1 + sprintf(r, dt_symfmt, - dt_symprefix, objkey, s); + r = (char *) data_str->d_buf + istr; + istr += 1 + sprintf(r, dt_symfmt, dt_symprefix, objkey, + s); isym++; assert(isym <= nsym); - - } else if (strncmp(s, dt_symprefix, - strlen(dt_symprefix)) == 0) { + } else { r = s; - if ((s = strchr(s, '.')) == NULL) - goto err; + s = strchr(s, '.'); + assert(s != NULL); s++; } if ((pvp = dt_provider_lookup(dtp, pname)) == NULL) { return (dt_link_error(dtp, elf, fd, bufs, "no such provider %s", pname)); } if (strlcpy(probename, p + 3, sizeof (probename)) >= sizeof (probename)) return (dt_link_error(dtp, elf, fd, bufs, "invalid probe name %s", probename)); (void) strhyphenate(probename); if ((prp = dt_probe_lookup(pvp, probename)) == NULL) return (dt_link_error(dtp, elf, fd, bufs, "no such probe %s", probename)); assert(fsym.st_value <= rela.r_offset); off = rela.r_offset - fsym.st_value; if (dt_modtext(dtp, data_tgt->d_buf, eprobe, &rela, &off) != 0) goto err; if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0) { return (dt_link_error(dtp, elf, fd, bufs, "failed to allocate space for probe")); } #ifndef illumos /* * Our linker doesn't understand the SUNW_IGNORE ndx and * will try to use this relocation when we build the * final executable. Since we are done processing this * relocation, mark it as inexistant and let libelf * remove it from the file. * If this wasn't done, we would have garbage added to * the executable file as the symbol is going to be * change from UND to ABS. */ if (shdr_rel.sh_type == SHT_RELA) { rela.r_offset = 0; rela.r_info = 0; rela.r_addend = 0; (void) gelf_update_rela(data_rel, i, &rela); } else { GElf_Rel rel; rel.r_offset = 0; rel.r_info = 0; (void) gelf_update_rel(data_rel, i, &rel); } #endif mod = 1; (void) elf_flagdata(data_tgt, ELF_C_SET, ELF_F_DIRTY); /* * This symbol may already have been marked to * be ignored by another relocation referencing * the same symbol or if this object file has * already been processed by an earlier link * invocation. */ #ifndef illumos #define SHN_SUNW_IGNORE SHN_ABS #endif if (rsym.st_shndx != SHN_SUNW_IGNORE) { rsym.st_shndx = SHN_SUNW_IGNORE; (void) gelf_update_sym(data_sym, ndx, &rsym); } } } if (mod && elf_update(elf, ELF_C_WRITE) == -1) goto err; (void) elf_end(elf); (void) close(fd); -#ifndef illumos - if (nsym > 0) -#endif while ((pair = bufs) != NULL) { bufs = pair->dlp_next; dt_free(dtp, pair->dlp_str); dt_free(dtp, pair->dlp_sym); dt_free(dtp, pair); } return (0); err: return (dt_link_error(dtp, elf, fd, bufs, "an error was encountered while processing %s", obj)); } int dtrace_program_link(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t dflags, const char *file, int objc, char *const objv[]) { #ifndef illumos char tfile[PATH_MAX]; #endif char drti[PATH_MAX]; dof_hdr_t *dof; int fd, status, i, cur; char *cmd, tmp; size_t len; int eprobes = 0, ret = 0; #ifndef illumos if (access(file, R_OK) == 0) { fprintf(stderr, "dtrace: target object (%s) already exists. " "Please remove the target\ndtrace: object and rebuild all " "the source objects if you wish to run the DTrace\n" "dtrace: linking process again\n", file); /* * Several build infrastructures run DTrace twice (e.g. * postgres) and we don't want the build to fail. Return * 0 here since this isn't really a fatal error. 
*/ return (0); } #endif /* * A NULL program indicates a special use in which we just link * together a bunch of object files specified in objv and then * unlink(2) those object files. */ if (pgp == NULL) { const char *fmt = "%s -o %s -r"; len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file) + 1; for (i = 0; i < objc; i++) len += strlen(objv[i]) + 1; cmd = alloca(len); cur = snprintf(cmd, len, fmt, dtp->dt_ld_path, file); for (i = 0; i < objc; i++) cur += snprintf(cmd + cur, len - cur, " %s", objv[i]); if ((status = system(cmd)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno))); } if (WIFSIGNALED(status)) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_ld_path, WTERMSIG(status))); } if (WEXITSTATUS(status) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status))); } for (i = 0; i < objc; i++) { if (strcmp(objv[i], file) != 0) (void) unlink(objv[i]); } return (0); } for (i = 0; i < objc; i++) { if (process_obj(dtp, objv[i], &eprobes) != 0) return (-1); /* errno is set for us */ } /* * If there are is-enabled probes then we need to force use of DOF * version 2. */ if (eprobes && pgp->dp_dofversion < DOF_VERSION_2) pgp->dp_dofversion = DOF_VERSION_2; if ((dof = dtrace_dof_create(dtp, pgp, dflags)) == NULL) return (-1); /* errno is set for us */ #ifdef illumos /* * Create a temporary file and then unlink it if we're going to * combine it with drti.o later. We can still refer to it in child * processes as /dev/fd/. */ if ((fd = open64(file, O_RDWR | O_CREAT | O_TRUNC, 0666)) == -1) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to open %s: %s", file, strerror(errno))); } #else snprintf(tfile, sizeof(tfile), "%s.XXXXXX", file); if ((fd = mkostemp(tfile, O_CLOEXEC)) == -1) return (dt_link_error(dtp, NULL, -1, NULL, "failed to create temporary file %s: %s", tfile, strerror(errno))); #endif /* * If -xlinktype=DOF has been selected, just write out the DOF. * Otherwise proceed to the default of generating and linking ELF. 
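 *
 * (Illustrative invocation only; the file names are made up.) A raw
 * DOF file can be requested from dtrace -G with something like:
 *
 *	dtrace -G -x linktype=dof -s prov.d -o prov.dof obj.o
 *
 * whereas the default ELF link type produces a relocatable object that
 * is combined with drti.o further below.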
*/ switch (dtp->dt_linktype) { case DT_LTYP_DOF: if (dt_write(dtp, fd, dof, dof->dofh_filesz) < dof->dofh_filesz) ret = errno; if (close(fd) != 0 && ret == 0) ret = errno; if (ret != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(ret))); } return (0); case DT_LTYP_ELF: break; /* fall through to the rest of dtrace_program_link() */ default: return (dt_link_error(dtp, NULL, -1, NULL, "invalid link type %u\n", dtp->dt_linktype)); } #ifdef illumos if (!dtp->dt_lazyload) (void) unlink(file); #endif if (dtp->dt_oflags & DTRACE_O_LP64) status = dump_elf64(dtp, dof, fd); else status = dump_elf32(dtp, dof, fd); #ifdef illumos if (status != 0 || lseek(fd, 0, SEEK_SET) != 0) { return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", file, strerror(errno))); } #else if (status != 0) return (dt_link_error(dtp, NULL, -1, NULL, "failed to write %s: %s", tfile, strerror(dtrace_errno(dtp)))); #endif if (!dtp->dt_lazyload) { #ifdef illumos const char *fmt = "%s -o %s -r -Blocal -Breduce /dev/fd/%d %s"; if (dtp->dt_oflags & DTRACE_O_LP64) { (void) snprintf(drti, sizeof (drti), "%s/64/drti.o", _dtrace_libdir); } else { (void) snprintf(drti, sizeof (drti), "%s/drti.o", _dtrace_libdir); } len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, fd, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, fd, drti); #else const char *fmt = "%s -o %s -r %s %s"; dt_dirpath_t *dp = dt_list_next(&dtp->dt_lib_path); (void) snprintf(drti, sizeof (drti), "%s/drti.o", dp->dir_path); len = snprintf(&tmp, 1, fmt, dtp->dt_ld_path, file, tfile, drti) + 1; cmd = alloca(len); (void) snprintf(cmd, len, fmt, dtp->dt_ld_path, file, tfile, drti); #endif if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to run %s: %s", dtp->dt_ld_path, strerror(errno)); goto done; } if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_ld_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_ld_path, WEXITSTATUS(status)); goto done; } (void) close(fd); /* release temporary file */ #ifdef __FreeBSD__ /* * Now that we've linked drti.o, reduce the global __SUNW_dof * symbol to a local symbol. This is needed to so that multiple * generated object files (for different providers, for * instance) can be linked together. This is accomplished using * the -Blocal flag with Sun's linker, but GNU ld doesn't appear * to have an equivalent option. 
*/ asprintf(&cmd, "%s --localize-hidden %s", dtp->dt_objcopy_path, file); if ((status = system(cmd)) == -1) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to run %s: %s", dtp->dt_objcopy_path, strerror(errno)); free(cmd); goto done; } free(cmd); if (WIFSIGNALED(status)) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s failed due to signal %d", file, dtp->dt_objcopy_path, WTERMSIG(status)); goto done; } if (WEXITSTATUS(status) != 0) { ret = dt_link_error(dtp, NULL, -1, NULL, "failed to link %s: %s exited with status %d\n", file, dtp->dt_objcopy_path, WEXITSTATUS(status)); goto done; } #endif } else { #ifdef __FreeBSD__ if (rename(tfile, file) != 0) { ret = dt_link_error(dtp, NULL, fd, NULL, "failed to rename %s to %s: %s", tfile, file, strerror(errno)); goto done; } #endif (void) close(fd); } done: dtrace_dof_destroy(dtp, dof); #ifdef __FreeBSD__ if (!dtp->dt_lazyload) (void) unlink(tfile); #endif return (ret); } Index: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c (revision 313267) @@ -1,902 +1,900 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include #ifdef illumos #include #endif #include #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include static dt_provider_t * dt_provider_insert(dtrace_hdl_t *dtp, dt_provider_t *pvp, uint_t h) { dt_list_append(&dtp->dt_provlist, pvp); pvp->pv_next = dtp->dt_provs[h]; dtp->dt_provs[h] = pvp; dtp->dt_nprovs++; return (pvp); } dt_provider_t * dt_provider_lookup(dtrace_hdl_t *dtp, const char *name) { uint_t h = dt_strtab_hash(name, NULL) % dtp->dt_provbuckets; dtrace_providerdesc_t desc; dt_provider_t *pvp; for (pvp = dtp->dt_provs[h]; pvp != NULL; pvp = pvp->pv_next) { if (strcmp(pvp->pv_desc.dtvd_name, name) == 0) return (pvp); } if (strisglob(name) || name[0] == '\0') { (void) dt_set_errno(dtp, EDT_NOPROV); return (NULL); } bzero(&desc, sizeof (desc)); (void) strlcpy(desc.dtvd_name, name, DTRACE_PROVNAMELEN); if (dt_ioctl(dtp, DTRACEIOC_PROVIDER, &desc) == -1) { (void) dt_set_errno(dtp, errno == ESRCH ? 
EDT_NOPROV : errno); return (NULL); } if ((pvp = dt_provider_create(dtp, name)) == NULL) return (NULL); /* dt_errno is set for us */ bcopy(&desc, &pvp->pv_desc, sizeof (desc)); pvp->pv_flags |= DT_PROVIDER_IMPL; return (pvp); } dt_provider_t * dt_provider_create(dtrace_hdl_t *dtp, const char *name) { dt_provider_t *pvp; if ((pvp = dt_zalloc(dtp, sizeof (dt_provider_t))) == NULL) return (NULL); (void) strlcpy(pvp->pv_desc.dtvd_name, name, DTRACE_PROVNAMELEN); pvp->pv_probes = dt_idhash_create(pvp->pv_desc.dtvd_name, NULL, 0, 0); pvp->pv_gen = dtp->dt_gen; pvp->pv_hdl = dtp; if (pvp->pv_probes == NULL) { dt_free(dtp, pvp); (void) dt_set_errno(dtp, EDT_NOMEM); return (NULL); } pvp->pv_desc.dtvd_attr.dtpa_provider = _dtrace_prvattr; pvp->pv_desc.dtvd_attr.dtpa_mod = _dtrace_prvattr; pvp->pv_desc.dtvd_attr.dtpa_func = _dtrace_prvattr; pvp->pv_desc.dtvd_attr.dtpa_name = _dtrace_prvattr; pvp->pv_desc.dtvd_attr.dtpa_args = _dtrace_prvattr; return (dt_provider_insert(dtp, pvp, dt_strtab_hash(name, NULL) % dtp->dt_provbuckets)); } void dt_provider_destroy(dtrace_hdl_t *dtp, dt_provider_t *pvp) { dt_provider_t **pp; uint_t h; assert(pvp->pv_hdl == dtp); h = dt_strtab_hash(pvp->pv_desc.dtvd_name, NULL) % dtp->dt_provbuckets; pp = &dtp->dt_provs[h]; while (*pp != NULL && *pp != pvp) pp = &(*pp)->pv_next; assert(*pp != NULL && *pp == pvp); *pp = pvp->pv_next; dt_list_delete(&dtp->dt_provlist, pvp); dtp->dt_nprovs--; if (pvp->pv_probes != NULL) dt_idhash_destroy(pvp->pv_probes); dt_node_link_free(&pvp->pv_nodes); dt_free(dtp, pvp->pv_xrefs); dt_free(dtp, pvp); } int dt_provider_xref(dtrace_hdl_t *dtp, dt_provider_t *pvp, id_t id) { size_t oldsize = BT_SIZEOFMAP(pvp->pv_xrmax); size_t newsize = BT_SIZEOFMAP(dtp->dt_xlatorid); assert(id >= 0 && id < dtp->dt_xlatorid); if (newsize > oldsize) { ulong_t *xrefs = dt_zalloc(dtp, newsize); if (xrefs == NULL) return (-1); bcopy(pvp->pv_xrefs, xrefs, oldsize); dt_free(dtp, pvp->pv_xrefs); pvp->pv_xrefs = xrefs; pvp->pv_xrmax = dtp->dt_xlatorid; } BT_SET(pvp->pv_xrefs, id); return (0); } static uint8_t dt_probe_argmap(dt_node_t *xnp, dt_node_t *nnp) { uint8_t i; for (i = 0; nnp != NULL; i++) { if (nnp->dn_string != NULL && strcmp(nnp->dn_string, xnp->dn_string) == 0) break; else nnp = nnp->dn_list; } return (i); } static dt_node_t * dt_probe_alloc_args(dt_provider_t *pvp, int argc) { dt_node_t *args = NULL, *pnp = NULL, *dnp; int i; for (i = 0; i < argc; i++, pnp = dnp) { if ((dnp = dt_node_xalloc(pvp->pv_hdl, DT_NODE_TYPE)) == NULL) return (NULL); dnp->dn_link = pvp->pv_nodes; pvp->pv_nodes = dnp; if (args == NULL) args = dnp; else pnp->dn_list = dnp; } return (args); } static size_t dt_probe_keylen(const dtrace_probedesc_t *pdp) { return (strlen(pdp->dtpd_mod) + 1 + strlen(pdp->dtpd_func) + 1 + strlen(pdp->dtpd_name) + 1); } static char * dt_probe_key(const dtrace_probedesc_t *pdp, char *s) { (void) snprintf(s, INT_MAX, "%s:%s:%s", pdp->dtpd_mod, pdp->dtpd_func, pdp->dtpd_name); return (s); } /* * If a probe was discovered from the kernel, ask dtrace(7D) for a description * of each of its arguments, including native and translated types. 
*/ static dt_probe_t * dt_probe_discover(dt_provider_t *pvp, const dtrace_probedesc_t *pdp) { dtrace_hdl_t *dtp = pvp->pv_hdl; char *name = dt_probe_key(pdp, alloca(dt_probe_keylen(pdp))); dt_node_t *xargs, *nargs; dt_ident_t *idp; dt_probe_t *prp; dtrace_typeinfo_t dtt; int i, nc, xc; int adc = _dtrace_argmax; dtrace_argdesc_t *adv = alloca(sizeof (dtrace_argdesc_t) * adc); dtrace_argdesc_t *adp = adv; assert(strcmp(pvp->pv_desc.dtvd_name, pdp->dtpd_provider) == 0); assert(pdp->dtpd_id != DTRACE_IDNONE); dt_dprintf("discovering probe %s:%s id=%d\n", pvp->pv_desc.dtvd_name, name, pdp->dtpd_id); for (nc = -1, i = 0; i < adc; i++, adp++) { bzero(adp, sizeof (dtrace_argdesc_t)); adp->dtargd_ndx = i; adp->dtargd_id = pdp->dtpd_id; if (dt_ioctl(dtp, DTRACEIOC_PROBEARG, adp) != 0) { (void) dt_set_errno(dtp, errno); return (NULL); } if (adp->dtargd_ndx == DTRACE_ARGNONE) break; /* all argument descs have been retrieved */ nc = MAX(nc, adp->dtargd_mapping); } xc = i; nc++; /* * The pid provider believes in giving the kernel a break. No reason to * give the kernel all the ctf containers that we're keeping ourselves * just to get it back from it. So if we're coming from a pid provider * probe and the kernel gave us no argument information we'll get some * here. If for some crazy reason the kernel knows about our userland * types then we just ignore this. */ if (xc == 0 && nc == 0 && strncmp(pvp->pv_desc.dtvd_name, "pid", 3) == 0) { nc = adc; dt_pid_get_types(dtp, pdp, adv, &nc); xc = nc; } /* * Now that we have discovered the number of native and translated * arguments from the argument descriptions, allocate a new probe ident * and corresponding dt_probe_t and hash it into the provider. */ xargs = dt_probe_alloc_args(pvp, xc); nargs = dt_probe_alloc_args(pvp, nc); if ((xc != 0 && xargs == NULL) || (nc != 0 && nargs == NULL)) return (NULL); /* dt_errno is set for us */ idp = dt_ident_create(name, DT_IDENT_PROBE, DT_IDFLG_ORPHAN, pdp->dtpd_id, _dtrace_defattr, 0, &dt_idops_probe, NULL, dtp->dt_gen); if (idp == NULL) { (void) dt_set_errno(dtp, EDT_NOMEM); return (NULL); } if ((prp = dt_probe_create(dtp, idp, 2, nargs, nc, xargs, xc)) == NULL) { dt_ident_destroy(idp); return (NULL); } dt_probe_declare(pvp, prp); /* * Once our new dt_probe_t is fully constructed, iterate over the * cached argument descriptions and assign types to prp->pr_nargv[] * and prp->pr_xargv[] and assign mappings to prp->pr_mapping[]. */ for (adp = adv, i = 0; i < xc; i++, adp++) { if (dtrace_type_strcompile(dtp, adp->dtargd_native, &dtt) != 0) { dt_dprintf("failed to resolve input type %s " "for %s:%s arg #%d: %s\n", adp->dtargd_native, pvp->pv_desc.dtvd_name, name, i + 1, dtrace_errmsg(dtp, dtrace_errno(dtp))); dtt.dtt_object = NULL; dtt.dtt_ctfp = NULL; dtt.dtt_type = CTF_ERR; } else { dt_node_type_assign(prp->pr_nargv[adp->dtargd_mapping], dtt.dtt_ctfp, dtt.dtt_type, dtt.dtt_flags & DTT_FL_USER ? 
B_TRUE : B_FALSE); } if (dtt.dtt_type != CTF_ERR && (adp->dtargd_xlate[0] == '\0' || strcmp(adp->dtargd_native, adp->dtargd_xlate) == 0)) { dt_node_type_propagate(prp->pr_nargv[ adp->dtargd_mapping], prp->pr_xargv[i]); } else if (dtrace_type_strcompile(dtp, adp->dtargd_xlate, &dtt) != 0) { dt_dprintf("failed to resolve output type %s " "for %s:%s arg #%d: %s\n", adp->dtargd_xlate, pvp->pv_desc.dtvd_name, name, i + 1, dtrace_errmsg(dtp, dtrace_errno(dtp))); dtt.dtt_object = NULL; dtt.dtt_ctfp = NULL; dtt.dtt_type = CTF_ERR; } else { dt_node_type_assign(prp->pr_xargv[i], dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); } prp->pr_mapping[i] = adp->dtargd_mapping; prp->pr_argv[i] = dtt; } return (prp); } /* * Lookup a probe declaration based on a known provider and full or partially * specified module, function, and name. If the probe is not known to us yet, * ask dtrace(7D) to match the description and then cache any useful results. */ dt_probe_t * dt_probe_lookup(dt_provider_t *pvp, const char *s) { dtrace_hdl_t *dtp = pvp->pv_hdl; dtrace_probedesc_t pd; dt_ident_t *idp; size_t keylen; char *key; if (dtrace_str2desc(dtp, DTRACE_PROBESPEC_NAME, s, &pd) != 0) return (NULL); /* dt_errno is set for us */ keylen = dt_probe_keylen(&pd); key = dt_probe_key(&pd, alloca(keylen)); /* * If the probe is already declared, then return the dt_probe_t from * the existing identifier. This could come from a static declaration * or it could have been cached from an earlier call to this function. */ if ((idp = dt_idhash_lookup(pvp->pv_probes, key)) != NULL) return (idp->di_data); /* * If the probe isn't known, use the probe description computed above * to ask dtrace(7D) to find the first matching probe. */ if (dt_ioctl(dtp, DTRACEIOC_PROBEMATCH, &pd) == 0) return (dt_probe_discover(pvp, &pd)); if (errno == ESRCH || errno == EBADF) (void) dt_set_errno(dtp, EDT_NOPROBE); else (void) dt_set_errno(dtp, errno); return (NULL); } dt_probe_t * dt_probe_create(dtrace_hdl_t *dtp, dt_ident_t *idp, int protoc, dt_node_t *nargs, uint_t nargc, dt_node_t *xargs, uint_t xargc) { dt_module_t *dmp; dt_probe_t *prp; const char *p; uint_t i; assert(idp->di_kind == DT_IDENT_PROBE); assert(idp->di_data == NULL); /* * If only a single prototype is given, set xargc/s to nargc/s to * simplify subsequent use. Note that we can have one or both of nargs * and xargs be specified but set to NULL, indicating a void prototype. 
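 *
 * Schematically (hypothetical provider; the declaration syntax is only
 * sketched here), a provider definition such as
 *
 *	provider myprov {
 *		probe start(struct request *) : (requestinfo_t *);
 *	};
 *
 * supplies both prototypes -- the native argument list on the left and
 * the translated argument list after the colon -- while
 *
 *	probe stop(int);
 *
 * supplies a single prototype, in which case xargs/xargc simply mirror
 * nargs/nargc below.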
*/ if (protoc < 2) { assert(xargs == NULL); assert(xargc == 0); xargs = nargs; xargc = nargc; } if ((prp = dt_alloc(dtp, sizeof (dt_probe_t))) == NULL) return (NULL); prp->pr_pvp = NULL; prp->pr_ident = idp; p = strrchr(idp->di_name, ':'); assert(p != NULL); prp->pr_name = p + 1; prp->pr_nargs = nargs; prp->pr_nargv = dt_alloc(dtp, sizeof (dt_node_t *) * nargc); prp->pr_nargc = nargc; prp->pr_xargs = xargs; prp->pr_xargv = dt_alloc(dtp, sizeof (dt_node_t *) * xargc); prp->pr_xargc = xargc; prp->pr_mapping = dt_alloc(dtp, sizeof (uint8_t) * xargc); prp->pr_inst = NULL; prp->pr_argv = dt_alloc(dtp, sizeof (dtrace_typeinfo_t) * xargc); prp->pr_argc = xargc; if ((prp->pr_nargc != 0 && prp->pr_nargv == NULL) || (prp->pr_xargc != 0 && prp->pr_xargv == NULL) || (prp->pr_xargc != 0 && prp->pr_mapping == NULL) || (prp->pr_argc != 0 && prp->pr_argv == NULL)) { dt_probe_destroy(prp); return (NULL); } for (i = 0; i < xargc; i++, xargs = xargs->dn_list) { if (xargs->dn_string != NULL) prp->pr_mapping[i] = dt_probe_argmap(xargs, nargs); else prp->pr_mapping[i] = i; prp->pr_xargv[i] = xargs; if ((dmp = dt_module_lookup_by_ctf(dtp, xargs->dn_ctfp)) != NULL) prp->pr_argv[i].dtt_object = dmp->dm_name; else prp->pr_argv[i].dtt_object = NULL; prp->pr_argv[i].dtt_ctfp = xargs->dn_ctfp; prp->pr_argv[i].dtt_type = xargs->dn_type; } for (i = 0; i < nargc; i++, nargs = nargs->dn_list) prp->pr_nargv[i] = nargs; idp->di_data = prp; return (prp); } void dt_probe_declare(dt_provider_t *pvp, dt_probe_t *prp) { assert(prp->pr_ident->di_kind == DT_IDENT_PROBE); assert(prp->pr_ident->di_data == prp); assert(prp->pr_pvp == NULL); if (prp->pr_xargs != prp->pr_nargs) pvp->pv_flags &= ~DT_PROVIDER_INTF; prp->pr_pvp = pvp; dt_idhash_xinsert(pvp->pv_probes, prp->pr_ident); } void dt_probe_destroy(dt_probe_t *prp) { dt_probe_instance_t *pip, *pip_next; dtrace_hdl_t *dtp; if (prp->pr_pvp != NULL) dtp = prp->pr_pvp->pv_hdl; else dtp = yypcb->pcb_hdl; dt_node_list_free(&prp->pr_nargs); dt_node_list_free(&prp->pr_xargs); dt_free(dtp, prp->pr_nargv); dt_free(dtp, prp->pr_xargv); for (pip = prp->pr_inst; pip != NULL; pip = pip_next) { pip_next = pip->pi_next; dt_free(dtp, pip->pi_rname); dt_free(dtp, pip->pi_fname); dt_free(dtp, pip->pi_offs); dt_free(dtp, pip->pi_enoffs); dt_free(dtp, pip); } dt_free(dtp, prp->pr_mapping); dt_free(dtp, prp->pr_argv); dt_free(dtp, prp); } int dt_probe_define(dt_provider_t *pvp, dt_probe_t *prp, const char *fname, const char *rname, uint32_t offset, int isenabled) { dtrace_hdl_t *dtp = pvp->pv_hdl; dt_probe_instance_t *pip; uint32_t **offs; uint_t *noffs, *maxoffs; assert(fname != NULL); for (pip = prp->pr_inst; pip != NULL; pip = pip->pi_next) { if (strcmp(pip->pi_fname, fname) == 0 && - ((rname == NULL && pip->pi_rname == NULL) || - (rname != NULL && pip->pi_rname != NULL && - strcmp(pip->pi_rname, rname) == 0))) + strcmp(pip->pi_rname, rname) == 0) break; } if (pip == NULL) { if ((pip = dt_zalloc(dtp, sizeof (*pip))) == NULL) return (-1); if ((pip->pi_offs = dt_zalloc(dtp, sizeof (uint32_t))) == NULL) goto nomem; if ((pip->pi_enoffs = dt_zalloc(dtp, sizeof (uint32_t))) == NULL) goto nomem; if ((pip->pi_fname = strdup(fname)) == NULL) goto nomem; - if (rname != NULL && (pip->pi_rname = strdup(rname)) == NULL) + if ((pip->pi_rname = strdup(rname)) == NULL) goto nomem; pip->pi_noffs = 0; pip->pi_maxoffs = 1; pip->pi_nenoffs = 0; pip->pi_maxenoffs = 1; pip->pi_next = prp->pr_inst; prp->pr_inst = pip; } if (isenabled) { offs = &pip->pi_enoffs; noffs = &pip->pi_nenoffs; maxoffs = &pip->pi_maxenoffs; } 
else { offs = &pip->pi_offs; noffs = &pip->pi_noffs; maxoffs = &pip->pi_maxoffs; } if (*noffs == *maxoffs) { uint_t new_max = *maxoffs * 2; uint32_t *new_offs = dt_alloc(dtp, sizeof (uint32_t) * new_max); if (new_offs == NULL) return (-1); bcopy(*offs, new_offs, sizeof (uint32_t) * *maxoffs); dt_free(dtp, *offs); *maxoffs = new_max; *offs = new_offs; } dt_dprintf("defined probe %s %s:%s %s() +0x%x (%s)\n", isenabled ? "(is-enabled)" : "", pvp->pv_desc.dtvd_name, prp->pr_ident->di_name, fname, offset, - rname != NULL ? rname : fname); + rname); assert(*noffs < *maxoffs); (*offs)[(*noffs)++] = offset; return (0); nomem: dt_free(dtp, pip->pi_fname); dt_free(dtp, pip->pi_enoffs); dt_free(dtp, pip->pi_offs); dt_free(dtp, pip); return (dt_set_errno(dtp, EDT_NOMEM)); } /* * Lookup the dynamic translator type tag for the specified probe argument and * assign the type to the specified node. If the type is not yet defined, add * it to the "D" module's type container as a typedef for an unknown type. */ dt_node_t * dt_probe_tag(dt_probe_t *prp, uint_t argn, dt_node_t *dnp) { dtrace_hdl_t *dtp = prp->pr_pvp->pv_hdl; dtrace_typeinfo_t dtt; size_t len; char *tag; len = snprintf(NULL, 0, "__dtrace_%s___%s_arg%u", prp->pr_pvp->pv_desc.dtvd_name, prp->pr_name, argn); tag = alloca(len + 1); (void) snprintf(tag, len + 1, "__dtrace_%s___%s_arg%u", prp->pr_pvp->pv_desc.dtvd_name, prp->pr_name, argn); if (dtrace_lookup_by_type(dtp, DTRACE_OBJ_DDEFS, tag, &dtt) != 0) { dtt.dtt_object = DTRACE_OBJ_DDEFS; dtt.dtt_ctfp = DT_DYN_CTFP(dtp); dtt.dtt_type = ctf_add_typedef(DT_DYN_CTFP(dtp), CTF_ADD_ROOT, tag, DT_DYN_TYPE(dtp)); if (dtt.dtt_type == CTF_ERR || ctf_update(dtt.dtt_ctfp) == CTF_ERR) { xyerror(D_UNKNOWN, "cannot define type %s: %s\n", tag, ctf_errmsg(ctf_errno(dtt.dtt_ctfp))); } } bzero(dnp, sizeof (dt_node_t)); dnp->dn_kind = DT_NODE_TYPE; dt_node_type_assign(dnp, dtt.dtt_ctfp, dtt.dtt_type, B_FALSE); dt_node_attr_assign(dnp, _dtrace_defattr); return (dnp); } /*ARGSUSED*/ static int dt_probe_desc(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, void *arg) { if (((dtrace_probedesc_t *)arg)->dtpd_id == DTRACE_IDNONE) { bcopy(pdp, arg, sizeof (dtrace_probedesc_t)); return (0); } return (1); } dt_probe_t * dt_probe_info(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, dtrace_probeinfo_t *pip) { int m_is_glob = pdp->dtpd_mod[0] == '\0' || strisglob(pdp->dtpd_mod); int f_is_glob = pdp->dtpd_func[0] == '\0' || strisglob(pdp->dtpd_func); int n_is_glob = pdp->dtpd_name[0] == '\0' || strisglob(pdp->dtpd_name); dt_probe_t *prp = NULL; const dtrace_pattr_t *pap; dt_provider_t *pvp; dt_ident_t *idp; /* * Attempt to lookup the probe in our existing cache for this provider. * If none is found and an explicit probe ID was specified, discover * that specific probe and cache its description and arguments. */ if ((pvp = dt_provider_lookup(dtp, pdp->dtpd_provider)) != NULL) { size_t keylen = dt_probe_keylen(pdp); char *key = dt_probe_key(pdp, alloca(keylen)); if ((idp = dt_idhash_lookup(pvp->pv_probes, key)) != NULL) prp = idp->di_data; else if (pdp->dtpd_id != DTRACE_IDNONE) prp = dt_probe_discover(pvp, pdp); } /* * If no probe was found in our cache, convert the caller's partial * probe description into a fully-formed matching probe description by * iterating over up to at most two probes that match 'pdp'. We then * call dt_probe_discover() on the resulting probe identifier. 
*/ if (prp == NULL) { dtrace_probedesc_t pd; int m; bzero(&pd, sizeof (pd)); pd.dtpd_id = DTRACE_IDNONE; /* * Call dtrace_probe_iter() to find matching probes. Our * dt_probe_desc() callback will produce the following results: * * m < 0 dtrace_probe_iter() found zero matches (or failed). * m > 0 dtrace_probe_iter() found more than one match. * m = 0 dtrace_probe_iter() found exactly one match. */ if ((m = dtrace_probe_iter(dtp, pdp, dt_probe_desc, &pd)) < 0) return (NULL); /* dt_errno is set for us */ if ((pvp = dt_provider_lookup(dtp, pd.dtpd_provider)) == NULL) return (NULL); /* dt_errno is set for us */ /* * If more than one probe was matched, then do not report probe * information if either of the following conditions is true: * * (a) The Arguments Data stability of the matched provider is * less than Evolving. * * (b) Any description component that is at least Evolving is * empty or is specified using a globbing expression. * * These conditions imply that providers that provide Evolving * or better Arguments Data stability must guarantee that all * probes with identical field names in a field of Evolving or * better Name stability have identical argument signatures. */ if (m > 0) { if (pvp->pv_desc.dtvd_attr.dtpa_args.dtat_data < DTRACE_STABILITY_EVOLVING) { (void) dt_set_errno(dtp, EDT_UNSTABLE); return (NULL); } if (pvp->pv_desc.dtvd_attr.dtpa_mod.dtat_name >= DTRACE_STABILITY_EVOLVING && m_is_glob) { (void) dt_set_errno(dtp, EDT_UNSTABLE); return (NULL); } if (pvp->pv_desc.dtvd_attr.dtpa_func.dtat_name >= DTRACE_STABILITY_EVOLVING && f_is_glob) { (void) dt_set_errno(dtp, EDT_UNSTABLE); return (NULL); } if (pvp->pv_desc.dtvd_attr.dtpa_name.dtat_name >= DTRACE_STABILITY_EVOLVING && n_is_glob) { (void) dt_set_errno(dtp, EDT_UNSTABLE); return (NULL); } } /* * If we matched a probe exported by dtrace(7D), then discover * the real attributes. Otherwise grab the static declaration. */ if (pd.dtpd_id != DTRACE_IDNONE) prp = dt_probe_discover(pvp, &pd); else prp = dt_probe_lookup(pvp, pd.dtpd_name); if (prp == NULL) return (NULL); /* dt_errno is set for us */ } assert(pvp != NULL && prp != NULL); /* * Compute the probe description attributes by taking the minimum of * the attributes of the specified fields. If no provider is specified * or a glob pattern is used for the provider, use Unstable attributes. */ if (pdp->dtpd_provider[0] == '\0' || strisglob(pdp->dtpd_provider)) pap = &_dtrace_prvdesc; else pap = &pvp->pv_desc.dtvd_attr; pip->dtp_attr = pap->dtpa_provider; if (!m_is_glob) pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_mod); if (!f_is_glob) pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_func); if (!n_is_glob) pip->dtp_attr = dt_attr_min(pip->dtp_attr, pap->dtpa_name); pip->dtp_arga = pap->dtpa_args; pip->dtp_argv = prp->pr_argv; pip->dtp_argc = prp->pr_argc; return (prp); } int dtrace_probe_info(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, dtrace_probeinfo_t *pip) { return (dt_probe_info(dtp, pdp, pip) != NULL ? 
0 : -1); } /*ARGSUSED*/ static int dt_probe_iter(dt_idhash_t *ihp, dt_ident_t *idp, dt_probe_iter_t *pit) { const dt_probe_t *prp = idp->di_data; if (!dt_gmatch(prp->pr_name, pit->pit_pat)) return (0); /* continue on and examine next probe in hash */ (void) strlcpy(pit->pit_desc.dtpd_name, prp->pr_name, DTRACE_NAMELEN); pit->pit_desc.dtpd_id = idp->di_id; pit->pit_matches++; return (pit->pit_func(pit->pit_hdl, &pit->pit_desc, pit->pit_arg)); } int dtrace_probe_iter(dtrace_hdl_t *dtp, const dtrace_probedesc_t *pdp, dtrace_probe_f *func, void *arg) { const char *provider = pdp ? pdp->dtpd_provider : NULL; dtrace_id_t id = DTRACE_IDNONE; dtrace_probedesc_t pd; dt_probe_iter_t pit; int cmd, rv; bzero(&pit, sizeof (pit)); pit.pit_hdl = dtp; pit.pit_func = func; pit.pit_arg = arg; pit.pit_pat = pdp ? pdp->dtpd_name : NULL; for (pit.pit_pvp = dt_list_next(&dtp->dt_provlist); pit.pit_pvp != NULL; pit.pit_pvp = dt_list_next(pit.pit_pvp)) { if (pit.pit_pvp->pv_flags & DT_PROVIDER_IMPL) continue; /* we'll get these later using dt_ioctl() */ if (!dt_gmatch(pit.pit_pvp->pv_desc.dtvd_name, provider)) continue; (void) strlcpy(pit.pit_desc.dtpd_provider, pit.pit_pvp->pv_desc.dtvd_name, DTRACE_PROVNAMELEN); if ((rv = dt_idhash_iter(pit.pit_pvp->pv_probes, (dt_idhash_f *)dt_probe_iter, &pit)) != 0) return (rv); } if (pdp != NULL) cmd = DTRACEIOC_PROBEMATCH; else cmd = DTRACEIOC_PROBES; for (;;) { if (pdp != NULL) bcopy(pdp, &pd, sizeof (pd)); pd.dtpd_id = id; if (dt_ioctl(dtp, cmd, &pd) != 0) break; else if ((rv = func(dtp, &pd, arg)) != 0) return (rv); pit.pit_matches++; id = pd.dtpd_id + 1; } switch (errno) { case ESRCH: case EBADF: return (pit.pit_matches ? 0 : dt_set_errno(dtp, EDT_NOPROBE)); case EINVAL: return (dt_set_errno(dtp, EDT_BADPGLOB)); default: return (dt_set_errno(dtp, errno)); } } Index: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris (revision 313267) Property changes on: projects/netbsd-tests-upstream-01-2017/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r312125-313266 Index: projects/netbsd-tests-upstream-01-2017/cddl =================================================================== --- projects/netbsd-tests-upstream-01-2017/cddl (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/cddl (revision 313267) Property changes on: projects/netbsd-tests-upstream-01-2017/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r313244-313266 Index: projects/netbsd-tests-upstream-01-2017/sbin/ifconfig/ifieee80211.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sbin/ifconfig/ifieee80211.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sbin/ifconfig/ifieee80211.c (revision 313267) @@ -1,5741 +1,5741 @@ /* * Copyright 2001 The Aerospace Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of The Aerospace Corporation may not be used to endorse or * promote products derived from this software. * * THIS SOFTWARE IS PROVIDED BY THE AEROSPACE CORPORATION ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (c) 1997, 1998, 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* NB: for offsetof */ #include "ifconfig.h" #include #include #ifndef IEEE80211_FIXED_RATE_NONE #define IEEE80211_FIXED_RATE_NONE 0xff #endif /* XXX need these publicly defined or similar */ #ifndef IEEE80211_NODE_AUTH #define IEEE80211_NODE_AUTH 0x000001 /* authorized for data */ #define IEEE80211_NODE_QOS 0x000002 /* QoS enabled */ #define IEEE80211_NODE_ERP 0x000004 /* ERP enabled */ #define IEEE80211_NODE_PWR_MGT 0x000010 /* power save mode enabled */ #define IEEE80211_NODE_AREF 0x000020 /* authentication ref held */ #define IEEE80211_NODE_HT 0x000040 /* HT enabled */ #define IEEE80211_NODE_HTCOMPAT 0x000080 /* HT setup w/ vendor OUI's */ #define IEEE80211_NODE_WPS 0x000100 /* WPS association */ #define IEEE80211_NODE_TSN 0x000200 /* TSN association */ #define IEEE80211_NODE_AMPDU_RX 0x000400 /* AMPDU rx enabled */ #define IEEE80211_NODE_AMPDU_TX 0x000800 /* AMPDU tx enabled */ #define IEEE80211_NODE_MIMO_PS 0x001000 /* MIMO power save enabled */ #define IEEE80211_NODE_MIMO_RTS 0x002000 /* send RTS in MIMO PS */ #define IEEE80211_NODE_RIFS 0x004000 /* RIFS enabled */ #define IEEE80211_NODE_SGI20 0x008000 /* Short GI in HT20 enabled */ #define IEEE80211_NODE_SGI40 0x010000 /* Short GI in HT40 enabled */ #define IEEE80211_NODE_ASSOCID 0x020000 /* xmit requires associd */ #define IEEE80211_NODE_AMSDU_RX 0x040000 /* AMSDU rx enabled */ #define IEEE80211_NODE_AMSDU_TX 0x080000 /* AMSDU tx enabled */ #define IEEE80211_NODE_VHT 0x100000 /* VHT enabled */ #endif #define MAXCHAN 1536 /* max 1.5K channels */ #define MAXCOL 78 static int col; static char spacer; static void LINE_INIT(char c); static void LINE_BREAK(void); static void LINE_CHECK(const char *fmt, ...); static const char *modename[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = "auto", [IEEE80211_MODE_11A] = "11a", [IEEE80211_MODE_11B] = "11b", [IEEE80211_MODE_11G] = "11g", [IEEE80211_MODE_FH] = "fh", [IEEE80211_MODE_TURBO_A] = "turboA", [IEEE80211_MODE_TURBO_G] = "turboG", [IEEE80211_MODE_STURBO_A] = "sturbo", [IEEE80211_MODE_11NA] = "11na", [IEEE80211_MODE_11NG] = "11ng", [IEEE80211_MODE_HALF] = "half", [IEEE80211_MODE_QUARTER] = "quarter", [IEEE80211_MODE_VHT_2GHZ] = "11acg", [IEEE80211_MODE_VHT_5GHZ] = "11ac", }; static void set80211(int s, int type, int val, int len, void *data); static int get80211(int s, int type, void *data, int len); static int get80211len(int s, int type, void *data, int len, int *plen); static int get80211val(int s, int type, int *val); static const char *get_string(const char *val, const char *sep, u_int8_t *buf, int *lenp); static void print_string(const u_int8_t *buf, int len); static void print_regdomain(const struct ieee80211_regdomain *, int); static void print_channels(int, const struct ieee80211req_chaninfo *, int allchans, int verbose); static void regdomain_makechannels(struct ieee80211_regdomain_req *, const struct ieee80211_devcaps_req *); static const char *mesh_linkstate_string(uint8_t state); static struct ieee80211req_chaninfo *chaninfo; static struct ieee80211_regdomain regdomain; static int gotregdomain = 0; static struct ieee80211_roamparams_req roamparams; static int gotroam = 0; static struct ieee80211_txparams_req txparams; static int gottxparams = 0; static struct ieee80211_channel curchan; static int gotcurchan = 0; static struct 
ifmediareq *ifmr; static int htconf = 0; static int gothtconf = 0; static void gethtconf(int s) { if (gothtconf) return; if (get80211val(s, IEEE80211_IOC_HTCONF, &htconf) < 0) warn("unable to get HT configuration information"); gothtconf = 1; } /* VHT */ static int vhtconf = 0; static int gotvhtconf = 0; static void getvhtconf(int s) { if (gotvhtconf) return; if (get80211val(s, IEEE80211_IOC_VHTCONF, &vhtconf) < 0) warn("unable to get VHT configuration information"); gotvhtconf = 1; } /* * Collect channel info from the kernel. We use this (mostly) * to handle mapping between frequency and IEEE channel number. */ static void getchaninfo(int s) { if (chaninfo != NULL) return; chaninfo = malloc(IEEE80211_CHANINFO_SIZE(MAXCHAN)); if (chaninfo == NULL) errx(1, "no space for channel list"); if (get80211(s, IEEE80211_IOC_CHANINFO, chaninfo, IEEE80211_CHANINFO_SIZE(MAXCHAN)) < 0) err(1, "unable to get channel information"); ifmr = ifmedia_getstate(s); gethtconf(s); getvhtconf(s); } static struct regdata * getregdata(void) { static struct regdata *rdp = NULL; if (rdp == NULL) { rdp = lib80211_alloc_regdata(); if (rdp == NULL) errx(-1, "missing or corrupted regdomain database"); } return rdp; } /* * Given the channel at index i with attributes from, * check if there is a channel with attributes to in * the channel table. With suitable attributes this * allows the caller to look for promotion; e.g. from * 11b > 11g. */ static int canpromote(int i, int from, int to) { const struct ieee80211_channel *fc = &chaninfo->ic_chans[i]; u_int j; if ((fc->ic_flags & from) != from) return i; /* NB: quick check exploiting ordering of chans w/ same frequency */ if (i+1 < chaninfo->ic_nchans && chaninfo->ic_chans[i+1].ic_freq == fc->ic_freq && (chaninfo->ic_chans[i+1].ic_flags & to) == to) return i+1; /* brute force search in case channel list is not ordered */ for (j = 0; j < chaninfo->ic_nchans; j++) { const struct ieee80211_channel *tc = &chaninfo->ic_chans[j]; if (j != i && tc->ic_freq == fc->ic_freq && (tc->ic_flags & to) == to) return j; } return i; } /* * Handle channel promotion. When a channel is specified with * only a frequency we want to promote it to the ``best'' channel * available. The channel list has separate entries for 11b, 11g, * 11a, and 11n[ga] channels so specifying a frequency w/o any * attributes requires we upgrade, e.g. from 11b -> 11g. This * gets complicated when the channel is specified on the same * command line with a media request that constrains the available * channe list (e.g. mode 11a); we want to honor that to avoid * confusing behaviour. */ /* * XXX VHT */ static int promote(int i) { /* * Query the current mode of the interface in case it's * constrained (e.g. to 11a). We must do this carefully * as there may be a pending ifmedia request in which case * asking the kernel will give us the wrong answer. This * is an unfortunate side-effect of the way ifconfig is * structure for modularity (yech). * * NB: ifmr is actually setup in getchaninfo (above); we * assume it's called coincident with to this call so * we have a ``current setting''; otherwise we must pass * the socket descriptor down to here so we can make * the ifmedia_getstate call ourselves. */ int chanmode = ifmr != NULL ? 
IFM_MODE(ifmr->ifm_current) : IFM_AUTO; /* when ambiguous promote to ``best'' */ /* NB: we abitrarily pick HT40+ over HT40- */ if (chanmode != IFM_IEEE80211_11B) i = canpromote(i, IEEE80211_CHAN_B, IEEE80211_CHAN_G); if (chanmode != IFM_IEEE80211_11G && (htconf & 1)) { i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT20); if (htconf & 2) { i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40D); i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40U); } } if (chanmode != IFM_IEEE80211_11A && (htconf & 1)) { i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT20); if (htconf & 2) { i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D); i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U); } } return i; } static void mapfreq(struct ieee80211_channel *chan, int freq, int flags) { u_int i; for (i = 0; i < chaninfo->ic_nchans; i++) { const struct ieee80211_channel *c = &chaninfo->ic_chans[i]; if (c->ic_freq == freq && (c->ic_flags & flags) == flags) { if (flags == 0) { /* when ambiguous promote to ``best'' */ c = &chaninfo->ic_chans[promote(i)]; } *chan = *c; return; } } errx(1, "unknown/undefined frequency %u/0x%x", freq, flags); } static void mapchan(struct ieee80211_channel *chan, int ieee, int flags) { u_int i; for (i = 0; i < chaninfo->ic_nchans; i++) { const struct ieee80211_channel *c = &chaninfo->ic_chans[i]; if (c->ic_ieee == ieee && (c->ic_flags & flags) == flags) { if (flags == 0) { /* when ambiguous promote to ``best'' */ c = &chaninfo->ic_chans[promote(i)]; } *chan = *c; return; } } errx(1, "unknown/undefined channel number %d flags 0x%x", ieee, flags); } static const struct ieee80211_channel * getcurchan(int s) { if (gotcurchan) return &curchan; if (get80211(s, IEEE80211_IOC_CURCHAN, &curchan, sizeof(curchan)) < 0) { int val; /* fall back to legacy ioctl */ if (get80211val(s, IEEE80211_IOC_CHANNEL, &val) < 0) err(-1, "cannot figure out current channel"); getchaninfo(s); mapchan(&curchan, val, 0); } gotcurchan = 1; return &curchan; } static enum ieee80211_phymode chan2mode(const struct ieee80211_channel *c) { if (IEEE80211_IS_CHAN_VHTA(c)) return IEEE80211_MODE_VHT_5GHZ; if (IEEE80211_IS_CHAN_VHTG(c)) return IEEE80211_MODE_VHT_2GHZ; if (IEEE80211_IS_CHAN_HTA(c)) return IEEE80211_MODE_11NA; if (IEEE80211_IS_CHAN_HTG(c)) return IEEE80211_MODE_11NG; if (IEEE80211_IS_CHAN_108A(c)) return IEEE80211_MODE_TURBO_A; if (IEEE80211_IS_CHAN_108G(c)) return IEEE80211_MODE_TURBO_G; if (IEEE80211_IS_CHAN_ST(c)) return IEEE80211_MODE_STURBO_A; if (IEEE80211_IS_CHAN_FHSS(c)) return IEEE80211_MODE_FH; if (IEEE80211_IS_CHAN_HALF(c)) return IEEE80211_MODE_HALF; if (IEEE80211_IS_CHAN_QUARTER(c)) return IEEE80211_MODE_QUARTER; if (IEEE80211_IS_CHAN_A(c)) return IEEE80211_MODE_11A; if (IEEE80211_IS_CHAN_ANYG(c)) return IEEE80211_MODE_11G; if (IEEE80211_IS_CHAN_B(c)) return IEEE80211_MODE_11B; return IEEE80211_MODE_AUTO; } static void getroam(int s) { if (gotroam) return; if (get80211(s, IEEE80211_IOC_ROAM, &roamparams, sizeof(roamparams)) < 0) err(1, "unable to get roaming parameters"); gotroam = 1; } static void setroam_cb(int s, void *arg) { struct ieee80211_roamparams_req *roam = arg; set80211(s, IEEE80211_IOC_ROAM, 0, sizeof(*roam), roam); } static void gettxparams(int s) { if (gottxparams) return; if (get80211(s, IEEE80211_IOC_TXPARAMS, &txparams, sizeof(txparams)) < 0) err(1, "unable to get transmit parameters"); gottxparams = 1; } static void settxparams_cb(int s, 
    void *arg)
{
	struct ieee80211_txparams_req *txp = arg;

	set80211(s, IEEE80211_IOC_TXPARAMS, 0, sizeof(*txp), txp);
}

static void
getregdomain(int s)
{
	if (gotregdomain)
		return;
	if (get80211(s, IEEE80211_IOC_REGDOMAIN,
	    &regdomain, sizeof(regdomain)) < 0)
		err(1, "unable to get regulatory domain info");
	gotregdomain = 1;
}

static void
getdevcaps(int s, struct ieee80211_devcaps_req *dc)
{
	if (get80211(s, IEEE80211_IOC_DEVCAPS, dc,
	    IEEE80211_DEVCAPS_SPACE(dc)) < 0)
		err(1, "unable to get device capabilities");
}

static void
setregdomain_cb(int s, void *arg)
{
	struct ieee80211_regdomain_req *req;
	struct ieee80211_regdomain *rd = arg;
	struct ieee80211_devcaps_req *dc;
	struct regdata *rdp = getregdata();

	if (rd->country != NO_COUNTRY) {
		const struct country *cc;
		/*
		 * Check the current country setting to make sure it's
		 * compatible with the new regdomain.  If not, then
		 * override it with any default country for this
		 * SKU.  If we cannot arrange a match, then abort.
		 */
		cc = lib80211_country_findbycc(rdp, rd->country);
		if (cc == NULL)
			errx(1, "unknown ISO country code %d", rd->country);
		if (cc->rd->sku != rd->regdomain) {
			const struct regdomain *rp;
			/*
			 * Check if the country is incompatible with the
			 * regdomain.  To enable multiple regdomains for a
			 * country code we permit a mismatch between the
			 * regdomain and the country's associated regdomain
			 * when the regdomain is set up w/o a default country.
			 * For example, US is bound to the FCC regdomain but
			 * we allow US to be combined with FCC3 because FCC3
			 * has no default country.  This allows bogus
			 * combinations like FCC3+DK which are resolved when
			 * constructing the channel list by deferring to the
			 * regdomain.
			 */
			rp = lib80211_regdomain_findbysku(rdp, rd->regdomain);
			if (rp == NULL)
				errx(1, "country %s (%s) is not usable with "
				    "regdomain %d", cc->isoname, cc->name,
				    rd->regdomain);
			else if (rp->cc != NULL && rp->cc != cc)
				errx(1, "country %s (%s) is not usable with "
				    "regdomain %s", cc->isoname, cc->name,
				    rp->name);
		}
	}
	/*
	 * Fetch the device capabilities and calculate the
	 * full set of netbands for which we request a new
	 * channel list be constructed.  Once that's done we
	 * push the regdomain info + channel list to the kernel.
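	 *
	 * As an illustrative (hypothetical) example: a request such as
	 * "ifconfig wlan0 country US regdomain FCC3" passes the check
	 * above because FCC3 carries no default country; the channel
	 * list pushed to the kernel is then derived from the device
	 * capabilities fetched below rather than from the country alone.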
*/ dc = malloc(IEEE80211_DEVCAPS_SIZE(MAXCHAN)); if (dc == NULL) errx(1, "no space for device capabilities"); dc->dc_chaninfo.ic_nchans = MAXCHAN; getdevcaps(s, dc); #if 0 if (verbose) { printf("drivercaps: 0x%x\n", dc->dc_drivercaps); printf("cryptocaps: 0x%x\n", dc->dc_cryptocaps); printf("htcaps : 0x%x\n", dc->dc_htcaps); printf("vhtcaps : 0x%x\n", dc->dc_vhtcaps); #if 0 memcpy(chaninfo, &dc->dc_chaninfo, IEEE80211_CHANINFO_SPACE(&dc->dc_chaninfo)); print_channels(s, &dc->dc_chaninfo, 1/*allchans*/, 1/*verbose*/); #endif } #endif req = malloc(IEEE80211_REGDOMAIN_SIZE(dc->dc_chaninfo.ic_nchans)); if (req == NULL) errx(1, "no space for regdomain request"); req->rd = *rd; regdomain_makechannels(req, dc); if (verbose) { LINE_INIT(':'); print_regdomain(rd, 1/*verbose*/); LINE_BREAK(); /* blech, reallocate channel list for new data */ if (chaninfo != NULL) free(chaninfo); chaninfo = malloc(IEEE80211_CHANINFO_SPACE(&req->chaninfo)); if (chaninfo == NULL) errx(1, "no space for channel list"); memcpy(chaninfo, &req->chaninfo, IEEE80211_CHANINFO_SPACE(&req->chaninfo)); print_channels(s, &req->chaninfo, 1/*allchans*/, 1/*verbose*/); } if (req->chaninfo.ic_nchans == 0) errx(1, "no channels calculated"); set80211(s, IEEE80211_IOC_REGDOMAIN, 0, IEEE80211_REGDOMAIN_SPACE(req), req); free(req); free(dc); } static int ieee80211_mhz2ieee(int freq, int flags) { struct ieee80211_channel chan; mapfreq(&chan, freq, flags); return chan.ic_ieee; } static int isanyarg(const char *arg) { return (strncmp(arg, "-", 1) == 0 || strncasecmp(arg, "any", 3) == 0 || strncasecmp(arg, "off", 3) == 0); } static void set80211ssid(const char *val, int d, int s, const struct afswtch *rafp) { int ssid; int len; u_int8_t data[IEEE80211_NWID_LEN]; ssid = 0; len = strlen(val); if (len > 2 && isdigit((int)val[0]) && val[1] == ':') { ssid = atoi(val)-1; val += 2; } bzero(data, sizeof(data)); len = sizeof(data); if (get_string(val, NULL, data, &len) == NULL) exit(1); set80211(s, IEEE80211_IOC_SSID, ssid, len, data); } static void set80211meshid(const char *val, int d, int s, const struct afswtch *rafp) { int len; u_int8_t data[IEEE80211_NWID_LEN]; memset(data, 0, sizeof(data)); len = sizeof(data); if (get_string(val, NULL, data, &len) == NULL) exit(1); set80211(s, IEEE80211_IOC_MESH_ID, 0, len, data); } static void set80211stationname(const char *val, int d, int s, const struct afswtch *rafp) { int len; u_int8_t data[33]; bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); set80211(s, IEEE80211_IOC_STATIONNAME, 0, len, data); } /* * Parse a channel specification for attributes/flags. * The syntax is: * freq/xx channel width (5,10,20,40,40+,40-) * freq:mode channel mode (a,b,g,h,n,t,s,d) * * These can be combined in either order; e.g. 2437:ng/40. * Modes are case insensitive. * * The result is not validated here; it's assumed to be * checked against the channel table fetched from the kernel. 
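 *
 * A few illustrative examples of the flag derivation done below:
 * "2437:g" yields IEEE80211_CHAN_G, "6:ht/40+" requests an HT
 * channel on the upper extension channel (IEEE80211_CHAN_HT40U),
 * and a bare "2437" carries no flags and is resolved by the
 * promotion logic above.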
*/ static int getchannelflags(const char *val, int freq) { #define _CHAN_HT 0x80000000 const char *cp; int flags; int is_vht = 0; flags = 0; cp = strchr(val, ':'); if (cp != NULL) { for (cp++; isalpha((int) *cp); cp++) { /* accept mixed case */ int c = *cp; if (isupper(c)) c = tolower(c); switch (c) { case 'a': /* 802.11a */ flags |= IEEE80211_CHAN_A; break; case 'b': /* 802.11b */ flags |= IEEE80211_CHAN_B; break; case 'g': /* 802.11g */ flags |= IEEE80211_CHAN_G; break; case 'v': /* vht: 802.11ac */ is_vht = 1; /* Fallthrough */ case 'h': /* ht = 802.11n */ case 'n': /* 802.11n */ flags |= _CHAN_HT; /* NB: private */ break; case 'd': /* dt = Atheros Dynamic Turbo */ flags |= IEEE80211_CHAN_TURBO; break; case 't': /* ht, dt, st, t */ /* dt and unadorned t specify Dynamic Turbo */ if ((flags & (IEEE80211_CHAN_STURBO|_CHAN_HT)) == 0) flags |= IEEE80211_CHAN_TURBO; break; case 's': /* st = Atheros Static Turbo */ flags |= IEEE80211_CHAN_STURBO; break; default: errx(-1, "%s: Invalid channel attribute %c\n", val, *cp); } } } cp = strchr(val, '/'); if (cp != NULL) { char *ep; u_long cw = strtoul(cp+1, &ep, 10); switch (cw) { case 5: flags |= IEEE80211_CHAN_QUARTER; break; case 10: flags |= IEEE80211_CHAN_HALF; break; case 20: /* NB: this may be removed below */ flags |= IEEE80211_CHAN_HT20; break; case 40: case 80: case 160: /* Handle the 80/160 VHT flag */ if (cw == 80) flags |= IEEE80211_CHAN_VHT80; else if (cw == 160) flags |= IEEE80211_CHAN_VHT160; /* Fallthrough */ if (ep != NULL && *ep == '+') flags |= IEEE80211_CHAN_HT40U; else if (ep != NULL && *ep == '-') flags |= IEEE80211_CHAN_HT40D; break; default: errx(-1, "%s: Invalid channel width\n", val); } } /* * Cleanup specifications. */ if ((flags & _CHAN_HT) == 0) { /* * If user specified freq/20 or freq/40 quietly remove * HT cw attributes depending on channel use. To give * an explicit 20/40 width for an HT channel you must * indicate it is an HT channel since all HT channels * are also usable for legacy operation; e.g. freq:n/40. */ flags &= ~IEEE80211_CHAN_HT; flags &= ~IEEE80211_CHAN_VHT; } else { /* * Remove private indicator that this is an HT channel * and if no explicit channel width has been given * provide the default settings. */ flags &= ~_CHAN_HT; if ((flags & IEEE80211_CHAN_HT) == 0) { struct ieee80211_channel chan; /* * Consult the channel list to see if we can use * HT40+ or HT40- (if both the map routines choose). */ if (freq > 255) mapfreq(&chan, freq, 0); else mapchan(&chan, freq, 0); flags |= (chan.ic_flags & IEEE80211_CHAN_HT); } /* * If VHT is enabled, then also set the VHT flag and the * relevant channel up/down. */ if (is_vht && (flags & IEEE80211_CHAN_HT)) { /* * XXX yes, maybe we should just have VHT, and reuse * HT20/HT40U/HT40D */ if (flags & IEEE80211_CHAN_VHT80) ; else if (flags & IEEE80211_CHAN_HT20) flags |= IEEE80211_CHAN_VHT20; else if (flags & IEEE80211_CHAN_HT40U) flags |= IEEE80211_CHAN_VHT40U; else if (flags & IEEE80211_CHAN_HT40D) flags |= IEEE80211_CHAN_VHT40D; } } return flags; #undef _CHAN_HT } static void getchannel(int s, struct ieee80211_channel *chan, const char *val) { int v, flags; char *eptr; memset(chan, 0, sizeof(*chan)); if (isanyarg(val)) { chan->ic_freq = IEEE80211_CHAN_ANY; return; } getchaninfo(s); errno = 0; v = strtol(val, &eptr, 10); if (val[0] == '\0' || val == eptr || errno == ERANGE || /* channel may be suffixed with nothing, :flag, or /width */ (eptr[0] != '\0' && eptr[0] != ':' && eptr[0] != '/')) errx(1, "invalid channel specification%s", errno == ERANGE ? 
" (out of range)" : ""); flags = getchannelflags(val, v); if (v > 255) { /* treat as frequency */ mapfreq(chan, v, flags); } else { mapchan(chan, v, flags); } } static void set80211channel(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211_channel chan; getchannel(s, &chan, val); set80211(s, IEEE80211_IOC_CURCHAN, 0, sizeof(chan), &chan); } static void set80211chanswitch(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211_chanswitch_req csr; getchannel(s, &csr.csa_chan, val); csr.csa_mode = 1; csr.csa_count = 5; set80211(s, IEEE80211_IOC_CHANSWITCH, 0, sizeof(csr), &csr); } static void set80211authmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "none") == 0) { mode = IEEE80211_AUTH_NONE; } else if (strcasecmp(val, "open") == 0) { mode = IEEE80211_AUTH_OPEN; } else if (strcasecmp(val, "shared") == 0) { mode = IEEE80211_AUTH_SHARED; } else if (strcasecmp(val, "8021x") == 0) { mode = IEEE80211_AUTH_8021X; } else if (strcasecmp(val, "wpa") == 0) { mode = IEEE80211_AUTH_WPA; } else { errx(1, "unknown authmode"); } set80211(s, IEEE80211_IOC_AUTHMODE, mode, 0, NULL); } static void set80211powersavemode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_POWERSAVE_OFF; } else if (strcasecmp(val, "on") == 0) { mode = IEEE80211_POWERSAVE_ON; } else if (strcasecmp(val, "cam") == 0) { mode = IEEE80211_POWERSAVE_CAM; } else if (strcasecmp(val, "psp") == 0) { mode = IEEE80211_POWERSAVE_PSP; } else if (strcasecmp(val, "psp-cam") == 0) { mode = IEEE80211_POWERSAVE_PSP_CAM; } else { errx(1, "unknown powersavemode"); } set80211(s, IEEE80211_IOC_POWERSAVE, mode, 0, NULL); } static void set80211powersave(const char *val, int d, int s, const struct afswtch *rafp) { if (d == 0) set80211(s, IEEE80211_IOC_POWERSAVE, IEEE80211_POWERSAVE_OFF, 0, NULL); else set80211(s, IEEE80211_IOC_POWERSAVE, IEEE80211_POWERSAVE_ON, 0, NULL); } static void set80211powersavesleep(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_POWERSAVESLEEP, atoi(val), 0, NULL); } static void set80211wepmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_WEP_OFF; } else if (strcasecmp(val, "on") == 0) { mode = IEEE80211_WEP_ON; } else if (strcasecmp(val, "mixed") == 0) { mode = IEEE80211_WEP_MIXED; } else { errx(1, "unknown wep mode"); } set80211(s, IEEE80211_IOC_WEP, mode, 0, NULL); } static void set80211wep(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_WEP, d, 0, NULL); } static int isundefarg(const char *arg) { return (strcmp(arg, "-") == 0 || strncasecmp(arg, "undef", 5) == 0); } static void set80211weptxkey(const char *val, int d, int s, const struct afswtch *rafp) { if (isundefarg(val)) set80211(s, IEEE80211_IOC_WEPTXKEY, IEEE80211_KEYIX_NONE, 0, NULL); else set80211(s, IEEE80211_IOC_WEPTXKEY, atoi(val)-1, 0, NULL); } static void set80211wepkey(const char *val, int d, int s, const struct afswtch *rafp) { int key = 0; int len; u_int8_t data[IEEE80211_KEYBUF_SIZE]; if (isdigit((int)val[0]) && val[1] == ':') { key = atoi(val)-1; val += 2; } bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); set80211(s, IEEE80211_IOC_WEPKEY, key, len, data); } /* * This function is purely a NetBSD compatibility interface. 
The NetBSD * interface is too inflexible, but it's there so we'll support it since * it's not all that hard. */ static void set80211nwkey(const char *val, int d, int s, const struct afswtch *rafp) { int txkey; int i, len; u_int8_t data[IEEE80211_KEYBUF_SIZE]; set80211(s, IEEE80211_IOC_WEP, IEEE80211_WEP_ON, 0, NULL); if (isdigit((int)val[0]) && val[1] == ':') { txkey = val[0]-'0'-1; val += 2; for (i = 0; i < 4; i++) { bzero(data, sizeof(data)); len = sizeof(data); val = get_string(val, ",", data, &len); if (val == NULL) exit(1); set80211(s, IEEE80211_IOC_WEPKEY, i, len, data); } } else { bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); txkey = 0; set80211(s, IEEE80211_IOC_WEPKEY, 0, len, data); bzero(data, sizeof(data)); for (i = 1; i < 4; i++) set80211(s, IEEE80211_IOC_WEPKEY, i, 0, data); } set80211(s, IEEE80211_IOC_WEPTXKEY, txkey, 0, NULL); } static void set80211rtsthreshold(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_RTSTHRESHOLD, isundefarg(val) ? IEEE80211_RTS_MAX : atoi(val), 0, NULL); } static void set80211protmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_PROTMODE_OFF; } else if (strcasecmp(val, "cts") == 0) { mode = IEEE80211_PROTMODE_CTS; } else if (strncasecmp(val, "rtscts", 3) == 0) { mode = IEEE80211_PROTMODE_RTSCTS; } else { errx(1, "unknown protection mode"); } set80211(s, IEEE80211_IOC_PROTMODE, mode, 0, NULL); } static void set80211htprotmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_PROTMODE_OFF; } else if (strncasecmp(val, "rts", 3) == 0) { mode = IEEE80211_PROTMODE_RTSCTS; } else { errx(1, "unknown protection mode"); } set80211(s, IEEE80211_IOC_HTPROTMODE, mode, 0, NULL); } static void set80211txpower(const char *val, int d, int s, const struct afswtch *rafp) { double v = atof(val); int txpow; txpow = (int) (2*v); if (txpow != 2*v) errx(-1, "invalid tx power (must be .5 dBm units)"); set80211(s, IEEE80211_IOC_TXPOWER, txpow, 0, NULL); } #define IEEE80211_ROAMING_DEVICE 0 #define IEEE80211_ROAMING_AUTO 1 #define IEEE80211_ROAMING_MANUAL 2 static void set80211roaming(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "device") == 0) { mode = IEEE80211_ROAMING_DEVICE; } else if (strcasecmp(val, "auto") == 0) { mode = IEEE80211_ROAMING_AUTO; } else if (strcasecmp(val, "manual") == 0) { mode = IEEE80211_ROAMING_MANUAL; } else { errx(1, "unknown roaming mode"); } set80211(s, IEEE80211_IOC_ROAMING, mode, 0, NULL); } static void set80211wme(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_WME, d, 0, NULL); } static void set80211hidessid(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HIDESSID, d, 0, NULL); } static void set80211apbridge(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_APBRIDGE, d, 0, NULL); } static void set80211fastframes(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_FF, d, 0, NULL); } static void set80211dturbo(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_TURBOP, d, 0, NULL); } static void set80211chanlist(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211req_chanlist chanlist; char *temp, *cp, *tp; temp = malloc(strlen(val) + 1); if (temp == NULL) errx(1, "malloc 
failed"); strcpy(temp, val); memset(&chanlist, 0, sizeof(chanlist)); cp = temp; for (;;) { int first, last, f, c; tp = strchr(cp, ','); if (tp != NULL) *tp++ = '\0'; switch (sscanf(cp, "%u-%u", &first, &last)) { case 1: if (first > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", first, IEEE80211_CHAN_MAX); setbit(chanlist.ic_channels, first); break; case 2: if (first > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", first, IEEE80211_CHAN_MAX); if (last > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", last, IEEE80211_CHAN_MAX); if (first > last) errx(-1, "void channel range, %u > %u", first, last); for (f = first; f <= last; f++) setbit(chanlist.ic_channels, f); break; } if (tp == NULL) break; c = *tp; while (isspace(c)) tp++; if (!isdigit(c)) break; cp = tp; } set80211(s, IEEE80211_IOC_CHANLIST, 0, sizeof(chanlist), &chanlist); } static void set80211bssid(const char *val, int d, int s, const struct afswtch *rafp) { if (!isanyarg(val)) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, IEEE80211_IOC_BSSID, 0, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } else { uint8_t zerobssid[IEEE80211_ADDR_LEN]; memset(zerobssid, 0, sizeof(zerobssid)); set80211(s, IEEE80211_IOC_BSSID, 0, IEEE80211_ADDR_LEN, zerobssid); } } static int getac(const char *ac) { if (strcasecmp(ac, "ac_be") == 0 || strcasecmp(ac, "be") == 0) return WME_AC_BE; if (strcasecmp(ac, "ac_bk") == 0 || strcasecmp(ac, "bk") == 0) return WME_AC_BK; if (strcasecmp(ac, "ac_vi") == 0 || strcasecmp(ac, "vi") == 0) return WME_AC_VI; if (strcasecmp(ac, "ac_vo") == 0 || strcasecmp(ac, "vo") == 0) return WME_AC_VO; errx(1, "unknown wme access class %s", ac); } static DECL_CMD_FUNC2(set80211cwmin, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMIN, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211cwmax, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMAX, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211aifs, ac, val) { set80211(s, IEEE80211_IOC_WME_AIFS, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211txoplimit, ac, val) { set80211(s, IEEE80211_IOC_WME_TXOPLIMIT, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC(set80211acm, ac, d) { set80211(s, IEEE80211_IOC_WME_ACM, 1, getac(ac), NULL); } static DECL_CMD_FUNC(set80211noacm, ac, d) { set80211(s, IEEE80211_IOC_WME_ACM, 0, getac(ac), NULL); } static DECL_CMD_FUNC(set80211ackpolicy, ac, d) { set80211(s, IEEE80211_IOC_WME_ACKPOLICY, 1, getac(ac), NULL); } static DECL_CMD_FUNC(set80211noackpolicy, ac, d) { set80211(s, IEEE80211_IOC_WME_ACKPOLICY, 0, getac(ac), NULL); } static DECL_CMD_FUNC2(set80211bsscwmin, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMIN, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bsscwmax, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMAX, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bssaifs, ac, val) { set80211(s, IEEE80211_IOC_WME_AIFS, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bsstxoplimit, ac, val) { set80211(s, IEEE80211_IOC_WME_TXOPLIMIT, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC(set80211dtimperiod, val, d) { set80211(s, IEEE80211_IOC_DTIM_PERIOD, atoi(val), 0, NULL); } static 
DECL_CMD_FUNC(set80211bintval, val, d) { set80211(s, IEEE80211_IOC_BEACON_INTERVAL, atoi(val), 0, NULL); } static void set80211macmac(int s, int op, const char *val) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, op, 0, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } static DECL_CMD_FUNC(set80211addmac, val, d) { set80211macmac(s, IEEE80211_IOC_ADDMAC, val); } static DECL_CMD_FUNC(set80211delmac, val, d) { set80211macmac(s, IEEE80211_IOC_DELMAC, val); } static DECL_CMD_FUNC(set80211kickmac, val, d) { char *temp; struct sockaddr_dl sdl; struct ieee80211req_mlme mlme; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); memset(&mlme, 0, sizeof(mlme)); mlme.im_op = IEEE80211_MLME_DEAUTH; mlme.im_reason = IEEE80211_REASON_AUTH_EXPIRE; memcpy(mlme.im_macaddr, LLADDR(&sdl), IEEE80211_ADDR_LEN); set80211(s, IEEE80211_IOC_MLME, 0, sizeof(mlme), &mlme); } static DECL_CMD_FUNC(set80211maccmd, val, d) { set80211(s, IEEE80211_IOC_MACCMD, d, 0, NULL); } static void set80211meshrtmac(int s, int req, const char *val) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, IEEE80211_IOC_MESH_RTCMD, req, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } static DECL_CMD_FUNC(set80211addmeshrt, val, d) { set80211meshrtmac(s, IEEE80211_MESH_RTCMD_ADD, val); } static DECL_CMD_FUNC(set80211delmeshrt, val, d) { set80211meshrtmac(s, IEEE80211_MESH_RTCMD_DELETE, val); } static DECL_CMD_FUNC(set80211meshrtcmd, val, d) { set80211(s, IEEE80211_IOC_MESH_RTCMD, d, 0, NULL); } static DECL_CMD_FUNC(set80211hwmprootmode, val, d) { int mode; if (strcasecmp(val, "normal") == 0) mode = IEEE80211_HWMP_ROOTMODE_NORMAL; else if (strcasecmp(val, "proactive") == 0) mode = IEEE80211_HWMP_ROOTMODE_PROACTIVE; else if (strcasecmp(val, "rann") == 0) mode = IEEE80211_HWMP_ROOTMODE_RANN; else mode = IEEE80211_HWMP_ROOTMODE_DISABLED; set80211(s, IEEE80211_IOC_HWMP_ROOTMODE, mode, 0, NULL); } static DECL_CMD_FUNC(set80211hwmpmaxhops, val, d) { set80211(s, IEEE80211_IOC_HWMP_MAXHOPS, atoi(val), 0, NULL); } static void set80211pureg(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_PUREG, d, 0, NULL); } static void set80211quiet(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_QUIET, d, 0, NULL); } static DECL_CMD_FUNC(set80211quietperiod, val, d) { set80211(s, IEEE80211_IOC_QUIET_PERIOD, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietcount, val, d) { set80211(s, IEEE80211_IOC_QUIET_COUNT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietduration, val, d) { set80211(s, IEEE80211_IOC_QUIET_DUR, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietoffset, val, d) { set80211(s, IEEE80211_IOC_QUIET_OFFSET, atoi(val), 0, NULL); } static void set80211bgscan(const char *val, int d, int s, const struct afswtch *rafp) { 
set80211(s, IEEE80211_IOC_BGSCAN, d, 0, NULL); } static DECL_CMD_FUNC(set80211bgscanidle, val, d) { set80211(s, IEEE80211_IOC_BGSCAN_IDLE, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211bgscanintvl, val, d) { set80211(s, IEEE80211_IOC_BGSCAN_INTERVAL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211scanvalid, val, d) { set80211(s, IEEE80211_IOC_SCANVALID, atoi(val), 0, NULL); } /* * Parse an optional trailing specification of which netbands * to apply a parameter to. This is basically the same syntax * as used for channels but you can concatenate to specify * multiple. For example: * 14:abg apply to 11a, 11b, and 11g * 6:ht apply to 11na and 11ng * We don't make a big effort to catch silly things; this is * really a convenience mechanism. */ static int getmodeflags(const char *val) { const char *cp; int flags; flags = 0; cp = strchr(val, ':'); if (cp != NULL) { for (cp++; isalpha((int) *cp); cp++) { /* accept mixed case */ int c = *cp; if (isupper(c)) c = tolower(c); switch (c) { case 'a': /* 802.11a */ flags |= IEEE80211_CHAN_A; break; case 'b': /* 802.11b */ flags |= IEEE80211_CHAN_B; break; case 'g': /* 802.11g */ flags |= IEEE80211_CHAN_G; break; case 'n': /* 802.11n */ flags |= IEEE80211_CHAN_HT; break; case 'd': /* dt = Atheros Dynamic Turbo */ flags |= IEEE80211_CHAN_TURBO; break; case 't': /* ht, dt, st, t */ /* dt and unadorned t specify Dynamic Turbo */ if ((flags & (IEEE80211_CHAN_STURBO|IEEE80211_CHAN_HT)) == 0) flags |= IEEE80211_CHAN_TURBO; break; case 's': /* st = Atheros Static Turbo */ flags |= IEEE80211_CHAN_STURBO; break; case 'h': /* 1/2-width channels */ flags |= IEEE80211_CHAN_HALF; break; case 'q': /* 1/4-width channels */ flags |= IEEE80211_CHAN_QUARTER; break; case 'v': /* XXX set HT too? */ flags |= IEEE80211_CHAN_VHT; break; default: errx(-1, "%s: Invalid mode attribute %c\n", val, *cp); } } } return flags; } #define IEEE80211_CHAN_HTA (IEEE80211_CHAN_HT|IEEE80211_CHAN_5GHZ) #define IEEE80211_CHAN_HTG (IEEE80211_CHAN_HT|IEEE80211_CHAN_2GHZ) #define _APPLY(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ if ((_flags & (IEEE80211_CHAN_5GHZ|IEEE80211_CHAN_2GHZ)) == 0) {\ _base.params[IEEE80211_MODE_11NA]._param = _v; \ _base.params[IEEE80211_MODE_11NG]._param = _v; \ } else if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_11NA]._param = _v; \ else \ _base.params[IEEE80211_MODE_11NG]._param = _v; \ } \ if (_flags & IEEE80211_CHAN_TURBO) { \ if ((_flags & (IEEE80211_CHAN_5GHZ|IEEE80211_CHAN_2GHZ)) == 0) {\ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ } else if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ else \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ } \ if (_flags & IEEE80211_CHAN_STURBO) \ _base.params[IEEE80211_MODE_STURBO_A]._param = _v; \ if ((_flags & IEEE80211_CHAN_A) == IEEE80211_CHAN_A) \ _base.params[IEEE80211_MODE_11A]._param = _v; \ if ((_flags & IEEE80211_CHAN_G) == IEEE80211_CHAN_G) \ _base.params[IEEE80211_MODE_11G]._param = _v; \ if ((_flags & IEEE80211_CHAN_B) == IEEE80211_CHAN_B) \ _base.params[IEEE80211_MODE_11B]._param = _v; \ if (_flags & IEEE80211_CHAN_HALF) \ _base.params[IEEE80211_MODE_HALF]._param = _v; \ if (_flags & IEEE80211_CHAN_QUARTER) \ _base.params[IEEE80211_MODE_QUARTER]._param = _v; \ } while (0) #define _APPLY1(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_11NA]._param = _v; \ else \ 
_base.params[IEEE80211_MODE_11NG]._param = _v; \ } else if ((_flags & IEEE80211_CHAN_108A) == IEEE80211_CHAN_108A) \ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ else if ((_flags & IEEE80211_CHAN_108G) == IEEE80211_CHAN_108G) \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ else if ((_flags & IEEE80211_CHAN_ST) == IEEE80211_CHAN_ST) \ _base.params[IEEE80211_MODE_STURBO_A]._param = _v; \ else if (_flags & IEEE80211_CHAN_HALF) \ _base.params[IEEE80211_MODE_HALF]._param = _v; \ else if (_flags & IEEE80211_CHAN_QUARTER) \ _base.params[IEEE80211_MODE_QUARTER]._param = _v; \ else if ((_flags & IEEE80211_CHAN_A) == IEEE80211_CHAN_A) \ _base.params[IEEE80211_MODE_11A]._param = _v; \ else if ((_flags & IEEE80211_CHAN_G) == IEEE80211_CHAN_G) \ _base.params[IEEE80211_MODE_11G]._param = _v; \ else if ((_flags & IEEE80211_CHAN_B) == IEEE80211_CHAN_B) \ _base.params[IEEE80211_MODE_11B]._param = _v; \ } while (0) #define _APPLY_RATE(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ (_v) = (_v / 2) | IEEE80211_RATE_MCS; \ } \ _APPLY(_flags, _base, _param, _v); \ } while (0) #define _APPLY_RATE1(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ (_v) = (_v / 2) | IEEE80211_RATE_MCS; \ } \ _APPLY1(_flags, _base, _param, _v); \ } while (0) static DECL_CMD_FUNC(set80211roamrssi, val, d) { double v = atof(val); int rssi, flags; rssi = (int) (2*v); if (rssi != 2*v) errx(-1, "invalid rssi (must be .5 dBm units)"); flags = getmodeflags(val); getroam(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY1(flags, roamparams, rssi, rssi); } else _APPLY(flags, roamparams, rssi, rssi); callback_register(setroam_cb, &roamparams); } static int getrate(const char *val, const char *tag) { double v = atof(val); int rate; rate = (int) (2*v); if (rate != 2*v) errx(-1, "invalid %s rate (must be .5 Mb/s units)", tag); return rate; /* NB: returns 2x the specified value */ } static DECL_CMD_FUNC(set80211roamrate, val, d) { int rate, flags; rate = getrate(val, "roam"); flags = getmodeflags(val); getroam(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, roamparams, rate, rate); } else _APPLY_RATE(flags, roamparams, rate, rate); callback_register(setroam_cb, &roamparams); } static DECL_CMD_FUNC(set80211mcastrate, val, d) { int rate, flags; rate = getrate(val, "mcast"); flags = getmodeflags(val); gettxparams(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, txparams, mcastrate, rate); } else _APPLY_RATE(flags, txparams, mcastrate, rate); callback_register(settxparams_cb, &txparams); } static DECL_CMD_FUNC(set80211mgtrate, val, d) { int rate, flags; rate = getrate(val, "mgmt"); flags = getmodeflags(val); gettxparams(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, txparams, mgmtrate, rate); } else _APPLY_RATE(flags, txparams, mgmtrate, rate); callback_register(settxparams_cb, &txparams); } static DECL_CMD_FUNC(set80211ucastrate, val, d) { int flags; gettxparams(s); flags = getmodeflags(val); if (isanyarg(val)) { if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY1(flags, txparams, ucastrate, IEEE80211_FIXED_RATE_NONE); } else _APPLY(flags, txparams, ucastrate, IEEE80211_FIXED_RATE_NONE); } else { int rate = getrate(val, "ucast"); if (flags == 0) { /* NB: no flags => current channel */ flags = 
			    getcurchan(s)->ic_flags;
			_APPLY_RATE1(flags, txparams, ucastrate, rate);
		} else
			_APPLY_RATE(flags, txparams, ucastrate, rate);
	}
	callback_register(settxparams_cb, &txparams);
}

static DECL_CMD_FUNC(set80211maxretry, val, d)
{
	int v = atoi(val), flags;

	flags = getmodeflags(val);
	gettxparams(s);
	if (flags == 0) {	/* NB: no flags => current channel */
		flags = getcurchan(s)->ic_flags;
		_APPLY1(flags, txparams, maxretry, v);
	} else
		_APPLY(flags, txparams, maxretry, v);
	callback_register(settxparams_cb, &txparams);
}
#undef _APPLY_RATE
#undef _APPLY
#undef IEEE80211_CHAN_HTA
#undef IEEE80211_CHAN_HTG

static DECL_CMD_FUNC(set80211fragthreshold, val, d)
{
	set80211(s, IEEE80211_IOC_FRAGTHRESHOLD,
	    isundefarg(val) ? IEEE80211_FRAG_MAX : atoi(val), 0, NULL);
}

static DECL_CMD_FUNC(set80211bmissthreshold, val, d)
{
	set80211(s, IEEE80211_IOC_BMISSTHRESHOLD,
	    isundefarg(val) ? IEEE80211_HWBMISS_MAX : atoi(val), 0, NULL);
}

static void
set80211burst(const char *val, int d, int s, const struct afswtch *rafp)
{
	set80211(s, IEEE80211_IOC_BURST, d, 0, NULL);
}

static void
set80211doth(const char *val, int d, int s, const struct afswtch *rafp)
{
	set80211(s, IEEE80211_IOC_DOTH, d, 0, NULL);
}

static void
set80211dfs(const char *val, int d, int s, const struct afswtch *rafp)
{
	set80211(s, IEEE80211_IOC_DFS, d, 0, NULL);
}

static void
set80211shortgi(const char *val, int d, int s, const struct afswtch *rafp)
{
	set80211(s, IEEE80211_IOC_SHORTGI,
	    d ? (IEEE80211_HTCAP_SHORTGI20 | IEEE80211_HTCAP_SHORTGI40) : 0,
	    0, NULL);
}

static void
set80211ampdu(const char *val, int d, int s, const struct afswtch *rafp)
{
	int ampdu;

	if (get80211val(s, IEEE80211_IOC_AMPDU, &ampdu) < 0)
		errx(-1, "cannot get AMPDU setting");
	if (d < 0) {
		d = -d;
		ampdu &= ~d;
	} else
		ampdu |= d;
	set80211(s, IEEE80211_IOC_AMPDU, ampdu, 0, NULL);
}

static void
set80211stbc(const char *val, int d, int s, const struct afswtch *rafp)
{
	int stbc;

	if (get80211val(s, IEEE80211_IOC_STBC, &stbc) < 0)
		errx(-1, "cannot get STBC setting");
	if (d < 0) {
		d = -d;
		stbc &= ~d;
	} else
		stbc |= d;
	set80211(s, IEEE80211_IOC_STBC, stbc, 0, NULL);
}

static void
set80211ldpc(const char *val, int d, int s, const struct afswtch *rafp)
{
	int ldpc;

	if (get80211val(s, IEEE80211_IOC_LDPC, &ldpc) < 0)
		errx(-1, "cannot get LDPC setting");
	if (d < 0) {
		d = -d;
		ldpc &= ~d;
	} else
		ldpc |= d;
	set80211(s, IEEE80211_IOC_LDPC, ldpc, 0, NULL);
}

static DECL_CMD_FUNC(set80211ampdulimit, val, d)
{
	int v;

	switch (atoi(val)) {
	case 8:
	case 8*1024:
		v = IEEE80211_HTCAP_MAXRXAMPDU_8K;
		break;
	case 16:
	case 16*1024:
		v = IEEE80211_HTCAP_MAXRXAMPDU_16K;
		break;
	case 32:
	case 32*1024:
		v = IEEE80211_HTCAP_MAXRXAMPDU_32K;
		break;
	case 64:
	case 64*1024:
		v = IEEE80211_HTCAP_MAXRXAMPDU_64K;
		break;
	default:
		errx(-1, "invalid A-MPDU limit %s", val);
	}
	set80211(s, IEEE80211_IOC_AMPDU_LIMIT, v, 0, NULL);
}

static DECL_CMD_FUNC(set80211ampdudensity, val, d)
{
	int v;

	if (isanyarg(val) || strcasecmp(val, "na") == 0)
		v = IEEE80211_HTCAP_MPDUDENSITY_NA;
	else
		switch ((int)(atof(val)*4)) {
		case 0:
			v = IEEE80211_HTCAP_MPDUDENSITY_NA;
			break;
		case 1:
			v = IEEE80211_HTCAP_MPDUDENSITY_025;
			break;
		case 2:
			v = IEEE80211_HTCAP_MPDUDENSITY_05;
			break;
		case 4:
			v = IEEE80211_HTCAP_MPDUDENSITY_1;
			break;
		case 8:
			v = IEEE80211_HTCAP_MPDUDENSITY_2;
			break;
		case 16:
			v = IEEE80211_HTCAP_MPDUDENSITY_4;
			break;
		case 32:
			v = IEEE80211_HTCAP_MPDUDENSITY_8;
			break;
		case 64:
			v = IEEE80211_HTCAP_MPDUDENSITY_16;
			break;
		default:
			errx(-1, "invalid A-MPDU density %s", val);
		}
	set80211(s, IEEE80211_IOC_AMPDU_DENSITY, v, 0, NULL);
}

static void
set80211amsdu(const char *val, int d, int s, const struct afswtch *rafp) { int amsdu; if (get80211val(s, IEEE80211_IOC_AMSDU, &amsdu) < 0) err(-1, "cannot get AMSDU setting"); if (d < 0) { d = -d; amsdu &= ~d; } else amsdu |= d; set80211(s, IEEE80211_IOC_AMSDU, amsdu, 0, NULL); } static DECL_CMD_FUNC(set80211amsdulimit, val, d) { set80211(s, IEEE80211_IOC_AMSDU_LIMIT, atoi(val), 0, NULL); } static void set80211puren(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_PUREN, d, 0, NULL); } static void set80211htcompat(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HTCOMPAT, d, 0, NULL); } static void set80211htconf(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HTCONF, d, 0, NULL); htconf = d; } static void set80211dwds(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DWDS, d, 0, NULL); } static void set80211inact(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_INACTIVITY, d, 0, NULL); } static void set80211tsn(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_TSN, d, 0, NULL); } static void set80211dotd(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DOTD, d, 0, NULL); } static void set80211smps(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_SMPS, d, 0, NULL); } static void set80211rifs(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_RIFS, d, 0, NULL); } static void set80211vhtconf(const char *val, int d, int s, const struct afswtch *rafp) { if (get80211val(s, IEEE80211_IOC_VHTCONF, &vhtconf) < 0) errx(-1, "cannot set VHT setting"); printf("%s: vhtconf=0x%08x, d=%d\n", __func__, vhtconf, d); if (d < 0) { d = -d; vhtconf &= ~d; } else vhtconf |= d; printf("%s: vhtconf is now 0x%08x\n", __func__, vhtconf); set80211(s, IEEE80211_IOC_VHTCONF, vhtconf, 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslot, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslotcnt, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOTCNT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslotlen, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOTLEN, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmabintval, val, d) { set80211(s, IEEE80211_IOC_TDMA_BINTERVAL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211meshttl, val, d) { set80211(s, IEEE80211_IOC_MESH_TTL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211meshforward, val, d) { set80211(s, IEEE80211_IOC_MESH_FWRD, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshgate, val, d) { set80211(s, IEEE80211_IOC_MESH_GATE, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshpeering, val, d) { set80211(s, IEEE80211_IOC_MESH_AP, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshmetric, val, d) { char v[12]; memcpy(v, val, sizeof(v)); set80211(s, IEEE80211_IOC_MESH_PR_METRIC, 0, 0, v); } static DECL_CMD_FUNC(set80211meshpath, val, d) { char v[12]; memcpy(v, val, sizeof(v)); set80211(s, IEEE80211_IOC_MESH_PR_PATH, 0, 0, v); } static int regdomain_sort(const void *a, const void *b) { #define CHAN_ALL \ (IEEE80211_CHAN_ALLTURBO|IEEE80211_CHAN_HALF|IEEE80211_CHAN_QUARTER) const struct ieee80211_channel *ca = a; const struct ieee80211_channel *cb = b; return ca->ic_freq == cb->ic_freq ? 
(ca->ic_flags & CHAN_ALL) - (cb->ic_flags & CHAN_ALL) : ca->ic_freq - cb->ic_freq; #undef CHAN_ALL } static const struct ieee80211_channel * chanlookup(const struct ieee80211_channel chans[], int nchans, int freq, int flags) { int i; flags &= IEEE80211_CHAN_ALLTURBO; for (i = 0; i < nchans; i++) { const struct ieee80211_channel *c = &chans[i]; if (c->ic_freq == freq && (c->ic_flags & IEEE80211_CHAN_ALLTURBO) == flags) return c; } return NULL; } static int chanfind(const struct ieee80211_channel chans[], int nchans, int flags) { int i; for (i = 0; i < nchans; i++) { const struct ieee80211_channel *c = &chans[i]; if ((c->ic_flags & flags) == flags) return 1; } return 0; } /* * Check channel compatibility. */ static int checkchan(const struct ieee80211req_chaninfo *avail, int freq, int flags) { flags &= ~REQ_FLAGS; /* * Check if exact channel is in the calibration table; * everything below is to deal with channels that we * want to include but that are not explicitly listed. */ if (chanlookup(avail->ic_chans, avail->ic_nchans, freq, flags) != NULL) return 1; if (flags & IEEE80211_CHAN_GSM) { /* * XXX GSM frequency mapping is handled in the kernel * so we cannot find them in the calibration table; * just accept the channel and the kernel will reject * the channel list if it's wrong. */ return 1; } /* * If this is a 1/2 or 1/4 width channel allow it if a full * width channel is present for this frequency, and the device * supports fractional channels on this band. This is a hack * that avoids bloating the calibration table; it may be better * by per-band attributes though (we are effectively calculating * this attribute by scanning the channel list ourself). */ if ((flags & (IEEE80211_CHAN_HALF | IEEE80211_CHAN_QUARTER)) == 0) return 0; if (chanlookup(avail->ic_chans, avail->ic_nchans, freq, flags &~ (IEEE80211_CHAN_HALF | IEEE80211_CHAN_QUARTER)) == NULL) return 0; if (flags & IEEE80211_CHAN_HALF) { return chanfind(avail->ic_chans, avail->ic_nchans, IEEE80211_CHAN_HALF | (flags & (IEEE80211_CHAN_2GHZ | IEEE80211_CHAN_5GHZ))); } else { return chanfind(avail->ic_chans, avail->ic_nchans, IEEE80211_CHAN_QUARTER | (flags & (IEEE80211_CHAN_2GHZ | IEEE80211_CHAN_5GHZ))); } } static void regdomain_addchans(struct ieee80211req_chaninfo *ci, const netband_head *bands, const struct ieee80211_regdomain *reg, uint32_t chanFlags, const struct ieee80211req_chaninfo *avail) { const struct netband *nb; const struct freqband *b; struct ieee80211_channel *c, *prev; int freq, hi_adj, lo_adj, channelSep; uint32_t flags; hi_adj = (chanFlags & IEEE80211_CHAN_HT40U) ? -20 : 0; lo_adj = (chanFlags & IEEE80211_CHAN_HT40D) ? 20 : 0; channelSep = (chanFlags & IEEE80211_CHAN_2GHZ) ? 0 : 40; LIST_FOREACH(nb, bands, next) { b = nb->band; if (verbose) { printf("%s:", __func__); printb(" chanFlags", chanFlags, IEEE80211_CHAN_BITS); printb(" bandFlags", nb->flags | b->flags, IEEE80211_CHAN_BITS); putchar('\n'); } prev = NULL; for (freq = b->freqStart + lo_adj; freq <= b->freqEnd + hi_adj; freq += b->chanSep) { /* * Construct flags for the new channel. We take * the attributes from the band descriptions except * for HT40 which is enabled generically (i.e. +/- * extension channel) in the band description and * then constrained according by channel separation. */ flags = nb->flags | b->flags; /* * VHT first - HT is a subset. * * XXX TODO: VHT80p80, VHT160 is not yet done. 
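			 *
			 * In short: if the caller's chanFlags ask for a
			 * particular VHT width (VHT20/VHT40/VHT80), band
			 * entries that do not advertise that width are
			 * skipped, and the surviving entries have their VHT
			 * bits replaced by chanFlags before the HT bits are
			 * constrained the same way below.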
*/ if (flags & IEEE80211_CHAN_VHT) { if ((chanFlags & IEEE80211_CHAN_VHT20) && (flags & IEEE80211_CHAN_VHT20) == 0) { if (verbose) printf("%u: skip, not a " "VHT20 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_VHT40) && (flags & IEEE80211_CHAN_VHT40) == 0) { if (verbose) printf("%u: skip, not a " "VHT40 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_VHT80) && (flags & IEEE80211_CHAN_VHT80) == 0) { if (verbose) printf("%u: skip, not a " "VHT80 channel\n", freq); continue; } flags &= ~IEEE80211_CHAN_VHT; flags |= chanFlags & IEEE80211_CHAN_VHT; } /* Now, constrain HT */ if (flags & IEEE80211_CHAN_HT) { /* * HT channels are generated specially; we're * called to add HT20, HT40+, and HT40- chan's * so we need to expand only band specs for * the HT channel type being added. */ if ((chanFlags & IEEE80211_CHAN_HT20) && (flags & IEEE80211_CHAN_HT20) == 0) { if (verbose) printf("%u: skip, not an " "HT20 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_HT40) && (flags & IEEE80211_CHAN_HT40) == 0) { if (verbose) printf("%u: skip, not an " "HT40 channel\n", freq); continue; } /* NB: HT attribute comes from caller */ flags &= ~IEEE80211_CHAN_HT; flags |= chanFlags & IEEE80211_CHAN_HT; } /* * Check if device can operate on this frequency. */ if (!checkchan(avail, freq, flags)) { if (verbose) { printf("%u: skip, ", freq); printb("flags", flags, IEEE80211_CHAN_BITS); printf(" not available\n"); } continue; } if ((flags & REQ_ECM) && !reg->ecm) { if (verbose) printf("%u: skip, ECM channel\n", freq); continue; } if ((flags & REQ_INDOOR) && reg->location == 'O') { if (verbose) printf("%u: skip, indoor channel\n", freq); continue; } if ((flags & REQ_OUTDOOR) && reg->location == 'I') { if (verbose) printf("%u: skip, outdoor channel\n", freq); continue; } if ((flags & IEEE80211_CHAN_HT40) && prev != NULL && (freq - prev->ic_freq) < channelSep) { if (verbose) printf("%u: skip, only %u channel " "separation, need %d\n", freq, freq - prev->ic_freq, channelSep); continue; } if (ci->ic_nchans == IEEE80211_CHAN_MAX) { if (verbose) printf("%u: skip, channel table full\n", freq); break; } c = &ci->ic_chans[ci->ic_nchans++]; memset(c, 0, sizeof(*c)); c->ic_freq = freq; c->ic_flags = flags; if (c->ic_flags & IEEE80211_CHAN_DFS) c->ic_maxregpower = nb->maxPowerDFS; else c->ic_maxregpower = nb->maxPower; if (verbose) { printf("[%3d] add freq %u ", ci->ic_nchans-1, c->ic_freq); printb("flags", c->ic_flags, IEEE80211_CHAN_BITS); printf(" power %u\n", c->ic_maxregpower); } /* NB: kernel fills in other fields */ prev = c; } } } static void regdomain_makechannels( struct ieee80211_regdomain_req *req, const struct ieee80211_devcaps_req *dc) { struct regdata *rdp = getregdata(); const struct country *cc; const struct ieee80211_regdomain *reg = &req->rd; struct ieee80211req_chaninfo *ci = &req->chaninfo; const struct regdomain *rd; /* * Locate construction table for new channel list. We treat * the regdomain/SKU as definitive so a country can be in * multiple with different properties (e.g. US in FCC+FCC3). * If no regdomain is specified then we fallback on the country * code to find the associated regdomain since countries always * belong to at least one regdomain. 
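	 *
	 * Put another way: a request that names only a country is resolved
	 * through that country's associated regdomain (cc->rd below), while
	 * an explicit regdomain SKU is looked up directly and used as-is.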
 */
	if (reg->regdomain == 0) {
		cc = lib80211_country_findbycc(rdp, reg->country);
		if (cc == NULL)
			errx(1, "internal error, country %d not found",
			    reg->country);
		rd = cc->rd;
	} else
		rd = lib80211_regdomain_findbysku(rdp, reg->regdomain);
	if (rd == NULL)
		errx(1, "internal error, regdomain %d not found",
		    reg->regdomain);
	if (rd->sku != SKU_DEBUG) {
		/*
		 * regdomain_addchans increments the channel count for
		 * each channel it adds so initialize ic_nchans to zero.
		 * Note that we know we have enough space to hold all possible
		 * channels because the devcaps list size was used to
		 * allocate our request.
		 */
		ci->ic_nchans = 0;
		if (!LIST_EMPTY(&rd->bands_11b))
			regdomain_addchans(ci, &rd->bands_11b, reg,
			    IEEE80211_CHAN_B, &dc->dc_chaninfo);
		if (!LIST_EMPTY(&rd->bands_11g))
			regdomain_addchans(ci, &rd->bands_11g, reg,
			    IEEE80211_CHAN_G, &dc->dc_chaninfo);
		if (!LIST_EMPTY(&rd->bands_11a))
			regdomain_addchans(ci, &rd->bands_11a, reg,
			    IEEE80211_CHAN_A, &dc->dc_chaninfo);
		if (!LIST_EMPTY(&rd->bands_11na) && dc->dc_htcaps != 0) {
			regdomain_addchans(ci, &rd->bands_11na, reg,
			    IEEE80211_CHAN_A | IEEE80211_CHAN_HT20,
			    &dc->dc_chaninfo);
			if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
				regdomain_addchans(ci, &rd->bands_11na, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U,
				    &dc->dc_chaninfo);
				regdomain_addchans(ci, &rd->bands_11na, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D,
				    &dc->dc_chaninfo);
			}
		}
		if (!LIST_EMPTY(&rd->bands_11ac) && dc->dc_vhtcaps != 0) {
			regdomain_addchans(ci, &rd->bands_11ac, reg,
			    IEEE80211_CHAN_A | IEEE80211_CHAN_HT20 |
			    IEEE80211_CHAN_VHT20,
			    &dc->dc_chaninfo);

			/* VHT40 is a function of HT40.. */
			if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
				regdomain_addchans(ci, &rd->bands_11ac, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U |
				    IEEE80211_CHAN_VHT40U,
				    &dc->dc_chaninfo);
				regdomain_addchans(ci, &rd->bands_11ac, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D |
				    IEEE80211_CHAN_VHT40D,
				    &dc->dc_chaninfo);
			}

			/* VHT80 */
			/* XXX dc_vhtcap? */
			if (1) {
				regdomain_addchans(ci, &rd->bands_11ac, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U |
				    IEEE80211_CHAN_VHT80,
				    &dc->dc_chaninfo);
				regdomain_addchans(ci, &rd->bands_11ac, reg,
				    IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D |
				    IEEE80211_CHAN_VHT80,
				    &dc->dc_chaninfo);
			}

			/* XXX TODO: VHT80_80, VHT160 */
		}
		if (!LIST_EMPTY(&rd->bands_11ng) && dc->dc_htcaps != 0) {
			regdomain_addchans(ci, &rd->bands_11ng, reg,
			    IEEE80211_CHAN_G | IEEE80211_CHAN_HT20,
			    &dc->dc_chaninfo);
			if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
				regdomain_addchans(ci, &rd->bands_11ng, reg,
				    IEEE80211_CHAN_G | IEEE80211_CHAN_HT40U,
				    &dc->dc_chaninfo);
				regdomain_addchans(ci, &rd->bands_11ng, reg,
				    IEEE80211_CHAN_G | IEEE80211_CHAN_HT40D,
				    &dc->dc_chaninfo);
			}
		}
		qsort(ci->ic_chans, ci->ic_nchans, sizeof(ci->ic_chans[0]),
		    regdomain_sort);
	} else
		memcpy(ci, &dc->dc_chaninfo,
		    IEEE80211_CHANINFO_SPACE(&dc->dc_chaninfo));
}

static void
list_countries(void)
{
	struct regdata *rdp = getregdata();
	const struct country *cp;
	const struct regdomain *dp;
	int i;

	i = 0;
	printf("\nCountry codes:\n");
	LIST_FOREACH(cp, &rdp->countries, next) {
		printf("%2s %-15.15s%s", cp->isoname,
		    cp->name, ((i+1)%4) == 0 ? "\n" : " ");
		i++;
	}
	i = 0;
	printf("\nRegulatory domains:\n");
	LIST_FOREACH(dp, &rdp->domains, next) {
		printf("%-15.15s%s", dp->name, ((i+1)%4) == 0 ? "\n" : " ");
		i++;
	}
	printf("\n");
}

static void
defaultcountry(const struct regdomain *rd)
{
	struct regdata *rdp = getregdata();
	const struct country *cc;

	cc = lib80211_country_findbycc(rdp, rd->cc->code);
	if (cc == NULL)
		errx(1, "internal error, ISO country code %d not "
		    "defined for regdomain %s", rd->cc->code, rd->name);
	regdomain.country = cc->code;
	regdomain.isocc[0] = cc->isoname[0];
	regdomain.isocc[1] = cc->isoname[1];
}

static DECL_CMD_FUNC(set80211regdomain, val, d)
{
	struct regdata *rdp = getregdata();
	const struct regdomain *rd;

	rd = lib80211_regdomain_findbyname(rdp, val);
	if (rd == NULL) {
		char *eptr;
		long sku = strtol(val, &eptr, 0);

		if (eptr != val)
			rd = lib80211_regdomain_findbysku(rdp, sku);
		if (eptr == val || rd == NULL)
			errx(1, "unknown regdomain %s", val);
	}
	getregdomain(s);
	regdomain.regdomain = rd->sku;
	if (regdomain.country == 0 && rd->cc != NULL) {
		/*
		 * No country code is set up and there's a default
		 * one for this regdomain; fill it in.
		 */
		defaultcountry(rd);
	}
	callback_register(setregdomain_cb, &regdomain);
}

static DECL_CMD_FUNC(set80211country, val, d)
{
	struct regdata *rdp = getregdata();
	const struct country *cc;

	cc = lib80211_country_findbyname(rdp, val);
	if (cc == NULL) {
		char *eptr;
		long code = strtol(val, &eptr, 0);

		if (eptr != val)
			cc = lib80211_country_findbycc(rdp, code);
		if (eptr == val || cc == NULL)
			errx(1, "unknown ISO country code %s", val);
	}
	getregdomain(s);
	regdomain.regdomain = cc->rd->sku;
	regdomain.country = cc->code;
	regdomain.isocc[0] = cc->isoname[0];
	regdomain.isocc[1] = cc->isoname[1];
	callback_register(setregdomain_cb, &regdomain);
}

static void
set80211location(const char *val, int d, int s, const struct afswtch *rafp)
{
	getregdomain(s);
	regdomain.location = d;
	callback_register(setregdomain_cb, &regdomain);
}

static void
set80211ecm(const char *val, int d, int s, const struct afswtch *rafp)
{
	getregdomain(s);
	regdomain.ecm = d;
	callback_register(setregdomain_cb, &regdomain);
}

static void
LINE_INIT(char c)
{
	spacer = c;
	if (c == '\t')
		col = 8;
	else
		col = 1;
}

static void
LINE_BREAK(void)
{
	if (spacer != '\t') {
		printf("\n");
		spacer = '\t';
	}
	col = 8;	/* 8-col tab */
}

static void
LINE_CHECK(const char *fmt, ...)
{ char buf[80]; va_list ap; int n; va_start(ap, fmt); n = vsnprintf(buf+1, sizeof(buf)-1, fmt, ap); va_end(ap); col += 1+n; if (col > MAXCOL) { LINE_BREAK(); col += n; } buf[0] = spacer; printf("%s", buf); spacer = ' '; } static int getmaxrate(const uint8_t rates[15], uint8_t nrates) { int i, maxrate = -1; for (i = 0; i < nrates; i++) { int rate = rates[i] & IEEE80211_RATE_VAL; if (rate > maxrate) maxrate = rate; } return maxrate / 2; } static const char * getcaps(int capinfo) { static char capstring[32]; char *cp = capstring; if (capinfo & IEEE80211_CAPINFO_ESS) *cp++ = 'E'; if (capinfo & IEEE80211_CAPINFO_IBSS) *cp++ = 'I'; if (capinfo & IEEE80211_CAPINFO_CF_POLLABLE) *cp++ = 'c'; if (capinfo & IEEE80211_CAPINFO_CF_POLLREQ) *cp++ = 'C'; if (capinfo & IEEE80211_CAPINFO_PRIVACY) *cp++ = 'P'; if (capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE) *cp++ = 'S'; if (capinfo & IEEE80211_CAPINFO_PBCC) *cp++ = 'B'; if (capinfo & IEEE80211_CAPINFO_CHNL_AGILITY) *cp++ = 'A'; if (capinfo & IEEE80211_CAPINFO_SHORT_SLOTTIME) *cp++ = 's'; if (capinfo & IEEE80211_CAPINFO_RSN) *cp++ = 'R'; if (capinfo & IEEE80211_CAPINFO_DSSSOFDM) *cp++ = 'D'; *cp = '\0'; return capstring; } static const char * getflags(int flags) { static char flagstring[32]; char *cp = flagstring; if (flags & IEEE80211_NODE_AUTH) *cp++ = 'A'; if (flags & IEEE80211_NODE_QOS) *cp++ = 'Q'; if (flags & IEEE80211_NODE_ERP) *cp++ = 'E'; if (flags & IEEE80211_NODE_PWR_MGT) *cp++ = 'P'; if (flags & IEEE80211_NODE_HT) { *cp++ = 'H'; if (flags & IEEE80211_NODE_HTCOMPAT) *cp++ = '+'; } if (flags & IEEE80211_NODE_VHT) *cp++ = 'V'; if (flags & IEEE80211_NODE_WPS) *cp++ = 'W'; if (flags & IEEE80211_NODE_TSN) *cp++ = 'N'; if (flags & IEEE80211_NODE_AMPDU_TX) *cp++ = 'T'; if (flags & IEEE80211_NODE_AMPDU_RX) *cp++ = 'R'; if (flags & IEEE80211_NODE_MIMO_PS) { *cp++ = 'M'; if (flags & IEEE80211_NODE_MIMO_RTS) *cp++ = '+'; } if (flags & IEEE80211_NODE_RIFS) *cp++ = 'I'; if (flags & IEEE80211_NODE_SGI40) { *cp++ = 'S'; if (flags & IEEE80211_NODE_SGI20) *cp++ = '+'; } else if (flags & IEEE80211_NODE_SGI20) *cp++ = 's'; if (flags & IEEE80211_NODE_AMSDU_TX) *cp++ = 't'; if (flags & IEEE80211_NODE_AMSDU_RX) *cp++ = 'r'; *cp = '\0'; return flagstring; } static void printie(const char* tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { maxlen -= strlen(tag)+2; if (2*ielen > maxlen) maxlen--; printf("<"); for (; ielen > 0; ie++, ielen--) { if (maxlen-- <= 0) break; printf("%02x", *ie); } if (ielen != 0) printf("-"); printf(">"); } } #define LE_READ_2(p) \ ((u_int16_t) \ ((((const u_int8_t *)(p))[0] ) | \ (((const u_int8_t *)(p))[1] << 8))) #define LE_READ_4(p) \ ((u_int32_t) \ ((((const u_int8_t *)(p))[0] ) | \ (((const u_int8_t *)(p))[1] << 8) | \ (((const u_int8_t *)(p))[2] << 16) | \ (((const u_int8_t *)(p))[3] << 24))) /* * NB: The decoding routines assume a properly formatted ie * which should be safe as the kernel only retains them * if they parse ok. 
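 *
 * The LE_READ_2/LE_READ_4 helpers above assemble little-endian values a
 * byte at a time, so these decoders behave the same regardless of host
 * byte order or the alignment of fields inside the raw ie buffer.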
 */
static void
printwmeparam(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
#define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
	static const char *acnames[] = { "BE", "BK", "VO", "VI" };
	const struct ieee80211_wme_param *wme =
	    (const struct ieee80211_wme_param *) ie;
	int i;

	printf("%s", tag);
	if (!verbose)
		return;
	printf("<qosinfo 0x%x", wme->param_qosInfo);
	ie += offsetof(struct ieee80211_wme_param, params_acParams);
	for (i = 0; i < WME_NUM_AC; i++) {
		const struct ieee80211_wme_acparams *ac =
		    &wme->params_acParams[i];

		printf(" %s[%saifsn %u cwmin %u cwmax %u txop %u]"
			, acnames[i]
			, MS(ac->acp_aci_aifsn, WME_PARAM_ACM) ? "acm " : ""
			, MS(ac->acp_aci_aifsn, WME_PARAM_AIFSN)
			, MS(ac->acp_logcwminmax, WME_PARAM_LOGCWMIN)
			, MS(ac->acp_logcwminmax, WME_PARAM_LOGCWMAX)
			, LE_READ_2(&ac->acp_txop)
		);
	}
	printf(">");
#undef MS
}

static void
printwmeinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	if (verbose) {
		const struct ieee80211_wme_info *wme =
		    (const struct ieee80211_wme_info *) ie;
		printf("<version 0x%x info 0x%x>",
		    wme->wme_version, wme->wme_info);
	}
}

static void
printvhtcap(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	if (verbose) {
		const struct ieee80211_ie_vhtcap *vhtcap =
		    (const struct ieee80211_ie_vhtcap *) ie;
		uint32_t vhtcap_info = LE_READ_4(&vhtcap->vht_cap_info);

		printf("<cap 0x%08x", vhtcap_info);
		printf(" rx_mcs_map 0x%x",
		    LE_READ_2(&vhtcap->supp_mcs.rx_mcs_map));
		printf(" rx_highest %d",
		    LE_READ_2(&vhtcap->supp_mcs.rx_highest) & 0x1fff);
		printf(" tx_mcs_map 0x%x",
		    LE_READ_2(&vhtcap->supp_mcs.tx_mcs_map));
		printf(" tx_highest %d",
		    LE_READ_2(&vhtcap->supp_mcs.tx_highest) & 0x1fff);
		printf(">");
	}
}

static void
printvhtinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	if (verbose) {
		const struct ieee80211_ie_vht_operation *vhtinfo =
		    (const struct ieee80211_ie_vht_operation *) ie;

		printf("<chan_width %d freq1_idx %d freq2_idx %d basic_mcs_set 0x%04x>",
		    vhtinfo->chan_width,
		    vhtinfo->center_freq_seg1_idx,
		    vhtinfo->center_freq_seg2_idx,
		    LE_READ_2(&vhtinfo->basic_mcs_set));
	}
}

static void
printvhtpwrenv(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	static const char *txpwrmap[] = {
		"20",
		"40",
		"80",
		"160",
	};
	if (verbose) {
		const struct ieee80211_ie_vht_txpwrenv *vhtpwr =
		    (const struct ieee80211_ie_vht_txpwrenv *) ie;
		int i, n;
		const char *sep = "";

		/* Get count; trim at ielen */
		n = (vhtpwr->tx_info &
		    IEEE80211_VHT_TXPWRENV_INFO_COUNT_MASK) + 1;

		/* Trim at ielen */
		if (n > ielen - 3)
			n = ielen - 3;
		printf("<tx_info 0x%02x pwr:[", vhtpwr->tx_info);
		for (i = 0; i < n; i++) {
			printf("%s%s:%.2f", sep, txpwrmap[i],
			    ((float) ((int8_t) ie[i+3])) / 2.0);
			sep = " ";
		}
		printf("]>");
	}
}

static void
printhtcap(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	if (verbose) {
		const struct ieee80211_ie_htcap *htcap =
		    (const struct ieee80211_ie_htcap *) ie;
		const char *sep;
		int i, j;

		printf("<cap 0x%x param 0x%x",
		    LE_READ_2(&htcap->hc_cap), htcap->hc_param);
		printf(" mcsset[");
		sep = "";
		for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++)
			if (isset(htcap->hc_mcsset, i)) {
				for (j = i+1; j < IEEE80211_HTRATE_MAXSIZE; j++)
					if (isclr(htcap->hc_mcsset, j))
						break;
				j--;
				if (i == j)
					printf("%s%u", sep, i);
				else
					printf("%s%u-%u", sep, i, j);
				i += j-i;
				sep = ",";
			}
		printf("] extcap 0x%x txbf 0x%x antenna 0x%x>",
		    LE_READ_2(&htcap->hc_extcap),
		    LE_READ_4(&htcap->hc_txbf),
		    htcap->hc_antenna);
	}
}

static void
printhtinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen)
{
	printf("%s", tag);
	if (verbose) {
		const struct ieee80211_ie_htinfo *htinfo =
		    (const struct ieee80211_ie_htinfo *) ie;
		const char *sep;
		int i, j;

		printf("<ctl %u, %x,%x,%x,%x",
		    htinfo->hi_ctrlchannel, htinfo->hi_byte1,
htinfo->hi_byte2, htinfo->hi_byte3, LE_READ_2(&htinfo->hi_byte45)); printf(" basicmcs["); sep = ""; for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) if (isset(htinfo->hi_basicmcsset, i)) { for (j = i+1; j < IEEE80211_HTRATE_MAXSIZE; j++) if (isclr(htinfo->hi_basicmcsset, j)) break; j--; if (i == j) printf("%s%u", sep, i); else printf("%s%u-%u", sep, i, j); i += j-i; sep = ","; } printf("]>"); } } static void printathie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ath_ie *ath = (const struct ieee80211_ath_ie *)ie; printf("<"); if (ath->ath_capability & ATHEROS_CAP_TURBO_PRIME) printf("DTURBO,"); if (ath->ath_capability & ATHEROS_CAP_COMPRESSION) printf("COMP,"); if (ath->ath_capability & ATHEROS_CAP_FAST_FRAME) printf("FF,"); if (ath->ath_capability & ATHEROS_CAP_XR) printf("XR,"); if (ath->ath_capability & ATHEROS_CAP_AR) printf("AR,"); if (ath->ath_capability & ATHEROS_CAP_BURST) printf("BURST,"); if (ath->ath_capability & ATHEROS_CAP_WME) printf("WME,"); if (ath->ath_capability & ATHEROS_CAP_BOOST) printf("BOOST,"); printf("0x%x>", LE_READ_2(ath->ath_defkeyix)); } } static void printmeshconf(const char *tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_meshconf_ie *mconf = (const struct ieee80211_meshconf_ie *)ie; printf("conf_pselid == IEEE80211_MESHCONF_PATH_HWMP) printf("HWMP"); else printf("UNKNOWN"); printf(" LINK:"); if (mconf->conf_pmetid == IEEE80211_MESHCONF_METRIC_AIRTIME) printf("AIRTIME"); else printf("UNKNOWN"); printf(" CONGESTION:"); if (mconf->conf_ccid == IEEE80211_MESHCONF_CC_DISABLED) printf("DISABLED"); else printf("UNKNOWN"); printf(" SYNC:"); if (mconf->conf_syncid == IEEE80211_MESHCONF_SYNC_NEIGHOFF) printf("NEIGHOFF"); else printf("UNKNOWN"); printf(" AUTH:"); if (mconf->conf_authid == IEEE80211_MESHCONF_AUTH_DISABLED) printf("DISABLED"); else printf("UNKNOWN"); printf(" FORM:0x%x CAPS:0x%x>", mconf->conf_form, mconf->conf_cap); } } static void printbssload(const char *tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_bss_load_ie *bssload = (const struct ieee80211_bss_load_ie *) ie; printf("", LE_READ_2(&bssload->sta_count), bssload->chan_load, bssload->aac); } } static void printapchanrep(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ap_chan_report_ie *ap = (const struct ieee80211_ap_chan_report_ie *) ie; const char *sep = ""; int i; printf("i_class); for (i = 3; i < ielen; i++) { printf("%s%u", sep, ie[i]); sep = ","; } printf("]>"); } } static const char * wpa_cipher(const u_int8_t *sel) { #define WPA_SEL(x) (((x)<<24)|WPA_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case WPA_SEL(WPA_CSE_NULL): return "NONE"; case WPA_SEL(WPA_CSE_WEP40): return "WEP40"; case WPA_SEL(WPA_CSE_WEP104): return "WEP104"; case WPA_SEL(WPA_CSE_TKIP): return "TKIP"; case WPA_SEL(WPA_CSE_CCMP): return "AES-CCMP"; } return "?"; /* NB: so 1<< is discarded */ #undef WPA_SEL } static const char * wpa_keymgmt(const u_int8_t *sel) { #define WPA_SEL(x) (((x)<<24)|WPA_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case WPA_SEL(WPA_ASE_8021X_UNSPEC): return "8021X-UNSPEC"; case WPA_SEL(WPA_ASE_8021X_PSK): return "8021X-PSK"; case WPA_SEL(WPA_ASE_NONE): return "NONE"; } return "?"; #undef WPA_SEL } static void printwpaie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { u_int8_t len = ie[1]; printf("%s", tag); if 
(verbose) { const char *sep; int n; ie += 6, len -= 4; /* NB: len is payload only */ printf(" 0; n--) { printf("%s%s", sep, wpa_cipher(ie)); ie += 4, len -= 4; sep = "+"; } /* key management algorithms */ n = LE_READ_2(ie); ie += 2, len -= 2; sep = " km:"; for (; n > 0; n--) { printf("%s%s", sep, wpa_keymgmt(ie)); ie += 4, len -= 4; sep = "+"; } if (len > 2) /* optional capabilities */ printf(", caps 0x%x", LE_READ_2(ie)); printf(">"); } } static const char * rsn_cipher(const u_int8_t *sel) { #define RSN_SEL(x) (((x)<<24)|RSN_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case RSN_SEL(RSN_CSE_NULL): return "NONE"; case RSN_SEL(RSN_CSE_WEP40): return "WEP40"; case RSN_SEL(RSN_CSE_WEP104): return "WEP104"; case RSN_SEL(RSN_CSE_TKIP): return "TKIP"; case RSN_SEL(RSN_CSE_CCMP): return "AES-CCMP"; case RSN_SEL(RSN_CSE_WRAP): return "AES-OCB"; } return "?"; #undef WPA_SEL } static const char * rsn_keymgmt(const u_int8_t *sel) { #define RSN_SEL(x) (((x)<<24)|RSN_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case RSN_SEL(RSN_ASE_8021X_UNSPEC): return "8021X-UNSPEC"; case RSN_SEL(RSN_ASE_8021X_PSK): return "8021X-PSK"; case RSN_SEL(RSN_ASE_NONE): return "NONE"; } return "?"; #undef RSN_SEL } static void printrsnie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const char *sep; int n; ie += 2, ielen -= 2; printf(" 0; n--) { printf("%s%s", sep, rsn_cipher(ie)); ie += 4, ielen -= 4; sep = "+"; } /* key management algorithms */ n = LE_READ_2(ie); ie += 2, ielen -= 2; sep = " km:"; for (; n > 0; n--) { printf("%s%s", sep, rsn_keymgmt(ie)); ie += 4, ielen -= 4; sep = "+"; } if (ielen > 2) /* optional capabilities */ printf(", caps 0x%x", LE_READ_2(ie)); /* XXXPMKID */ printf(">"); } } /* XXX move to a public include file */ #define IEEE80211_WPS_DEV_PASS_ID 0x1012 #define IEEE80211_WPS_SELECTED_REG 0x1041 #define IEEE80211_WPS_SETUP_STATE 0x1044 #define IEEE80211_WPS_UUID_E 0x1047 #define IEEE80211_WPS_VERSION 0x104a #define BE_READ_2(p) \ ((u_int16_t) \ ((((const u_int8_t *)(p))[1] ) | \ (((const u_int8_t *)(p))[0] << 8))) static void printwpsie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { u_int8_t len = ie[1]; printf("%s", tag); if (verbose) { static const char *dev_pass_id[] = { "D", /* Default (PIN) */ "U", /* User-specified */ "M", /* Machine-specified */ "K", /* Rekey */ "P", /* PushButton */ "R" /* Registrar-specified */ }; int n; ie +=6, len -= 4; /* NB: len is payload only */ /* WPS IE in Beacon and Probe Resp frames have different fields */ printf("<"); while (len) { uint16_t tlv_type = BE_READ_2(ie); uint16_t tlv_len = BE_READ_2(ie + 2); ie += 4, len -= 4; switch (tlv_type) { case IEEE80211_WPS_VERSION: printf("v:%d.%d", *ie >> 4, *ie & 0xf); break; case IEEE80211_WPS_SETUP_STATE: /* Only 1 and 2 are valid */ if (*ie == 0 || *ie >= 3) printf(" state:B"); else printf(" st:%s", *ie == 1 ? "N" : "C"); break; case IEEE80211_WPS_SELECTED_REG: printf(" sel:%s", *ie ? 
"T" : "F"); break; case IEEE80211_WPS_DEV_PASS_ID: n = LE_READ_2(ie); if (n < nitems(dev_pass_id)) printf(" dpi:%s", dev_pass_id[n]); break; case IEEE80211_WPS_UUID_E: printf(" uuid-e:"); for (n = 0; n < (tlv_len - 1); n++) printf("%02x-", ie[n]); printf("%02x", ie[n]); break; } ie += tlv_len, len -= tlv_len; } printf(">"); } } static void printtdmaie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose && ielen >= sizeof(struct ieee80211_tdma_param)) { const struct ieee80211_tdma_param *tdma = (const struct ieee80211_tdma_param *) ie; /* XXX tstamp */ printf("", tdma->tdma_version, tdma->tdma_slot, tdma->tdma_slotcnt, LE_READ_2(&tdma->tdma_slotlen), tdma->tdma_bintval, tdma->tdma_inuse[0]); } } /* * Copy the ssid string contents into buf, truncating to fit. If the * ssid is entirely printable then just copy intact. Otherwise convert * to hexadecimal. If the result is truncated then replace the last * three characters with "...". */ static int copy_essid(char buf[], size_t bufsize, const u_int8_t *essid, size_t essid_len) { const u_int8_t *p; size_t maxlen; u_int i; if (essid_len > bufsize) maxlen = bufsize; else maxlen = essid_len; /* determine printable or not */ for (i = 0, p = essid; i < maxlen; i++, p++) { if (*p < ' ' || *p > 0x7e) break; } if (i != maxlen) { /* not printable, print as hex */ if (bufsize < 3) return 0; strlcpy(buf, "0x", bufsize); bufsize -= 2; p = essid; for (i = 0; i < maxlen && bufsize >= 2; i++) { sprintf(&buf[2+2*i], "%02x", p[i]); bufsize -= 2; } if (i != essid_len) memcpy(&buf[2+2*i-3], "...", 3); } else { /* printable, truncate as needed */ memcpy(buf, essid, maxlen); if (maxlen != essid_len) memcpy(&buf[maxlen-3], "...", 3); } return maxlen; } static void printssid(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { char ssid[2*IEEE80211_NWID_LEN+1]; printf("%s<%.*s>", tag, copy_essid(ssid, maxlen, ie+2, ie[1]), ssid); } static void printrates(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { const char *sep; int i; printf("%s", tag); sep = "<"; for (i = 2; i < ielen; i++) { printf("%s%s%d", sep, ie[i] & IEEE80211_RATE_BASIC ? 
"B" : "", ie[i] & IEEE80211_RATE_VAL); sep = ","; } printf(">"); } static void printcountry(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { const struct ieee80211_country_ie *cie = (const struct ieee80211_country_ie *) ie; int i, nbands, schan, nchan; printf("%s<%c%c%c", tag, cie->cc[0], cie->cc[1], cie->cc[2]); nbands = (cie->len - 3) / sizeof(cie->band[0]); for (i = 0; i < nbands; i++) { schan = cie->band[i].schan; nchan = cie->band[i].nchan; if (nchan != 1) printf(" %u-%u,%u", schan, schan + nchan-1, cie->band[i].maxtxpwr); else printf(" %u,%u", schan, cie->band[i].maxtxpwr); } printf(">"); } static __inline int iswpaoui(const u_int8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((WPA_OUI_TYPE<<24)|WPA_OUI); } static __inline int iswmeinfo(const u_int8_t *frm) { return frm[1] > 5 && LE_READ_4(frm+2) == ((WME_OUI_TYPE<<24)|WME_OUI) && frm[6] == WME_INFO_OUI_SUBTYPE; } static __inline int iswmeparam(const u_int8_t *frm) { return frm[1] > 5 && LE_READ_4(frm+2) == ((WME_OUI_TYPE<<24)|WME_OUI) && frm[6] == WME_PARAM_OUI_SUBTYPE; } static __inline int isatherosoui(const u_int8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((ATH_OUI_TYPE<<24)|ATH_OUI); } static __inline int istdmaoui(const uint8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((TDMA_OUI_TYPE<<24)|TDMA_OUI); } static __inline int iswpsoui(const uint8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((WPS_OUI_TYPE<<24)|WPA_OUI); } static const char * iename(int elemid) { switch (elemid) { case IEEE80211_ELEMID_FHPARMS: return " FHPARMS"; case IEEE80211_ELEMID_CFPARMS: return " CFPARMS"; case IEEE80211_ELEMID_TIM: return " TIM"; case IEEE80211_ELEMID_IBSSPARMS:return " IBSSPARMS"; case IEEE80211_ELEMID_BSSLOAD: return " BSSLOAD"; case IEEE80211_ELEMID_CHALLENGE:return " CHALLENGE"; case IEEE80211_ELEMID_PWRCNSTR: return " PWRCNSTR"; case IEEE80211_ELEMID_PWRCAP: return " PWRCAP"; case IEEE80211_ELEMID_TPCREQ: return " TPCREQ"; case IEEE80211_ELEMID_TPCREP: return " TPCREP"; case IEEE80211_ELEMID_SUPPCHAN: return " SUPPCHAN"; case IEEE80211_ELEMID_CSA: return " CSA"; case IEEE80211_ELEMID_MEASREQ: return " MEASREQ"; case IEEE80211_ELEMID_MEASREP: return " MEASREP"; case IEEE80211_ELEMID_QUIET: return " QUIET"; case IEEE80211_ELEMID_IBSSDFS: return " IBSSDFS"; case IEEE80211_ELEMID_TPC: return " TPC"; case IEEE80211_ELEMID_CCKM: return " CCKM"; } return " ???"; } static void printies(const u_int8_t *vp, int ielen, int maxcols) { while (ielen > 0) { switch (vp[0]) { case IEEE80211_ELEMID_SSID: if (verbose) printssid(" SSID", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_RATES: case IEEE80211_ELEMID_XRATES: if (verbose) printrates(vp[0] == IEEE80211_ELEMID_RATES ? 
" RATES" : " XRATES", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_DSPARMS: if (verbose) printf(" DSPARMS<%u>", vp[2]); break; case IEEE80211_ELEMID_COUNTRY: if (verbose) printcountry(" COUNTRY", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_ERP: if (verbose) printf(" ERP<0x%x>", vp[2]); break; case IEEE80211_ELEMID_VENDOR: if (iswpaoui(vp)) printwpaie(" WPA", vp, 2+vp[1], maxcols); else if (iswmeinfo(vp)) printwmeinfo(" WME", vp, 2+vp[1], maxcols); else if (iswmeparam(vp)) printwmeparam(" WME", vp, 2+vp[1], maxcols); else if (isatherosoui(vp)) printathie(" ATH", vp, 2+vp[1], maxcols); else if (iswpsoui(vp)) printwpsie(" WPS", vp, 2+vp[1], maxcols); else if (istdmaoui(vp)) printtdmaie(" TDMA", vp, 2+vp[1], maxcols); else if (verbose) printie(" VEN", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_RSN: printrsnie(" RSN", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_HTCAP: printhtcap(" HTCAP", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_HTINFO: if (verbose) printhtinfo(" HTINFO", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_MESHID: if (verbose) printssid(" MESHID", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_MESHCONF: printmeshconf(" MESHCONF", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_CAP: printvhtcap(" VHTCAP", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_OPMODE: printvhtinfo(" VHTOPMODE", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_PWR_ENV: printvhtpwrenv(" VHTPWRENV", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_BSSLOAD: printbssload(" BSSLOAD", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_APCHANREP: printapchanrep(" APCHANREP", vp, 2+vp[1], maxcols); break; default: if (verbose) printie(iename(vp[0]), vp, 2+vp[1], maxcols); break; } ielen -= 2+vp[1]; vp += 2+vp[1]; } } static void printmimo(const struct ieee80211_mimo_info *mi) { /* NB: don't muddy display unless there's something to show */ if (mi->rssi[0] != 0 || mi->rssi[1] != 0 || mi->rssi[2] != 0) { /* XXX ignore EVM for now */ printf(" (rssi %.1f:%.1f:%.1f nf %d:%d:%d)", mi->rssi[0] / 2.0, mi->rssi[1] / 2.0, mi->rssi[2] / 2.0, mi->noise[0], mi->noise[1], mi->noise[2]); } } static void list_scan(int s) { uint8_t buf[24*1024]; char ssid[IEEE80211_NWID_LEN+1]; const uint8_t *cp; int len, ssidmax, idlen; if (get80211len(s, IEEE80211_IOC_SCAN_RESULTS, buf, sizeof(buf), &len) < 0) errx(1, "unable to get scan results"); if (len < sizeof(struct ieee80211req_scan_result)) return; getchaninfo(s); ssidmax = verbose ? 
IEEE80211_NWID_LEN : 14; printf("%-*.*s %-17.17s %4s %4s %-7s %3s %4s\n" , ssidmax, ssidmax, "SSID/MESH ID" , "BSSID" , "CHAN" , "RATE" , " S:N" , "INT" , "CAPS" ); cp = buf; do { const struct ieee80211req_scan_result *sr; const uint8_t *vp, *idp; sr = (const struct ieee80211req_scan_result *) cp; vp = cp + sr->isr_ie_off; if (sr->isr_meshid_len) { idp = vp + sr->isr_ssid_len; idlen = sr->isr_meshid_len; } else { idp = vp; idlen = sr->isr_ssid_len; } printf("%-*.*s %s %3d %3dM %4d:%-4d %4d %-4.4s" , ssidmax , copy_essid(ssid, ssidmax, idp, idlen) , ssid , ether_ntoa((const struct ether_addr *) sr->isr_bssid) , ieee80211_mhz2ieee(sr->isr_freq, sr->isr_flags) , getmaxrate(sr->isr_rates, sr->isr_nrates) , (sr->isr_rssi/2)+sr->isr_noise, sr->isr_noise , sr->isr_intval , getcaps(sr->isr_capinfo) ); printies(vp + sr->isr_ssid_len + sr->isr_meshid_len, sr->isr_ie_len, 24); printf("\n"); cp += sr->isr_len, len -= sr->isr_len; } while (len >= sizeof(struct ieee80211req_scan_result)); } static void scan_and_wait(int s) { struct ieee80211_scan_req sr; struct ieee80211req ireq; int sroute; sroute = socket(PF_ROUTE, SOCK_RAW, 0); if (sroute < 0) { perror("socket(PF_ROUTE,SOCK_RAW)"); return; } (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = IEEE80211_IOC_SCAN_REQ; memset(&sr, 0, sizeof(sr)); sr.sr_flags = IEEE80211_IOC_SCAN_ACTIVE | IEEE80211_IOC_SCAN_BGSCAN | IEEE80211_IOC_SCAN_NOPICK | IEEE80211_IOC_SCAN_ONCE; sr.sr_duration = IEEE80211_IOC_SCAN_FOREVER; sr.sr_nssid = 0; ireq.i_data = &sr; ireq.i_len = sizeof(sr); /* * NB: only root can trigger a scan so ignore errors. Also ignore * possible errors from net80211, even if no new scan could be * started there might still be a valid scan cache. */ if (ioctl(s, SIOCS80211, &ireq) == 0) { char buf[2048]; struct if_announcemsghdr *ifan; struct rt_msghdr *rtm; do { if (read(sroute, buf, sizeof(buf)) < 0) { perror("read(PF_ROUTE)"); break; } rtm = (struct rt_msghdr *) buf; if (rtm->rtm_version != RTM_VERSION) break; ifan = (struct if_announcemsghdr *) rtm; } while (rtm->rtm_type != RTM_IEEE80211 || ifan->ifan_what != RTM_IEEE80211_SCAN); } close(sroute); } static DECL_CMD_FUNC(set80211scan, val, d) { scan_and_wait(s); list_scan(s); } static enum ieee80211_opmode get80211opmode(int s); static int gettxseq(const struct ieee80211req_sta_info *si) { int i, txseq; if ((si->isi_state & IEEE80211_NODE_QOS) == 0) return si->isi_txseqs[0]; /* XXX not right but usually what folks want */ txseq = 0; for (i = 0; i < IEEE80211_TID_SIZE; i++) if (si->isi_txseqs[i] > txseq) txseq = si->isi_txseqs[i]; return txseq; } static int getrxseq(const struct ieee80211req_sta_info *si) { int i, rxseq; if ((si->isi_state & IEEE80211_NODE_QOS) == 0) return si->isi_rxseqs[0]; /* XXX not right but usually what folks want */ rxseq = 0; for (i = 0; i < IEEE80211_TID_SIZE; i++) if (si->isi_rxseqs[i] > rxseq) rxseq = si->isi_rxseqs[i]; return rxseq; } static void list_stations(int s) { union { struct ieee80211req_sta_req req; uint8_t buf[24*1024]; } u; enum ieee80211_opmode opmode = get80211opmode(s); const uint8_t *cp; int len; /* broadcast address =>'s get all stations */ (void) memset(u.req.is_u.macaddr, 0xff, IEEE80211_ADDR_LEN); if (opmode == IEEE80211_M_STA) { /* * Get information about the associated AP. 
*/ (void) get80211(s, IEEE80211_IOC_BSSID, u.req.is_u.macaddr, IEEE80211_ADDR_LEN); } if (get80211len(s, IEEE80211_IOC_STA_INFO, &u, sizeof(u), &len) < 0) errx(1, "unable to get station information"); if (len < sizeof(struct ieee80211req_sta_info)) return; getchaninfo(s); if (opmode == IEEE80211_M_MBSS) printf("%-17.17s %4s %5s %5s %7s %4s %4s %4s %6s %6s\n" , "ADDR" , "CHAN" , "LOCAL" , "PEER" , "STATE" , "RATE" , "RSSI" , "IDLE" , "TXSEQ" , "RXSEQ" ); else printf("%-17.17s %4s %4s %4s %4s %4s %6s %6s %4s %-7s\n" , "ADDR" , "AID" , "CHAN" , "RATE" , "RSSI" , "IDLE" , "TXSEQ" , "RXSEQ" , "CAPS" , "FLAG" ); cp = (const uint8_t *) u.req.info; do { const struct ieee80211req_sta_info *si; si = (const struct ieee80211req_sta_info *) cp; if (si->isi_len < sizeof(*si)) break; if (opmode == IEEE80211_M_MBSS) printf("%s %4d %5x %5x %7.7s %3dM %4.1f %4d %6d %6d" , ether_ntoa((const struct ether_addr*) si->isi_macaddr) , ieee80211_mhz2ieee(si->isi_freq, si->isi_flags) , si->isi_localid , si->isi_peerid , mesh_linkstate_string(si->isi_peerstate) , si->isi_txmbps/2 , si->isi_rssi/2. , si->isi_inact , gettxseq(si) , getrxseq(si) ); else printf("%s %4u %4d %3dM %4.1f %4d %6d %6d %-4.4s %-7.7s" , ether_ntoa((const struct ether_addr*) si->isi_macaddr) , IEEE80211_AID(si->isi_associd) , ieee80211_mhz2ieee(si->isi_freq, si->isi_flags) , si->isi_txmbps/2 , si->isi_rssi/2. , si->isi_inact , gettxseq(si) , getrxseq(si) , getcaps(si->isi_capinfo) , getflags(si->isi_state) ); printies(cp + si->isi_ie_off, si->isi_ie_len, 24); printmimo(&si->isi_mimo); printf("\n"); cp += si->isi_len, len -= si->isi_len; } while (len >= sizeof(struct ieee80211req_sta_info)); } static const char * mesh_linkstate_string(uint8_t state) { static const char *state_names[] = { [0] = "IDLE", [1] = "OPEN-TX", [2] = "OPEN-RX", [3] = "CONF-RX", [4] = "ESTAB", [5] = "HOLDING", }; if (state >= nitems(state_names)) { static char buf[10]; snprintf(buf, sizeof(buf), "#%u", state); return buf; } else return state_names[state]; } static const char * get_chaninfo(const struct ieee80211_channel *c, int precise, char buf[], size_t bsize) { buf[0] = '\0'; if (IEEE80211_IS_CHAN_FHSS(c)) strlcat(buf, " FHSS", bsize); if (IEEE80211_IS_CHAN_A(c)) strlcat(buf, " 11a", bsize); else if (IEEE80211_IS_CHAN_ANYG(c)) strlcat(buf, " 11g", bsize); else if (IEEE80211_IS_CHAN_B(c)) strlcat(buf, " 11b", bsize); if (IEEE80211_IS_CHAN_HALF(c)) strlcat(buf, "/10MHz", bsize); if (IEEE80211_IS_CHAN_QUARTER(c)) strlcat(buf, "/5MHz", bsize); if (IEEE80211_IS_CHAN_TURBO(c)) strlcat(buf, " Turbo", bsize); if (precise) { /* XXX should make VHT80U, VHT80D */ if (IEEE80211_IS_CHAN_VHT80(c) && IEEE80211_IS_CHAN_HT40D(c)) strlcat(buf, " vht/80-", bsize); else if (IEEE80211_IS_CHAN_VHT80(c) && IEEE80211_IS_CHAN_HT40U(c)) strlcat(buf, " vht/80+", bsize); else if (IEEE80211_IS_CHAN_VHT80(c)) strlcat(buf, " vht/80", bsize); else if (IEEE80211_IS_CHAN_VHT40D(c)) strlcat(buf, " vht/40-", bsize); else if (IEEE80211_IS_CHAN_VHT40U(c)) strlcat(buf, " vht/40+", bsize); else if (IEEE80211_IS_CHAN_VHT20(c)) strlcat(buf, " vht/20", bsize); else if (IEEE80211_IS_CHAN_HT20(c)) strlcat(buf, " ht/20", bsize); else if (IEEE80211_IS_CHAN_HT40D(c)) strlcat(buf, " ht/40-", bsize); else if (IEEE80211_IS_CHAN_HT40U(c)) strlcat(buf, " ht/40+", bsize); } else { if (IEEE80211_IS_CHAN_VHT(c)) strlcat(buf, " vht", bsize); else if (IEEE80211_IS_CHAN_HT(c)) strlcat(buf, " ht", bsize); } return buf; } static void print_chaninfo(const struct ieee80211_channel *c, int verb) { char buf[14]; if (verb) 
printf("Channel %3u : %u%c%c%c%c%c MHz%-14.14s", ieee80211_mhz2ieee(c->ic_freq, c->ic_flags), c->ic_freq, IEEE80211_IS_CHAN_PASSIVE(c) ? '*' : ' ', IEEE80211_IS_CHAN_DFS(c) ? 'D' : ' ', IEEE80211_IS_CHAN_RADAR(c) ? 'R' : ' ', IEEE80211_IS_CHAN_CWINT(c) ? 'I' : ' ', IEEE80211_IS_CHAN_CACDONE(c) ? 'C' : ' ', get_chaninfo(c, verb, buf, sizeof(buf))); else printf("Channel %3u : %u%c MHz%-14.14s", ieee80211_mhz2ieee(c->ic_freq, c->ic_flags), c->ic_freq, IEEE80211_IS_CHAN_PASSIVE(c) ? '*' : ' ', get_chaninfo(c, verb, buf, sizeof(buf))); } static int chanpref(const struct ieee80211_channel *c) { if (IEEE80211_IS_CHAN_VHT160(c)) return 80; if (IEEE80211_IS_CHAN_VHT80_80(c)) return 75; if (IEEE80211_IS_CHAN_VHT80(c)) return 70; if (IEEE80211_IS_CHAN_VHT40(c)) return 60; if (IEEE80211_IS_CHAN_VHT20(c)) return 50; if (IEEE80211_IS_CHAN_HT40(c)) return 40; if (IEEE80211_IS_CHAN_HT20(c)) return 30; if (IEEE80211_IS_CHAN_HALF(c)) return 10; if (IEEE80211_IS_CHAN_QUARTER(c)) return 5; if (IEEE80211_IS_CHAN_TURBO(c)) return 25; if (IEEE80211_IS_CHAN_A(c)) return 20; if (IEEE80211_IS_CHAN_G(c)) return 20; if (IEEE80211_IS_CHAN_B(c)) return 15; if (IEEE80211_IS_CHAN_PUREG(c)) return 15; return 0; } static void print_channels(int s, const struct ieee80211req_chaninfo *chans, int allchans, int verb) { struct ieee80211req_chaninfo *achans; uint8_t reported[IEEE80211_CHAN_BYTES]; const struct ieee80211_channel *c; int i, half; achans = malloc(IEEE80211_CHANINFO_SPACE(chans)); if (achans == NULL) errx(1, "no space for active channel list"); achans->ic_nchans = 0; memset(reported, 0, sizeof(reported)); if (!allchans) { struct ieee80211req_chanlist active; if (get80211(s, IEEE80211_IOC_CHANLIST, &active, sizeof(active)) < 0) errx(1, "unable to get active channel list"); for (i = 0; i < chans->ic_nchans; i++) { c = &chans->ic_chans[i]; if (!isset(active.ic_channels, c->ic_ieee)) continue; /* * Suppress compatible duplicates unless * verbose. The kernel gives us it's * complete channel list which has separate * entries for 11g/11b and 11a/turbo. 
*/ if (isset(reported, c->ic_ieee) && !verb) { /* XXX we assume duplicates are adjacent */ achans->ic_chans[achans->ic_nchans-1] = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } } else { for (i = 0; i < chans->ic_nchans; i++) { c = &chans->ic_chans[i]; /* suppress duplicates as above */ if (isset(reported, c->ic_ieee) && !verb) { /* XXX we assume duplicates are adjacent */ struct ieee80211_channel *a = &achans->ic_chans[achans->ic_nchans-1]; if (chanpref(c) > chanpref(a)) *a = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } } half = achans->ic_nchans / 2; if (achans->ic_nchans % 2) half++; for (i = 0; i < achans->ic_nchans / 2; i++) { print_chaninfo(&achans->ic_chans[i], verb); print_chaninfo(&achans->ic_chans[half+i], verb); printf("\n"); } if (achans->ic_nchans % 2) { print_chaninfo(&achans->ic_chans[i], verb); printf("\n"); } free(achans); } static void list_channels(int s, int allchans) { getchaninfo(s); print_channels(s, chaninfo, allchans, verbose); } static void print_txpow(const struct ieee80211_channel *c) { printf("Channel %3u : %u MHz %3.1f reg %2d ", c->ic_ieee, c->ic_freq, c->ic_maxpower/2., c->ic_maxregpower); } static void print_txpow_verbose(const struct ieee80211_channel *c) { print_chaninfo(c, 1); printf("min %4.1f dBm max %3.1f dBm reg %2d dBm", c->ic_minpower/2., c->ic_maxpower/2., c->ic_maxregpower); /* indicate where regulatory cap limits power use */ if (c->ic_maxpower > 2*c->ic_maxregpower) printf(" <"); } static void list_txpow(int s) { struct ieee80211req_chaninfo *achans; uint8_t reported[IEEE80211_CHAN_BYTES]; struct ieee80211_channel *c, *prev; int i, half; getchaninfo(s); achans = malloc(IEEE80211_CHANINFO_SPACE(chaninfo)); if (achans == NULL) errx(1, "no space for active channel list"); achans->ic_nchans = 0; memset(reported, 0, sizeof(reported)); for (i = 0; i < chaninfo->ic_nchans; i++) { c = &chaninfo->ic_chans[i]; /* suppress duplicates as above */ if (isset(reported, c->ic_ieee) && !verbose) { /* XXX we assume duplicates are adjacent */ assert(achans->ic_nchans > 0); prev = &achans->ic_chans[achans->ic_nchans-1]; /* display highest power on channel */ if (c->ic_maxpower > prev->ic_maxpower) *prev = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } if (!verbose) { half = achans->ic_nchans / 2; if (achans->ic_nchans % 2) half++; for (i = 0; i < achans->ic_nchans / 2; i++) { print_txpow(&achans->ic_chans[i]); print_txpow(&achans->ic_chans[half+i]); printf("\n"); } if (achans->ic_nchans % 2) { print_txpow(&achans->ic_chans[i]); printf("\n"); } } else { for (i = 0; i < achans->ic_nchans; i++) { print_txpow_verbose(&achans->ic_chans[i]); printf("\n"); } } free(achans); } static void list_keys(int s) { } static void list_capabilities(int s) { struct ieee80211_devcaps_req *dc; if (verbose) dc = malloc(IEEE80211_DEVCAPS_SIZE(MAXCHAN)); else dc = malloc(IEEE80211_DEVCAPS_SIZE(1)); if (dc == NULL) errx(1, "no space for device capabilities"); dc->dc_chaninfo.ic_nchans = verbose ? 
MAXCHAN : 1; getdevcaps(s, dc); printb("drivercaps", dc->dc_drivercaps, IEEE80211_C_BITS); if (dc->dc_cryptocaps != 0 || verbose) { putchar('\n'); printb("cryptocaps", dc->dc_cryptocaps, IEEE80211_CRYPTO_BITS); } if (dc->dc_htcaps != 0 || verbose) { putchar('\n'); printb("htcaps", dc->dc_htcaps, IEEE80211_HTCAP_BITS); } if (dc->dc_vhtcaps != 0 || verbose) { putchar('\n'); printb("vhtcaps", dc->dc_vhtcaps, IEEE80211_VHTCAP_BITS); } putchar('\n'); if (verbose) { chaninfo = &dc->dc_chaninfo; /* XXX */ print_channels(s, &dc->dc_chaninfo, 1/*allchans*/, verbose); } free(dc); } static int get80211wme(int s, int param, int ac, int *val) { struct ieee80211req ireq; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = param; ireq.i_len = ac; if (ioctl(s, SIOCG80211, &ireq) < 0) { warn("cannot get WME parameter %d, ac %d%s", param, ac & IEEE80211_WMEPARAM_VAL, ac & IEEE80211_WMEPARAM_BSS ? " (BSS)" : ""); return -1; } *val = ireq.i_val; return 0; } static void list_wme_aci(int s, const char *tag, int ac) { int val; printf("\t%s", tag); /* show WME BSS parameters */ if (get80211wme(s, IEEE80211_IOC_WME_CWMIN, ac, &val) != -1) printf(" cwmin %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_CWMAX, ac, &val) != -1) printf(" cwmax %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_AIFS, ac, &val) != -1) printf(" aifs %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_TXOPLIMIT, ac, &val) != -1) printf(" txopLimit %3u", val); if (get80211wme(s, IEEE80211_IOC_WME_ACM, ac, &val) != -1) { if (val) printf(" acm"); else if (verbose) printf(" -acm"); } /* !BSS only */ if ((ac & IEEE80211_WMEPARAM_BSS) == 0) { if (get80211wme(s, IEEE80211_IOC_WME_ACKPOLICY, ac, &val) != -1) { if (!val) printf(" -ack"); else if (verbose) printf(" ack"); } } printf("\n"); } static void list_wme(int s) { static const char *acnames[] = { "AC_BE", "AC_BK", "AC_VI", "AC_VO" }; int ac; if (verbose) { /* display both BSS and local settings */ for (ac = WME_AC_BE; ac <= WME_AC_VO; ac++) { again: if (ac & IEEE80211_WMEPARAM_BSS) list_wme_aci(s, " ", ac); else list_wme_aci(s, acnames[ac], ac); if ((ac & IEEE80211_WMEPARAM_BSS) == 0) { ac |= IEEE80211_WMEPARAM_BSS; goto again; } else ac &= ~IEEE80211_WMEPARAM_BSS; } } else { /* display only channel settings */ for (ac = WME_AC_BE; ac <= WME_AC_VO; ac++) list_wme_aci(s, acnames[ac], ac); } } static void list_roam(int s) { const struct ieee80211_roamparam *rp; int mode; getroam(s); for (mode = IEEE80211_MODE_11A; mode < IEEE80211_MODE_MAX; mode++) { rp = &roamparams.params[mode]; if (rp->rssi == 0 && rp->rate == 0) continue; if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm MCS %2u ", modename[mode], rp->rssi/2, rp->rate &~ IEEE80211_RATE_MCS); else LINE_CHECK("roam:%-7.7s rssi %4udBm MCS %2u ", modename[mode], rp->rssi/2, rp->rate &~ IEEE80211_RATE_MCS); } else { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm rate %2u Mb/s", modename[mode], rp->rssi/2, rp->rate/2); else LINE_CHECK("roam:%-7.7s rssi %4udBm rate %2u Mb/s", modename[mode], rp->rssi/2, rp->rate/2); } } } static void list_txparams(int s) { const struct ieee80211_txparam *tp; int mode; gettxparams(s); for (mode = IEEE80211_MODE_11A; mode < IEEE80211_MODE_MAX; mode++) { tp = &txparams.params[mode]; if (tp->mgmtrate == 0 && tp->mcastrate == 0) continue; if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) LINE_CHECK("%-7.7s ucast NONE mgmt 
%2u MCS " "mcast %2u MCS maxretry %u", modename[mode], tp->mgmtrate &~ IEEE80211_RATE_MCS, tp->mcastrate &~ IEEE80211_RATE_MCS, tp->maxretry); else LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u MCS " "mcast %2u MCS maxretry %u", modename[mode], tp->ucastrate &~ IEEE80211_RATE_MCS, tp->mgmtrate &~ IEEE80211_RATE_MCS, tp->mcastrate &~ IEEE80211_RATE_MCS, tp->maxretry); } else { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) LINE_CHECK("%-7.7s ucast NONE mgmt %2u Mb/s " "mcast %2u Mb/s maxretry %u", modename[mode], tp->mgmtrate/2, tp->mcastrate/2, tp->maxretry); else LINE_CHECK("%-7.7s ucast %2u Mb/s mgmt %2u Mb/s " "mcast %2u Mb/s maxretry %u", modename[mode], tp->ucastrate/2, tp->mgmtrate/2, tp->mcastrate/2, tp->maxretry); } } } static void printpolicy(int policy) { switch (policy) { case IEEE80211_MACCMD_POLICY_OPEN: printf("policy: open\n"); break; case IEEE80211_MACCMD_POLICY_ALLOW: printf("policy: allow\n"); break; case IEEE80211_MACCMD_POLICY_DENY: printf("policy: deny\n"); break; case IEEE80211_MACCMD_POLICY_RADIUS: printf("policy: radius\n"); break; default: printf("policy: unknown (%u)\n", policy); break; } } static void list_mac(int s) { struct ieee80211req ireq; struct ieee80211req_maclist *acllist; int i, nacls, policy, len; uint8_t *data; char c; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); /* XXX ?? */ ireq.i_type = IEEE80211_IOC_MACCMD; ireq.i_val = IEEE80211_MACCMD_POLICY; if (ioctl(s, SIOCG80211, &ireq) < 0) { if (errno == EINVAL) { printf("No acl policy loaded\n"); return; } err(1, "unable to get mac policy"); } policy = ireq.i_val; if (policy == IEEE80211_MACCMD_POLICY_OPEN) { c = '*'; } else if (policy == IEEE80211_MACCMD_POLICY_ALLOW) { c = '+'; } else if (policy == IEEE80211_MACCMD_POLICY_DENY) { c = '-'; } else if (policy == IEEE80211_MACCMD_POLICY_RADIUS) { c = 'r'; /* NB: should never have entries */ } else { printf("policy: unknown (%u)\n", policy); c = '?'; } if (verbose || c == '?') printpolicy(policy); ireq.i_val = IEEE80211_MACCMD_LIST; ireq.i_len = 0; if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get mac acl list size"); if (ireq.i_len == 0) { /* NB: no acls */ if (!(verbose || c == '?')) printpolicy(policy); return; } len = ireq.i_len; data = malloc(len); if (data == NULL) err(1, "out of memory for acl list"); ireq.i_data = data; if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get mac acl list"); nacls = len / sizeof(*acllist); acllist = (struct ieee80211req_maclist *) data; for (i = 0; i < nacls; i++) printf("%c%s\n", c, ether_ntoa( (const struct ether_addr *) acllist[i].ml_macaddr)); free(data); } static void print_regdomain(const struct ieee80211_regdomain *reg, int verb) { if ((reg->regdomain != 0 && reg->regdomain != reg->country) || verb) { const struct regdomain *rd = lib80211_regdomain_findbysku(getregdata(), reg->regdomain); if (rd == NULL) LINE_CHECK("regdomain %d", reg->regdomain); else LINE_CHECK("regdomain %s", rd->name); } if (reg->country != 0 || verb) { const struct country *cc = lib80211_country_findbycc(getregdata(), reg->country); if (cc == NULL) LINE_CHECK("country %d", reg->country); else LINE_CHECK("country %s", cc->isoname); } if (reg->location == 'I') LINE_CHECK("indoor"); else if (reg->location == 'O') LINE_CHECK("outdoor"); else if (verb) LINE_CHECK("anywhere"); if (reg->ecm) LINE_CHECK("ecm"); else if (verb) LINE_CHECK("-ecm"); } static void list_regdomain(int s, int channelsalso) { getregdomain(s); if (channelsalso) { getchaninfo(s); spacer = ':'; 
print_regdomain(&regdomain, 1); LINE_BREAK(); print_channels(s, chaninfo, 1/*allchans*/, 1/*verbose*/); } else print_regdomain(&regdomain, verbose); } static void list_mesh(int s) { struct ieee80211req ireq; struct ieee80211req_mesh_route routes[128]; struct ieee80211req_mesh_route *rt; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = IEEE80211_IOC_MESH_RTCMD; ireq.i_val = IEEE80211_MESH_RTCMD_LIST; ireq.i_data = &routes; ireq.i_len = sizeof(routes); if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get the Mesh routing table"); printf("%-17.17s %-17.17s %4s %4s %4s %6s %s\n" , "DEST" , "NEXT HOP" , "HOPS" , "METRIC" , "LIFETIME" , "MSEQ" , "FLAGS"); for (rt = &routes[0]; rt - &routes[0] < ireq.i_len / sizeof(*rt); rt++){ printf("%s ", ether_ntoa((const struct ether_addr *)rt->imr_dest)); printf("%s %4u %4u %6u %6u %c%c\n", ether_ntoa((const struct ether_addr *)rt->imr_nexthop), rt->imr_nhops, rt->imr_metric, rt->imr_lifetime, rt->imr_lastmseq, (rt->imr_flags & IEEE80211_MESHRT_FLAGS_DISCOVER) ? 'D' : (rt->imr_flags & IEEE80211_MESHRT_FLAGS_VALID) ? 'V' : '!', (rt->imr_flags & IEEE80211_MESHRT_FLAGS_PROXY) ? 'P' : (rt->imr_flags & IEEE80211_MESHRT_FLAGS_GATE) ? 'G' :' '); } } static DECL_CMD_FUNC(set80211list, arg, d) { #define iseq(a,b) (strncasecmp(a,b,sizeof(b)-1) == 0) LINE_INIT('\t'); if (iseq(arg, "sta")) list_stations(s); else if (iseq(arg, "scan") || iseq(arg, "ap")) list_scan(s); else if (iseq(arg, "chan") || iseq(arg, "freq")) list_channels(s, 1); else if (iseq(arg, "active")) list_channels(s, 0); else if (iseq(arg, "keys")) list_keys(s); else if (iseq(arg, "caps")) list_capabilities(s); else if (iseq(arg, "wme") || iseq(arg, "wmm")) list_wme(s); else if (iseq(arg, "mac")) list_mac(s); else if (iseq(arg, "txpow")) list_txpow(s); else if (iseq(arg, "roam")) list_roam(s); else if (iseq(arg, "txparam") || iseq(arg, "txparm")) list_txparams(s); else if (iseq(arg, "regdomain")) list_regdomain(s, 1); else if (iseq(arg, "countries")) list_countries(); else if (iseq(arg, "mesh")) list_mesh(s); else errx(1, "Don't know how to list %s for %s", arg, name); LINE_BREAK(); #undef iseq } static enum ieee80211_opmode get80211opmode(int s) { struct ifmediareq ifmr; (void) memset(&ifmr, 0, sizeof(ifmr)); (void) strlcpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name)); if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) >= 0) { if (ifmr.ifm_current & IFM_IEEE80211_ADHOC) { if (ifmr.ifm_current & IFM_FLAG0) return IEEE80211_M_AHDEMO; else return IEEE80211_M_IBSS; } if (ifmr.ifm_current & IFM_IEEE80211_HOSTAP) return IEEE80211_M_HOSTAP; if (ifmr.ifm_current & IFM_IEEE80211_IBSS) return IEEE80211_M_IBSS; if (ifmr.ifm_current & IFM_IEEE80211_MONITOR) return IEEE80211_M_MONITOR; if (ifmr.ifm_current & IFM_IEEE80211_MBSS) return IEEE80211_M_MBSS; } return IEEE80211_M_STA; } #if 0 static void printcipher(int s, struct ieee80211req *ireq, int keylenop) { switch (ireq->i_val) { case IEEE80211_CIPHER_WEP: ireq->i_type = keylenop; if (ioctl(s, SIOCG80211, ireq) != -1) printf("WEP-%s", ireq->i_len <= 5 ? "40" : ireq->i_len <= 13 ?
"104" : "128"); else printf("WEP"); break; case IEEE80211_CIPHER_TKIP: printf("TKIP"); break; case IEEE80211_CIPHER_AES_OCB: printf("AES-OCB"); break; case IEEE80211_CIPHER_AES_CCM: printf("AES-CCM"); break; case IEEE80211_CIPHER_CKIP: printf("CKIP"); break; case IEEE80211_CIPHER_NONE: printf("NONE"); break; default: printf("UNKNOWN (0x%x)", ireq->i_val); break; } } #endif static void printkey(const struct ieee80211req_key *ik) { static const uint8_t zerodata[IEEE80211_KEYBUF_SIZE]; u_int keylen = ik->ik_keylen; int printcontents; printcontents = printkeys && (memcmp(ik->ik_keydata, zerodata, keylen) != 0 || verbose); if (printcontents) LINE_BREAK(); switch (ik->ik_type) { case IEEE80211_CIPHER_WEP: /* compatibility */ LINE_CHECK("wepkey %u:%s", ik->ik_keyix+1, keylen <= 5 ? "40-bit" : keylen <= 13 ? "104-bit" : "128-bit"); break; case IEEE80211_CIPHER_TKIP: if (keylen > 128/8) keylen -= 128/8; /* ignore MIC for now */ LINE_CHECK("TKIP %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_AES_OCB: LINE_CHECK("AES-OCB %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_AES_CCM: LINE_CHECK("AES-CCM %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_CKIP: LINE_CHECK("CKIP %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_NONE: LINE_CHECK("NULL %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; default: LINE_CHECK("UNKNOWN (0x%x) %u:%u-bit", ik->ik_type, ik->ik_keyix+1, 8*keylen); break; } if (printcontents) { u_int i; printf(" <"); for (i = 0; i < keylen; i++) printf("%02x", ik->ik_keydata[i]); printf(">"); if (ik->ik_type != IEEE80211_CIPHER_WEP && (ik->ik_keyrsc != 0 || verbose)) printf(" rsc %ju", (uintmax_t)ik->ik_keyrsc); if (ik->ik_type != IEEE80211_CIPHER_WEP && (ik->ik_keytsc != 0 || verbose)) printf(" tsc %ju", (uintmax_t)ik->ik_keytsc); if (ik->ik_flags != 0 && verbose) { const char *sep = " "; if (ik->ik_flags & IEEE80211_KEY_XMIT) printf("%stx", sep), sep = "+"; if (ik->ik_flags & IEEE80211_KEY_RECV) printf("%srx", sep), sep = "+"; if (ik->ik_flags & IEEE80211_KEY_DEFAULT) printf("%sdef", sep), sep = "+"; } LINE_BREAK(); } } static void printrate(const char *tag, int v, int defrate, int defmcs) { if ((v & IEEE80211_RATE_MCS) == 0) { if (v != defrate) { if (v & 1) LINE_CHECK("%s %d.5", tag, v/2); else LINE_CHECK("%s %d", tag, v/2); } } else { if (v != defmcs) LINE_CHECK("%s %d", tag, v &~ 0x80); } } static int getid(int s, int ix, void *data, size_t len, int *plen, int mesh) { struct ieee80211req ireq; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = (!mesh) ? IEEE80211_IOC_SSID : IEEE80211_IOC_MESH_ID; ireq.i_val = ix; ireq.i_data = data; ireq.i_len = len; if (ioctl(s, SIOCG80211, &ireq) < 0) return -1; *plen = ireq.i_len; return 0; } static void ieee80211_status(int s) { static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; enum ieee80211_opmode opmode = get80211opmode(s); int i, num, wpa, wme, bgscan, bgscaninterval, val, len, wepmode; uint8_t data[32]; const struct ieee80211_channel *c; const struct ieee80211_roamparam *rp; const struct ieee80211_txparam *tp; if (getid(s, -1, data, sizeof(data), &len, 0) < 0) { /* If we can't get the SSID, this isn't an 802.11 device. */ return; } /* * Invalidate cached state so printing status for multiple * if's doesn't reuse the first interfaces' cached state. 
*/ gotcurchan = 0; gotroam = 0; gottxparams = 0; gothtconf = 0; gotregdomain = 0; printf("\t"); if (opmode == IEEE80211_M_MBSS) { printf("meshid "); getid(s, 0, data, sizeof(data), &len, 1); print_string(data, len); } else { if (get80211val(s, IEEE80211_IOC_NUMSSIDS, &num) < 0) num = 0; printf("ssid "); if (num > 1) { for (i = 0; i < num; i++) { if (getid(s, i, data, sizeof(data), &len, 0) >= 0 && len > 0) { printf(" %d:", i + 1); print_string(data, len); } } } else print_string(data, len); } c = getcurchan(s); if (c->ic_freq != IEEE80211_CHAN_ANY) { char buf[14]; printf(" channel %d (%u MHz%s)", c->ic_ieee, c->ic_freq, get_chaninfo(c, 1, buf, sizeof(buf))); } else if (verbose) printf(" channel UNDEF"); if (get80211(s, IEEE80211_IOC_BSSID, data, IEEE80211_ADDR_LEN) >= 0 && (memcmp(data, zerobssid, sizeof(zerobssid)) != 0 || verbose)) printf(" bssid %s", ether_ntoa((struct ether_addr *)data)); if (get80211len(s, IEEE80211_IOC_STATIONNAME, data, sizeof(data), &len) != -1) { printf("\n\tstationname "); print_string(data, len); } spacer = ' '; /* force first break */ LINE_BREAK(); list_regdomain(s, 0); wpa = 0; if (get80211val(s, IEEE80211_IOC_AUTHMODE, &val) != -1) { switch (val) { case IEEE80211_AUTH_NONE: LINE_CHECK("authmode NONE"); break; case IEEE80211_AUTH_OPEN: LINE_CHECK("authmode OPEN"); break; case IEEE80211_AUTH_SHARED: LINE_CHECK("authmode SHARED"); break; case IEEE80211_AUTH_8021X: LINE_CHECK("authmode 802.1x"); break; case IEEE80211_AUTH_WPA: if (get80211val(s, IEEE80211_IOC_WPA, &wpa) < 0) wpa = 1; /* default to WPA1 */ switch (wpa) { case 2: LINE_CHECK("authmode WPA2/802.11i"); break; case 3: LINE_CHECK("authmode WPA1+WPA2/802.11i"); break; default: LINE_CHECK("authmode WPA"); break; } break; case IEEE80211_AUTH_AUTO: LINE_CHECK("authmode AUTO"); break; default: LINE_CHECK("authmode UNKNOWN (0x%x)", val); break; } } if (wpa || verbose) { if (get80211val(s, IEEE80211_IOC_WPS, &val) != -1) { if (val) LINE_CHECK("wps"); else if (verbose) LINE_CHECK("-wps"); } if (get80211val(s, IEEE80211_IOC_TSN, &val) != -1) { if (val) LINE_CHECK("tsn"); else if (verbose) LINE_CHECK("-tsn"); } if (ioctl(s, IEEE80211_IOC_COUNTERMEASURES, &val) != -1) { if (val) LINE_CHECK("countermeasures"); else if (verbose) LINE_CHECK("-countermeasures"); } #if 0 /* XXX not interesting with WPA done in user space */ ireq.i_type = IEEE80211_IOC_KEYMGTALGS; if (ioctl(s, SIOCG80211, &ireq) != -1) { } ireq.i_type = IEEE80211_IOC_MCASTCIPHER; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("mcastcipher "); printcipher(s, &ireq, IEEE80211_IOC_MCASTKEYLEN); spacer = ' '; } ireq.i_type = IEEE80211_IOC_UCASTCIPHER; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("ucastcipher "); printcipher(s, &ireq, IEEE80211_IOC_UCASTKEYLEN); } if (wpa & 2) { ireq.i_type = IEEE80211_IOC_RSNCAPS; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("RSN caps 0x%x", ireq.i_val); spacer = ' '; } } ireq.i_type = IEEE80211_IOC_UCASTCIPHERS; if (ioctl(s, SIOCG80211, &ireq) != -1) { } #endif } if (get80211val(s, IEEE80211_IOC_WEP, &wepmode) != -1 && wepmode != IEEE80211_WEP_NOSUP) { switch (wepmode) { case IEEE80211_WEP_OFF: LINE_CHECK("privacy OFF"); break; case IEEE80211_WEP_ON: LINE_CHECK("privacy ON"); break; case IEEE80211_WEP_MIXED: LINE_CHECK("privacy MIXED"); break; default: LINE_CHECK("privacy UNKNOWN (0x%x)", wepmode); break; } /* * If we get here then we've got WEP support so we need * to print WEP status. 
*/ if (get80211val(s, IEEE80211_IOC_WEPTXKEY, &val) < 0) { warn("WEP support, but no tx key!"); goto end; } if (val != -1) LINE_CHECK("deftxkey %d", val+1); else if (wepmode != IEEE80211_WEP_OFF || verbose) LINE_CHECK("deftxkey UNDEF"); if (get80211val(s, IEEE80211_IOC_NUMWEPKEYS, &num) < 0) { warn("WEP support, but no NUMWEPKEYS support!"); goto end; } for (i = 0; i < num; i++) { struct ieee80211req_key ik; memset(&ik, 0, sizeof(ik)); ik.ik_keyix = i; if (get80211(s, IEEE80211_IOC_WPAKEY, &ik, sizeof(ik)) < 0) { warn("WEP support, but can get keys!"); goto end; } if (ik.ik_keylen != 0) { if (verbose) LINE_BREAK(); printkey(&ik); } } end: ; } if (get80211val(s, IEEE80211_IOC_POWERSAVE, &val) != -1 && val != IEEE80211_POWERSAVE_NOSUP ) { if (val != IEEE80211_POWERSAVE_OFF || verbose) { switch (val) { case IEEE80211_POWERSAVE_OFF: LINE_CHECK("powersavemode OFF"); break; case IEEE80211_POWERSAVE_CAM: LINE_CHECK("powersavemode CAM"); break; case IEEE80211_POWERSAVE_PSP: LINE_CHECK("powersavemode PSP"); break; case IEEE80211_POWERSAVE_PSP_CAM: LINE_CHECK("powersavemode PSP-CAM"); break; } if (get80211val(s, IEEE80211_IOC_POWERSAVESLEEP, &val) != -1) LINE_CHECK("powersavesleep %d", val); } } if (get80211val(s, IEEE80211_IOC_TXPOWER, &val) != -1) { if (val & 1) LINE_CHECK("txpower %d.5", val/2); else LINE_CHECK("txpower %d", val/2); } if (verbose) { if (get80211val(s, IEEE80211_IOC_TXPOWMAX, &val) != -1) LINE_CHECK("txpowmax %.1f", val/2.); } if (get80211val(s, IEEE80211_IOC_DOTD, &val) != -1) { if (val) LINE_CHECK("dotd"); else if (verbose) LINE_CHECK("-dotd"); } if (get80211val(s, IEEE80211_IOC_RTSTHRESHOLD, &val) != -1) { if (val != IEEE80211_RTS_MAX || verbose) LINE_CHECK("rtsthreshold %d", val); } if (get80211val(s, IEEE80211_IOC_FRAGTHRESHOLD, &val) != -1) { if (val != IEEE80211_FRAG_MAX || verbose) LINE_CHECK("fragthreshold %d", val); } if (opmode == IEEE80211_M_STA || verbose) { if (get80211val(s, IEEE80211_IOC_BMISSTHRESHOLD, &val) != -1) { if (val != IEEE80211_HWBMISS_MAX || verbose) LINE_CHECK("bmiss %d", val); } } if (!verbose) { gettxparams(s); tp = &txparams.params[chan2mode(c)]; printrate("ucastrate", tp->ucastrate, IEEE80211_FIXED_RATE_NONE, IEEE80211_FIXED_RATE_NONE); printrate("mcastrate", tp->mcastrate, 2*1, IEEE80211_RATE_MCS|0); printrate("mgmtrate", tp->mgmtrate, 2*1, IEEE80211_RATE_MCS|0); if (tp->maxretry != 6) /* XXX */ LINE_CHECK("maxretry %d", tp->maxretry); } else { LINE_BREAK(); list_txparams(s); } bgscaninterval = -1; (void) get80211val(s, IEEE80211_IOC_BGSCAN_INTERVAL, &bgscaninterval); if (get80211val(s, IEEE80211_IOC_SCANVALID, &val) != -1) { if (val != bgscaninterval || verbose) LINE_CHECK("scanvalid %u", val); } bgscan = 0; if (get80211val(s, IEEE80211_IOC_BGSCAN, &bgscan) != -1) { if (bgscan) LINE_CHECK("bgscan"); else if (verbose) LINE_CHECK("-bgscan"); } if (bgscan || verbose) { if (bgscaninterval != -1) LINE_CHECK("bgscanintvl %u", bgscaninterval); if (get80211val(s, IEEE80211_IOC_BGSCAN_IDLE, &val) != -1) LINE_CHECK("bgscanidle %u", val); if (!verbose) { getroam(s); rp = &roamparams.params[chan2mode(c)]; if (rp->rssi & 1) LINE_CHECK("roam:rssi %u.5", rp->rssi/2); else LINE_CHECK("roam:rssi %u", rp->rssi/2); LINE_CHECK("roam:rate %u", rp->rate/2); } else { LINE_BREAK(); list_roam(s); LINE_BREAK(); } } if (IEEE80211_IS_CHAN_ANYG(c) || verbose) { if (get80211val(s, IEEE80211_IOC_PUREG, &val) != -1) { if (val) LINE_CHECK("pureg"); else if (verbose) LINE_CHECK("-pureg"); } if (get80211val(s, IEEE80211_IOC_PROTMODE, &val) != -1) { switch (val) { case 
IEEE80211_PROTMODE_OFF: LINE_CHECK("protmode OFF"); break; case IEEE80211_PROTMODE_CTS: LINE_CHECK("protmode CTS"); break; case IEEE80211_PROTMODE_RTSCTS: LINE_CHECK("protmode RTSCTS"); break; default: LINE_CHECK("protmode UNKNOWN (0x%x)", val); break; } } } if (IEEE80211_IS_CHAN_HT(c) || verbose) { gethtconf(s); switch (htconf & 3) { case 0: case 2: LINE_CHECK("-ht"); break; case 1: LINE_CHECK("ht20"); break; case 3: if (verbose) LINE_CHECK("ht"); break; } if (get80211val(s, IEEE80211_IOC_HTCOMPAT, &val) != -1) { if (!val) LINE_CHECK("-htcompat"); else if (verbose) LINE_CHECK("htcompat"); } if (get80211val(s, IEEE80211_IOC_AMPDU, &val) != -1) { switch (val) { case 0: LINE_CHECK("-ampdu"); break; case 1: LINE_CHECK("ampdutx -ampdurx"); break; case 2: LINE_CHECK("-ampdutx ampdurx"); break; case 3: if (verbose) LINE_CHECK("ampdu"); break; } } if (get80211val(s, IEEE80211_IOC_AMPDU_LIMIT, &val) != -1) { switch (val) { case IEEE80211_HTCAP_MAXRXAMPDU_8K: LINE_CHECK("ampdulimit 8k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_16K: LINE_CHECK("ampdulimit 16k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_32K: LINE_CHECK("ampdulimit 32k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_64K: LINE_CHECK("ampdulimit 64k"); break; } } if (get80211val(s, IEEE80211_IOC_AMPDU_DENSITY, &val) != -1) { switch (val) { case IEEE80211_HTCAP_MPDUDENSITY_NA: if (verbose) LINE_CHECK("ampdudensity NA"); break; case IEEE80211_HTCAP_MPDUDENSITY_025: LINE_CHECK("ampdudensity .25"); break; case IEEE80211_HTCAP_MPDUDENSITY_05: LINE_CHECK("ampdudensity .5"); break; case IEEE80211_HTCAP_MPDUDENSITY_1: LINE_CHECK("ampdudensity 1"); break; case IEEE80211_HTCAP_MPDUDENSITY_2: LINE_CHECK("ampdudensity 2"); break; case IEEE80211_HTCAP_MPDUDENSITY_4: LINE_CHECK("ampdudensity 4"); break; case IEEE80211_HTCAP_MPDUDENSITY_8: LINE_CHECK("ampdudensity 8"); break; case IEEE80211_HTCAP_MPDUDENSITY_16: LINE_CHECK("ampdudensity 16"); break; } } if (get80211val(s, IEEE80211_IOC_AMSDU, &val) != -1) { switch (val) { case 0: LINE_CHECK("-amsdu"); break; case 1: LINE_CHECK("amsdutx -amsdurx"); break; case 2: LINE_CHECK("-amsdutx amsdurx"); break; case 3: if (verbose) LINE_CHECK("amsdu"); break; } } /* XXX amsdu limit */ if (get80211val(s, IEEE80211_IOC_SHORTGI, &val) != -1) { if (val) LINE_CHECK("shortgi"); else if (verbose) LINE_CHECK("-shortgi"); } if (get80211val(s, IEEE80211_IOC_HTPROTMODE, &val) != -1) { if (val == IEEE80211_PROTMODE_OFF) LINE_CHECK("htprotmode OFF"); else if (val != IEEE80211_PROTMODE_RTSCTS) LINE_CHECK("htprotmode UNKNOWN (0x%x)", val); else if (verbose) LINE_CHECK("htprotmode RTSCTS"); } if (get80211val(s, IEEE80211_IOC_PUREN, &val) != -1) { if (val) LINE_CHECK("puren"); else if (verbose) LINE_CHECK("-puren"); } if (get80211val(s, IEEE80211_IOC_SMPS, &val) != -1) { if (val == IEEE80211_HTCAP_SMPS_DYNAMIC) LINE_CHECK("smpsdyn"); else if (val == IEEE80211_HTCAP_SMPS_ENA) LINE_CHECK("smps"); else if (verbose) LINE_CHECK("-smps"); } if (get80211val(s, IEEE80211_IOC_RIFS, &val) != -1) { if (val) LINE_CHECK("rifs"); else if (verbose) LINE_CHECK("-rifs"); } if (get80211val(s, IEEE80211_IOC_STBC, &val) != -1) { switch (val) { case 0: LINE_CHECK("-stbc"); break; case 1: LINE_CHECK("stbctx -stbcrx"); break; case 2: LINE_CHECK("-stbctx stbcrx"); break; case 3: if (verbose) LINE_CHECK("stbc"); break; } } if (get80211val(s, IEEE80211_IOC_LDPC, &val) != -1) { switch (val) { case 0: LINE_CHECK("-ldpc"); break; case 1: LINE_CHECK("ldpctx -ldpcrx"); break; case 2: LINE_CHECK("-ldpctx ldpcrx"); break; case 3: if (verbose) LINE_CHECK("ldpc"); break; 
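/* * NB: the AMPDU, AMSDU, STBC and LDPC values decoded above encode transmit in bit 0 and receive in bit 1, so 1 means tx-only, 2 means rx-only and 3 means enabled in both directions; the fully-enabled case (3) is only reported when verbose. */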
} } } if (IEEE80211_IS_CHAN_VHT(c) || verbose) { getvhtconf(s); if (vhtconf & 0x1) LINE_CHECK("vht"); else LINE_CHECK("-vht"); if (vhtconf & 0x2) LINE_CHECK("vht40"); else LINE_CHECK("-vht40"); if (vhtconf & 0x4) LINE_CHECK("vht80"); else LINE_CHECK("-vht80"); if (vhtconf & 0x8) LINE_CHECK("vht80p80"); else LINE_CHECK("-vht80p80"); if (vhtconf & 0x10) LINE_CHECK("vht160"); else LINE_CHECK("-vht160"); } if (get80211val(s, IEEE80211_IOC_WME, &wme) != -1) { if (wme) LINE_CHECK("wme"); else if (verbose) LINE_CHECK("-wme"); } else wme = 0; if (get80211val(s, IEEE80211_IOC_BURST, &val) != -1) { if (val) LINE_CHECK("burst"); else if (verbose) LINE_CHECK("-burst"); } if (get80211val(s, IEEE80211_IOC_FF, &val) != -1) { if (val) LINE_CHECK("ff"); else if (verbose) LINE_CHECK("-ff"); } if (get80211val(s, IEEE80211_IOC_TURBOP, &val) != -1) { if (val) LINE_CHECK("dturbo"); else if (verbose) LINE_CHECK("-dturbo"); } if (get80211val(s, IEEE80211_IOC_DWDS, &val) != -1) { if (val) LINE_CHECK("dwds"); else if (verbose) LINE_CHECK("-dwds"); } if (opmode == IEEE80211_M_HOSTAP) { if (get80211val(s, IEEE80211_IOC_HIDESSID, &val) != -1) { if (val) LINE_CHECK("hidessid"); else if (verbose) LINE_CHECK("-hidessid"); } if (get80211val(s, IEEE80211_IOC_APBRIDGE, &val) != -1) { if (!val) LINE_CHECK("-apbridge"); else if (verbose) LINE_CHECK("apbridge"); } if (get80211val(s, IEEE80211_IOC_DTIM_PERIOD, &val) != -1) LINE_CHECK("dtimperiod %u", val); if (get80211val(s, IEEE80211_IOC_DOTH, &val) != -1) { if (!val) LINE_CHECK("-doth"); else if (verbose) LINE_CHECK("doth"); } if (get80211val(s, IEEE80211_IOC_DFS, &val) != -1) { if (!val) LINE_CHECK("-dfs"); else if (verbose) LINE_CHECK("dfs"); } if (get80211val(s, IEEE80211_IOC_INACTIVITY, &val) != -1) { if (!val) LINE_CHECK("-inact"); else if (verbose) LINE_CHECK("inact"); } } else { if (get80211val(s, IEEE80211_IOC_ROAMING, &val) != -1) { if (val != IEEE80211_ROAMING_AUTO || verbose) { switch (val) { case IEEE80211_ROAMING_DEVICE: LINE_CHECK("roaming DEVICE"); break; case IEEE80211_ROAMING_AUTO: LINE_CHECK("roaming AUTO"); break; case IEEE80211_ROAMING_MANUAL: LINE_CHECK("roaming MANUAL"); break; default: LINE_CHECK("roaming UNKNOWN (0x%x)", val); break; } } } } if (opmode == IEEE80211_M_AHDEMO) { if (get80211val(s, IEEE80211_IOC_TDMA_SLOT, &val) != -1) LINE_CHECK("tdmaslot %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_SLOTCNT, &val) != -1) LINE_CHECK("tdmaslotcnt %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_SLOTLEN, &val) != -1) LINE_CHECK("tdmaslotlen %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_BINTERVAL, &val) != -1) LINE_CHECK("tdmabintval %u", val); } else if (get80211val(s, IEEE80211_IOC_BEACON_INTERVAL, &val) != -1) { /* XXX default define not visible */ if (val != 100 || verbose) LINE_CHECK("bintval %u", val); } if (wme && verbose) { LINE_BREAK(); list_wme(s); } if (opmode == IEEE80211_M_MBSS) { if (get80211val(s, IEEE80211_IOC_MESH_TTL, &val) != -1) { LINE_CHECK("meshttl %u", val); } if (get80211val(s, IEEE80211_IOC_MESH_AP, &val) != -1) { if (val) LINE_CHECK("meshpeering"); else LINE_CHECK("-meshpeering"); } if (get80211val(s, IEEE80211_IOC_MESH_FWRD, &val) != -1) { if (val) LINE_CHECK("meshforward"); else LINE_CHECK("-meshforward"); } if (get80211val(s, IEEE80211_IOC_MESH_GATE, &val) != -1) { if (val) LINE_CHECK("meshgate"); else LINE_CHECK("-meshgate"); } if (get80211len(s, IEEE80211_IOC_MESH_PR_METRIC, data, 12, &len) != -1) { data[len] = '\0'; LINE_CHECK("meshmetric %s", data); } if (get80211len(s, IEEE80211_IOC_MESH_PR_PATH, data, 12, &len) != 
-1) { data[len] = '\0'; LINE_CHECK("meshpath %s", data); } if (get80211val(s, IEEE80211_IOC_HWMP_ROOTMODE, &val) != -1) { switch (val) { case IEEE80211_HWMP_ROOTMODE_DISABLED: LINE_CHECK("hwmprootmode DISABLED"); break; case IEEE80211_HWMP_ROOTMODE_NORMAL: LINE_CHECK("hwmprootmode NORMAL"); break; case IEEE80211_HWMP_ROOTMODE_PROACTIVE: LINE_CHECK("hwmprootmode PROACTIVE"); break; case IEEE80211_HWMP_ROOTMODE_RANN: LINE_CHECK("hwmprootmode RANN"); break; default: LINE_CHECK("hwmprootmode UNKNOWN(%d)", val); break; } } if (get80211val(s, IEEE80211_IOC_HWMP_MAXHOPS, &val) != -1) { LINE_CHECK("hwmpmaxhops %u", val); } } LINE_BREAK(); } static int get80211(int s, int type, void *data, int len) { return (lib80211_get80211(s, name, type, data, len)); } static int get80211len(int s, int type, void *data, int len, int *plen) { return (lib80211_get80211len(s, name, type, data, len, plen)); } static int get80211val(int s, int type, int *val) { return (lib80211_get80211val(s, name, type, val)); } static void set80211(int s, int type, int val, int len, void *data) { int ret; ret = lib80211_set80211(s, name, type, val, len, data); if (ret < 0) err(1, "SIOCS80211"); } static const char * get_string(const char *val, const char *sep, u_int8_t *buf, int *lenp) { int len; int hexstr; u_int8_t *p; len = *lenp; p = buf; hexstr = (val[0] == '0' && tolower((u_char)val[1]) == 'x'); if (hexstr) val += 2; for (;;) { if (*val == '\0') break; if (sep != NULL && strchr(sep, *val) != NULL) { val++; break; } if (hexstr) { if (!isxdigit((u_char)val[0])) { warnx("bad hexadecimal digits"); return NULL; } if (!isxdigit((u_char)val[1])) { warnx("odd count hexadecimal digits"); return NULL; } } if (p >= buf + len) { if (hexstr) warnx("hexadecimal digits too long"); else warnx("string too long"); return NULL; } if (hexstr) { #define tohex(x) (isdigit(x) ? (x) - '0' : tolower(x) - 'a' + 10) *p++ = (tohex((u_char)val[0]) << 4) | tohex((u_char)val[1]); #undef tohex val += 2; } else *p++ = *val++; } len = p - buf; /* The string "-" is treated as the empty string. */ if (!hexstr && len == 1 && buf[0] == '-') { len = 0; memset(buf, 0, *lenp); } else if (len < *lenp) memset(p, 0, *lenp - len); *lenp = len; return val; } static void print_string(const u_int8_t *buf, int len) { int i; int hasspc; i = 0; hasspc = 0; for (; i < len; i++) { if (!isprint(buf[i]) && buf[i] != '\0') break; if (isspace(buf[i])) hasspc++; } if (i == len) { if (hasspc || len == 0 || buf[0] == '\0') printf("\"%.*s\"", len, buf); else printf("%.*s", len, buf); } else { printf("0x"); for (i = 0; i < len; i++) printf("%02x", buf[i]); } } static void setdefregdomain(int s) { struct regdata *rdp = getregdata(); const struct regdomain *rd; /* Check if regdomain/country was already set by a previous call. */ /* XXX is it possible? */ if (regdomain.regdomain != 0 || regdomain.country != CTRY_DEFAULT) return; getregdomain(s); /* Check if it was already set by the driver. */ if (regdomain.regdomain != 0 || regdomain.country != CTRY_DEFAULT) return; /* Set FCC/US as default. */ rd = lib80211_regdomain_findbysku(rdp, SKU_FCC); if (rd == NULL) errx(1, "FCC regdomain was not found"); regdomain.regdomain = rd->sku; if (rd->cc != NULL) defaultcountry(rd); /* Send changes to net80211. */ setregdomain_cb(s, &regdomain); /* Cleanup (so it can be overriden by subsequent parameters). */ regdomain.regdomain = 0; regdomain.country = CTRY_DEFAULT; regdomain.isocc[0] = 0; regdomain.isocc[1] = 0; } /* * Virtual AP cloning support.
*/ static struct ieee80211_clone_params params = { .icp_opmode = IEEE80211_M_STA, /* default to station mode */ }; static void wlan_create(int s, struct ifreq *ifr) { static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; char orig_name[IFNAMSIZ]; if (params.icp_parent[0] == '\0') errx(1, "must specify a parent device (wlandev) when creating " "a wlan device"); if (params.icp_opmode == IEEE80211_M_WDS && memcmp(params.icp_bssid, zerobssid, sizeof(zerobssid)) == 0) errx(1, "no bssid specified for WDS (use wlanbssid)"); ifr->ifr_data = (caddr_t) ¶ms; if (ioctl(s, SIOCIFCREATE2, ifr) < 0) err(1, "SIOCIFCREATE2"); /* XXX preserve original name for ifclonecreate(). */ strlcpy(orig_name, name, sizeof(orig_name)); strlcpy(name, ifr->ifr_name, sizeof(name)); setdefregdomain(s); strlcpy(name, orig_name, sizeof(name)); } static DECL_CMD_FUNC(set80211clone_wlandev, arg, d) { strlcpy(params.icp_parent, arg, IFNAMSIZ); } static DECL_CMD_FUNC(set80211clone_wlanbssid, arg, d) { const struct ether_addr *ea; ea = ether_aton(arg); if (ea == NULL) errx(1, "%s: cannot parse bssid", arg); memcpy(params.icp_bssid, ea->octet, IEEE80211_ADDR_LEN); } static DECL_CMD_FUNC(set80211clone_wlanaddr, arg, d) { const struct ether_addr *ea; ea = ether_aton(arg); if (ea == NULL) errx(1, "%s: cannot parse address", arg); memcpy(params.icp_macaddr, ea->octet, IEEE80211_ADDR_LEN); params.icp_flags |= IEEE80211_CLONE_MACADDR; } static DECL_CMD_FUNC(set80211clone_wlanmode, arg, d) { #define iseq(a,b) (strncasecmp(a,b,sizeof(b)-1) == 0) if (iseq(arg, "sta")) params.icp_opmode = IEEE80211_M_STA; else if (iseq(arg, "ahdemo") || iseq(arg, "adhoc-demo")) params.icp_opmode = IEEE80211_M_AHDEMO; else if (iseq(arg, "ibss") || iseq(arg, "adhoc")) params.icp_opmode = IEEE80211_M_IBSS; else if (iseq(arg, "ap") || iseq(arg, "host")) params.icp_opmode = IEEE80211_M_HOSTAP; else if (iseq(arg, "wds")) params.icp_opmode = IEEE80211_M_WDS; else if (iseq(arg, "monitor")) params.icp_opmode = IEEE80211_M_MONITOR; else if (iseq(arg, "tdma")) { params.icp_opmode = IEEE80211_M_AHDEMO; params.icp_flags |= IEEE80211_CLONE_TDMA; } else if (iseq(arg, "mesh") || iseq(arg, "mp")) /* mesh point */ params.icp_opmode = IEEE80211_M_MBSS; else errx(1, "Don't know to create %s for %s", arg, name); #undef iseq } static void set80211clone_beacons(const char *val, int d, int s, const struct afswtch *rafp) { /* NB: inverted sense */ if (d) params.icp_flags &= ~IEEE80211_CLONE_NOBEACONS; else params.icp_flags |= IEEE80211_CLONE_NOBEACONS; } static void set80211clone_bssid(const char *val, int d, int s, const struct afswtch *rafp) { if (d) params.icp_flags |= IEEE80211_CLONE_BSSID; else params.icp_flags &= ~IEEE80211_CLONE_BSSID; } static void set80211clone_wdslegacy(const char *val, int d, int s, const struct afswtch *rafp) { if (d) params.icp_flags |= IEEE80211_CLONE_WDSLEGACY; else params.icp_flags &= ~IEEE80211_CLONE_WDSLEGACY; } static struct cmd ieee80211_cmds[] = { DEF_CMD_ARG("ssid", set80211ssid), DEF_CMD_ARG("nwid", set80211ssid), DEF_CMD_ARG("meshid", set80211meshid), DEF_CMD_ARG("stationname", set80211stationname), DEF_CMD_ARG("station", set80211stationname), /* BSD/OS */ DEF_CMD_ARG("channel", set80211channel), DEF_CMD_ARG("authmode", set80211authmode), DEF_CMD_ARG("powersavemode", set80211powersavemode), DEF_CMD("powersave", 1, set80211powersave), DEF_CMD("-powersave", 0, set80211powersave), DEF_CMD_ARG("powersavesleep", set80211powersavesleep), DEF_CMD_ARG("wepmode", set80211wepmode), DEF_CMD("wep", 1, set80211wep), DEF_CMD("-wep", 0, set80211wep), 
DEF_CMD_ARG("deftxkey", set80211weptxkey), DEF_CMD_ARG("weptxkey", set80211weptxkey), DEF_CMD_ARG("wepkey", set80211wepkey), DEF_CMD_ARG("nwkey", set80211nwkey), /* NetBSD */ DEF_CMD("-nwkey", 0, set80211wep), /* NetBSD */ DEF_CMD_ARG("rtsthreshold", set80211rtsthreshold), DEF_CMD_ARG("protmode", set80211protmode), DEF_CMD_ARG("txpower", set80211txpower), DEF_CMD_ARG("roaming", set80211roaming), DEF_CMD("wme", 1, set80211wme), DEF_CMD("-wme", 0, set80211wme), DEF_CMD("wmm", 1, set80211wme), DEF_CMD("-wmm", 0, set80211wme), DEF_CMD("hidessid", 1, set80211hidessid), DEF_CMD("-hidessid", 0, set80211hidessid), DEF_CMD("apbridge", 1, set80211apbridge), DEF_CMD("-apbridge", 0, set80211apbridge), DEF_CMD_ARG("chanlist", set80211chanlist), DEF_CMD_ARG("bssid", set80211bssid), DEF_CMD_ARG("ap", set80211bssid), DEF_CMD("scan", 0, set80211scan), DEF_CMD_ARG("list", set80211list), DEF_CMD_ARG2("cwmin", set80211cwmin), DEF_CMD_ARG2("cwmax", set80211cwmax), DEF_CMD_ARG2("aifs", set80211aifs), DEF_CMD_ARG2("txoplimit", set80211txoplimit), DEF_CMD_ARG("acm", set80211acm), DEF_CMD_ARG("-acm", set80211noacm), DEF_CMD_ARG("ack", set80211ackpolicy), DEF_CMD_ARG("-ack", set80211noackpolicy), DEF_CMD_ARG2("bss:cwmin", set80211bsscwmin), DEF_CMD_ARG2("bss:cwmax", set80211bsscwmax), DEF_CMD_ARG2("bss:aifs", set80211bssaifs), DEF_CMD_ARG2("bss:txoplimit", set80211bsstxoplimit), DEF_CMD_ARG("dtimperiod", set80211dtimperiod), DEF_CMD_ARG("bintval", set80211bintval), DEF_CMD("mac:open", IEEE80211_MACCMD_POLICY_OPEN, set80211maccmd), DEF_CMD("mac:allow", IEEE80211_MACCMD_POLICY_ALLOW, set80211maccmd), DEF_CMD("mac:deny", IEEE80211_MACCMD_POLICY_DENY, set80211maccmd), DEF_CMD("mac:radius", IEEE80211_MACCMD_POLICY_RADIUS, set80211maccmd), DEF_CMD("mac:flush", IEEE80211_MACCMD_FLUSH, set80211maccmd), DEF_CMD("mac:detach", IEEE80211_MACCMD_DETACH, set80211maccmd), DEF_CMD_ARG("mac:add", set80211addmac), DEF_CMD_ARG("mac:del", set80211delmac), DEF_CMD_ARG("mac:kick", set80211kickmac), DEF_CMD("pureg", 1, set80211pureg), DEF_CMD("-pureg", 0, set80211pureg), DEF_CMD("ff", 1, set80211fastframes), DEF_CMD("-ff", 0, set80211fastframes), DEF_CMD("dturbo", 1, set80211dturbo), DEF_CMD("-dturbo", 0, set80211dturbo), DEF_CMD("bgscan", 1, set80211bgscan), DEF_CMD("-bgscan", 0, set80211bgscan), DEF_CMD_ARG("bgscanidle", set80211bgscanidle), DEF_CMD_ARG("bgscanintvl", set80211bgscanintvl), DEF_CMD_ARG("scanvalid", set80211scanvalid), - DEF_CMD("quiet", 1, set80211quiet), - DEF_CMD("-quiet", 0, set80211quiet), - DEF_CMD_ARG("quiet_count", set80211quietcount), - DEF_CMD_ARG("quiet_period", set80211quietperiod), - DEF_CMD_ARG("quiet_dur", set80211quietduration), - DEF_CMD_ARG("quiet_offset", set80211quietoffset), + DEF_CMD("quiet", 1, set80211quiet), + DEF_CMD("-quiet", 0, set80211quiet), + DEF_CMD_ARG("quiet_count", set80211quietcount), + DEF_CMD_ARG("quiet_period", set80211quietperiod), + DEF_CMD_ARG("quiet_duration", set80211quietduration), + DEF_CMD_ARG("quiet_offset", set80211quietoffset), DEF_CMD_ARG("roam:rssi", set80211roamrssi), DEF_CMD_ARG("roam:rate", set80211roamrate), DEF_CMD_ARG("mcastrate", set80211mcastrate), DEF_CMD_ARG("ucastrate", set80211ucastrate), DEF_CMD_ARG("mgtrate", set80211mgtrate), DEF_CMD_ARG("mgmtrate", set80211mgtrate), DEF_CMD_ARG("maxretry", set80211maxretry), DEF_CMD_ARG("fragthreshold", set80211fragthreshold), DEF_CMD("burst", 1, set80211burst), DEF_CMD("-burst", 0, set80211burst), DEF_CMD_ARG("bmiss", set80211bmissthreshold), DEF_CMD_ARG("bmissthreshold", set80211bmissthreshold), DEF_CMD("shortgi", 1, 
set80211shortgi), DEF_CMD("-shortgi", 0, set80211shortgi), DEF_CMD("ampdurx", 2, set80211ampdu), DEF_CMD("-ampdurx", -2, set80211ampdu), DEF_CMD("ampdutx", 1, set80211ampdu), DEF_CMD("-ampdutx", -1, set80211ampdu), DEF_CMD("ampdu", 3, set80211ampdu), /* NB: tx+rx */ DEF_CMD("-ampdu", -3, set80211ampdu), DEF_CMD_ARG("ampdulimit", set80211ampdulimit), DEF_CMD_ARG("ampdudensity", set80211ampdudensity), DEF_CMD("amsdurx", 2, set80211amsdu), DEF_CMD("-amsdurx", -2, set80211amsdu), DEF_CMD("amsdutx", 1, set80211amsdu), DEF_CMD("-amsdutx", -1, set80211amsdu), DEF_CMD("amsdu", 3, set80211amsdu), /* NB: tx+rx */ DEF_CMD("-amsdu", -3, set80211amsdu), DEF_CMD_ARG("amsdulimit", set80211amsdulimit), DEF_CMD("stbcrx", 2, set80211stbc), DEF_CMD("-stbcrx", -2, set80211stbc), DEF_CMD("stbctx", 1, set80211stbc), DEF_CMD("-stbctx", -1, set80211stbc), DEF_CMD("stbc", 3, set80211stbc), /* NB: tx+rx */ DEF_CMD("-stbc", -3, set80211stbc), DEF_CMD("ldpcrx", 2, set80211ldpc), DEF_CMD("-ldpcrx", -2, set80211ldpc), DEF_CMD("ldpctx", 1, set80211ldpc), DEF_CMD("-ldpctx", -1, set80211ldpc), DEF_CMD("ldpc", 3, set80211ldpc), /* NB: tx+rx */ DEF_CMD("-ldpc", -3, set80211ldpc), DEF_CMD("puren", 1, set80211puren), DEF_CMD("-puren", 0, set80211puren), DEF_CMD("doth", 1, set80211doth), DEF_CMD("-doth", 0, set80211doth), DEF_CMD("dfs", 1, set80211dfs), DEF_CMD("-dfs", 0, set80211dfs), DEF_CMD("htcompat", 1, set80211htcompat), DEF_CMD("-htcompat", 0, set80211htcompat), DEF_CMD("dwds", 1, set80211dwds), DEF_CMD("-dwds", 0, set80211dwds), DEF_CMD("inact", 1, set80211inact), DEF_CMD("-inact", 0, set80211inact), DEF_CMD("tsn", 1, set80211tsn), DEF_CMD("-tsn", 0, set80211tsn), DEF_CMD_ARG("regdomain", set80211regdomain), DEF_CMD_ARG("country", set80211country), DEF_CMD("indoor", 'I', set80211location), DEF_CMD("-indoor", 'O', set80211location), DEF_CMD("outdoor", 'O', set80211location), DEF_CMD("-outdoor", 'I', set80211location), DEF_CMD("anywhere", ' ', set80211location), DEF_CMD("ecm", 1, set80211ecm), DEF_CMD("-ecm", 0, set80211ecm), DEF_CMD("dotd", 1, set80211dotd), DEF_CMD("-dotd", 0, set80211dotd), DEF_CMD_ARG("htprotmode", set80211htprotmode), DEF_CMD("ht20", 1, set80211htconf), DEF_CMD("-ht20", 0, set80211htconf), DEF_CMD("ht40", 3, set80211htconf), /* NB: 20+40 */ DEF_CMD("-ht40", 0, set80211htconf), DEF_CMD("ht", 3, set80211htconf), /* NB: 20+40 */ DEF_CMD("-ht", 0, set80211htconf), DEF_CMD("vht", 1, set80211vhtconf), DEF_CMD("-vht", 0, set80211vhtconf), DEF_CMD("vht40", 2, set80211vhtconf), DEF_CMD("-vht40", -2, set80211vhtconf), DEF_CMD("vht80", 4, set80211vhtconf), DEF_CMD("-vht80", -4, set80211vhtconf), DEF_CMD("vht80p80", 8, set80211vhtconf), DEF_CMD("-vht80p80", -8, set80211vhtconf), DEF_CMD("vht160", 16, set80211vhtconf), DEF_CMD("-vht160", -16, set80211vhtconf), DEF_CMD("rifs", 1, set80211rifs), DEF_CMD("-rifs", 0, set80211rifs), DEF_CMD("smps", IEEE80211_HTCAP_SMPS_ENA, set80211smps), DEF_CMD("smpsdyn", IEEE80211_HTCAP_SMPS_DYNAMIC, set80211smps), DEF_CMD("-smps", IEEE80211_HTCAP_SMPS_OFF, set80211smps), /* XXX for testing */ DEF_CMD_ARG("chanswitch", set80211chanswitch), DEF_CMD_ARG("tdmaslot", set80211tdmaslot), DEF_CMD_ARG("tdmaslotcnt", set80211tdmaslotcnt), DEF_CMD_ARG("tdmaslotlen", set80211tdmaslotlen), DEF_CMD_ARG("tdmabintval", set80211tdmabintval), DEF_CMD_ARG("meshttl", set80211meshttl), DEF_CMD("meshforward", 1, set80211meshforward), DEF_CMD("-meshforward", 0, set80211meshforward), DEF_CMD("meshgate", 1, set80211meshgate), DEF_CMD("-meshgate", 0, set80211meshgate), DEF_CMD("meshpeering", 1, 
set80211meshpeering), DEF_CMD("-meshpeering", 0, set80211meshpeering), DEF_CMD_ARG("meshmetric", set80211meshmetric), DEF_CMD_ARG("meshpath", set80211meshpath), DEF_CMD("meshrt:flush", IEEE80211_MESH_RTCMD_FLUSH, set80211meshrtcmd), DEF_CMD_ARG("meshrt:add", set80211addmeshrt), DEF_CMD_ARG("meshrt:del", set80211delmeshrt), DEF_CMD_ARG("hwmprootmode", set80211hwmprootmode), DEF_CMD_ARG("hwmpmaxhops", set80211hwmpmaxhops), /* vap cloning support */ DEF_CLONE_CMD_ARG("wlanaddr", set80211clone_wlanaddr), DEF_CLONE_CMD_ARG("wlanbssid", set80211clone_wlanbssid), DEF_CLONE_CMD_ARG("wlandev", set80211clone_wlandev), DEF_CLONE_CMD_ARG("wlanmode", set80211clone_wlanmode), DEF_CLONE_CMD("beacons", 1, set80211clone_beacons), DEF_CLONE_CMD("-beacons", 0, set80211clone_beacons), DEF_CLONE_CMD("bssid", 1, set80211clone_bssid), DEF_CLONE_CMD("-bssid", 0, set80211clone_bssid), DEF_CLONE_CMD("wdslegacy", 1, set80211clone_wdslegacy), DEF_CLONE_CMD("-wdslegacy", 0, set80211clone_wdslegacy), }; static struct afswtch af_ieee80211 = { .af_name = "af_ieee80211", .af_af = AF_UNSPEC, .af_other_status = ieee80211_status, }; static __constructor void ieee80211_ctor(void) { int i; for (i = 0; i < nitems(ieee80211_cmds); i++) cmd_register(&ieee80211_cmds[i]); af_register(&af_ieee80211); clone_setdefcallback("wlan", wlan_create); } Index: projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/logpage.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/logpage.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/logpage.c (revision 313267) @@ -1,975 +1,988 @@ /*- * Copyright (c) 2013 EMC Corp. * All rights reserved. * * Copyright (C) 2012-2013 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #if _BYTE_ORDER != _LITTLE_ENDIAN #error "Code only works on little endian machines" #endif #include "nvmecontrol.h" #define DEFAULT_SIZE (4096) #define MAX_FW_SLOTS (7) typedef void (*print_fn_t)(void *buf, uint32_t size); struct kv_name { uint32_t key; const char *name; }; static const char * kv_lookup(const struct kv_name *kv, size_t kv_count, uint32_t key) { static char bad[32]; size_t i; for (i = 0; i < kv_count; i++, kv++) if (kv->key == key) return kv->name; snprintf(bad, sizeof(bad), "Attribute %#x", key); return bad; } +static void +print_bin(void *data, uint32_t length) +{ + write(STDOUT_FILENO, data, length); +} + /* * 128-bit integer augments to standard values. On i386 this * doesn't exist, so we use 64-bit values. The 128-bit counters * are crazy anyway, since for this purpose, you'd need a * billion IOPs for billions of seconds to overflow them. * So, on 32-bit i386, you'll get truncated values. */ #define UINT128_DIG 39 #ifdef __i386__ typedef uint64_t uint128_t; #else typedef __uint128_t uint128_t; #endif static inline uint128_t to128(void *p) { return *(uint128_t *)p; } static char * uint128_to_str(uint128_t u, char *buf, size_t buflen) { char *end = buf + buflen - 1; *end-- = '\0'; if (u == 0) *end-- = '0'; while (u && end >= buf) { *end-- = u % 10 + '0'; u /= 10; } end++; if (u != 0) return NULL; return end; } /* "Missing" from endian.h */ static __inline uint64_t le48dec(const void *pp) { uint8_t const *p = (uint8_t const *)pp; return (((uint64_t)le16dec(p + 4) << 32) | le32dec(p)); } static void * get_log_buffer(uint32_t size) { void *buf; if ((buf = malloc(size)) == NULL) errx(1, "unable to malloc %u bytes", size); memset(buf, 0, size); return (buf); } void read_logpage(int fd, uint8_t log_page, int nsid, void *payload, uint32_t payload_size) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = NVME_OPC_GET_LOG_PAGE; pt.cmd.nsid = nsid; pt.cmd.cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; pt.cmd.cdw10 |= log_page; pt.buf = payload; pt.len = payload_size; pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "get log page request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "get log page request returned error"); } static void print_log_error(void *buf, uint32_t size) { int i, nentries; struct nvme_error_information_entry *entry = buf; struct nvme_status *status; printf("Error Information Log\n"); printf("=====================\n"); if (entry->error_count == 0) { printf("No error entries found\n"); return; } nentries = size/sizeof(struct nvme_error_information_entry); for (i = 0; i < nentries; i++, entry++) { if (entry->error_count == 0) break; status = &entry->status; printf("Entry %02d\n", i + 1); printf("=========\n"); printf(" Error count: %ju\n", entry->error_count); printf(" Submission queue ID: %u\n", entry->sqid); printf(" Command ID: %u\n", entry->cid); /* TODO: Export nvme_status_string structures from kernel? 
*/ printf(" Status:\n"); printf(" Phase tag: %d\n", status->p); printf(" Status code: %d\n", status->sc); printf(" Status code type: %d\n", status->sct); printf(" More: %d\n", status->m); printf(" DNR: %d\n", status->dnr); printf(" Error location: %u\n", entry->error_location); printf(" LBA: %ju\n", entry->lba); printf(" Namespace ID: %u\n", entry->nsid); printf(" Vendor specific info: %u\n", entry->vendor_specific); } } static void print_temp(uint16_t t) { printf("%u K, %2.2f C, %3.2f F\n", t, (float)t - 273.15, (float)t * 9 / 5 - 459.67); } static void print_log_health(void *buf, uint32_t size __unused) { struct nvme_health_information_page *health = buf; char cbuf[UINT128_DIG + 1]; int i; printf("SMART/Health Information Log\n"); printf("============================\n"); printf("Critical Warning State: 0x%02x\n", health->critical_warning.raw); printf(" Available spare: %d\n", health->critical_warning.bits.available_spare); printf(" Temperature: %d\n", health->critical_warning.bits.temperature); printf(" Device reliability: %d\n", health->critical_warning.bits.device_reliability); printf(" Read only: %d\n", health->critical_warning.bits.read_only); printf(" Volatile memory backup: %d\n", health->critical_warning.bits.volatile_memory_backup); printf("Temperature: "); print_temp(health->temperature); printf("Available spare: %u\n", health->available_spare); printf("Available spare threshold: %u\n", health->available_spare_threshold); printf("Percentage used: %u\n", health->percentage_used); printf("Data units (512,000 byte) read: %s\n", uint128_to_str(to128(health->data_units_read), cbuf, sizeof(cbuf))); printf("Data units written: %s\n", uint128_to_str(to128(health->data_units_written), cbuf, sizeof(cbuf))); printf("Host read commands: %s\n", uint128_to_str(to128(health->host_read_commands), cbuf, sizeof(cbuf))); printf("Host write commands: %s\n", uint128_to_str(to128(health->host_write_commands), cbuf, sizeof(cbuf))); printf("Controller busy time (minutes): %s\n", uint128_to_str(to128(health->controller_busy_time), cbuf, sizeof(cbuf))); printf("Power cycles: %s\n", uint128_to_str(to128(health->power_cycles), cbuf, sizeof(cbuf))); printf("Power on hours: %s\n", uint128_to_str(to128(health->power_on_hours), cbuf, sizeof(cbuf))); printf("Unsafe shutdowns: %s\n", uint128_to_str(to128(health->unsafe_shutdowns), cbuf, sizeof(cbuf))); printf("Media errors: %s\n", uint128_to_str(to128(health->media_errors), cbuf, sizeof(cbuf))); printf("No. 
error info log entries: %s\n", uint128_to_str(to128(health->num_error_info_log_entries), cbuf, sizeof(cbuf))); printf("Warning Temp Composite Time: %d\n", health->warning_temp_time); printf("Error Temp Composite Time: %d\n", health->error_temp_time); for (i = 0; i < 7; i++) { if (health->temp_sensor[i] == 0) continue; printf("Temperature Sensor %d: ", i + 1); print_temp(health->temp_sensor[i]); } } static void print_log_firmware(void *buf, uint32_t size __unused) { int i; const char *status; struct nvme_firmware_page *fw = buf; printf("Firmware Slot Log\n"); printf("=================\n"); for (i = 0; i < MAX_FW_SLOTS; i++) { printf("Slot %d: ", i + 1); if (fw->afi.slot == i + 1) status = " Active"; else status = "Inactive"; if (fw->revision[i] == 0LLU) printf("Empty\n"); else if (isprint(*(char *)&fw->revision[i])) printf("[%s] %.8s\n", status, (char *)&fw->revision[i]); else printf("[%s] %016jx\n", status, fw->revision[i]); } } /* * Intel specific log pages from * http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ssd-dc-p3700-spec.pdf * * Though the version as of this date has a typo for the size of log page 0xca, * offset 147: it is only 1 byte, not 6. */ static void print_intel_temp_stats(void *buf, uint32_t size __unused) { struct intel_log_temp_stats *temp = buf; printf("Intel Temperature Log\n"); printf("=====================\n"); printf("Current: "); print_temp(temp->current); printf("Overtemp Last Flags %#jx\n", (uintmax_t)temp->overtemp_flag_last); printf("Overtemp Lifetime Flags %#jx\n", (uintmax_t)temp->overtemp_flag_life); printf("Max Temperature "); print_temp(temp->max_temp); printf("Min Temperature "); print_temp(temp->min_temp); printf("Max Operating Temperature "); print_temp(temp->max_oper_temp); printf("Min Operating Temperature "); print_temp(temp->min_oper_temp); printf("Estimated Temperature Offset: %ju C/K\n", (uintmax_t)temp->est_offset); } /* * Format from Table 22, section 5.7 IO Command Latency Statistics. * Read and write stats pages have identical encoding. */ static void print_intel_read_write_lat_log(void *buf, uint32_t size __unused) { const char *walker = buf; int i; printf("Major: %d\n", le16dec(walker + 0)); printf("Minor: %d\n", le16dec(walker + 2)); for (i = 0; i < 32; i++) printf("%4dus-%4dus: %ju\n", i * 32, (i + 1) * 32, (uintmax_t)le32dec(walker + 4 + i * 4)); for (i = 1; i < 32; i++) printf("%4dms-%4dms: %ju\n", i, i + 1, (uintmax_t)le32dec(walker + 132 + i * 4)); for (i = 1; i < 32; i++) printf("%4dms-%4dms: %ju\n", i * 32, (i + 1) * 32, (uintmax_t)le32dec(walker + 256 + i * 4)); } static void print_intel_read_lat_log(void *buf, uint32_t size) { printf("Intel Read Latency Log\n"); printf("======================\n"); print_intel_read_write_lat_log(buf, size); } static void print_intel_write_lat_log(void *buf, uint32_t size) { printf("Intel Write Latency Log\n"); printf("=======================\n"); print_intel_read_write_lat_log(buf, size); } /* * Table 19. 
5.4 SMART Attributes */ static void print_intel_add_smart(void *buf, uint32_t size __unused) { uint8_t *walker = buf; uint8_t *end = walker + 150; const char *name; uint64_t raw; uint8_t normalized; static struct kv_name kv[] = { { 0xab, "Program Fail Count" }, { 0xac, "Erase Fail Count" }, { 0xad, "Wear Leveling Count" }, { 0xb8, "End to End Error Count" }, { 0xc7, "CRC Error Count" }, { 0xe2, "Timed: Media Wear" }, { 0xe3, "Timed: Host Read %" }, { 0xe4, "Timed: Elapsed Time" }, { 0xea, "Thermal Throttle Status" }, { 0xf0, "Retry Buffer Overflows" }, { 0xf3, "PLL Lock Loss Count" }, { 0xf4, "NAND Bytes Written" }, { 0xf5, "Host Bytes Written" }, }; printf("Additional SMART Data Log\n"); printf("=========================\n"); /* * walker[0] = Key * walker[1,2] = reserved * walker[3] = Normalized Value * walker[4] = reserved * walker[5..10] = Little Endian Raw value * (or other represenations) * walker[11] = reserved */ while (walker < end) { name = kv_lookup(kv, nitems(kv), *walker); normalized = walker[3]; raw = le48dec(walker + 5); switch (*walker){ case 0: break; case 0xad: printf("%-32s: %3d min: %u max: %u ave: %u\n", name, normalized, le16dec(walker + 5), le16dec(walker + 7), le16dec(walker + 9)); break; case 0xe2: printf("%-32s: %3d %.3f%%\n", name, normalized, raw / 1024.0); break; case 0xea: printf("%-32s: %3d %d%% %d times\n", name, normalized, walker[5], le32dec(walker+6)); break; default: printf("%-32s: %3d %ju\n", name, normalized, (uintmax_t)raw); break; } walker += 12; } } /* * HGST's 0xc1 page. This is a grab bag of additional data. Please see * https://www.hgst.com/sites/default/files/resources/US_SN150_ProdManual.pdf * https://www.hgst.com/sites/default/files/resources/US_SN100_ProdManual.pdf * Appendix A for details */ typedef void (*subprint_fn_t)(void *buf, uint16_t subtype, uint8_t res, uint32_t size); struct subpage_print { uint16_t key; subprint_fn_t fn; }; static void print_hgst_info_write_errors(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_read_errors(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_verify_errors(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_self_test(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_background_scan(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_erase_errors(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_erase_counts(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_temp_history(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_ssd_perf(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static void print_hgst_info_firmware_load(void *buf, uint16_t subtype, uint8_t res, uint32_t size); static struct subpage_print hgst_subpage[] = { { 0x02, print_hgst_info_write_errors }, { 0x03, print_hgst_info_read_errors }, { 0x05, print_hgst_info_verify_errors }, { 0x10, print_hgst_info_self_test }, { 0x15, print_hgst_info_background_scan }, { 0x30, print_hgst_info_erase_errors }, { 0x31, print_hgst_info_erase_counts }, { 0x32, print_hgst_info_temp_history }, { 0x37, print_hgst_info_ssd_perf }, { 0x38, print_hgst_info_firmware_load }, }; /* Print a subpage that is basically just key value pairs */ static void print_hgst_info_subpage_gen(void *buf, uint16_t subtype __unused, uint32_t size, const struct kv_name *kv, size_t kv_count) { 
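	/*
	 * Informal sketch of the layout decoded below: each parameter is a
	 * small TLV-style record,
	 *
	 *	bytes 0-1	parameter type (little-endian key)
	 *	byte  2		flags (ignored here)
	 *	byte  3		value length N in bytes
	 *	bytes 4..3+N	little-endian value, assembled into 'param'
	 *
	 * and records are simply concatenated until 'size' bytes have been
	 * consumed.
	 */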
uint8_t *wsp, *esp; uint16_t ptype; uint8_t plen; uint64_t param; int i; wsp = buf; esp = wsp + size; while (wsp < esp) { ptype = le16dec(wsp); wsp += 2; wsp++; /* Flags, just ignore */ plen = *wsp++; param = 0; for (i = 0; i < plen; i++) param |= (uint64_t)*wsp++ << (i * 8); printf(" %-30s: %jd\n", kv_lookup(kv, kv_count, ptype), (uintmax_t)param); } } static void print_hgst_info_write_errors(void *buf, uint16_t subtype, uint8_t res __unused, uint32_t size) { static struct kv_name kv[] = { { 0x0000, "Corrected Without Delay" }, { 0x0001, "Corrected Maybe Delayed" }, { 0x0002, "Re-Writes" }, { 0x0003, "Errors Corrected" }, { 0x0004, "Correct Algorithm Used" }, { 0x0005, "Bytes Processed" }, { 0x0006, "Uncorrected Errors" }, { 0x8000, "Flash Write Commands" }, { 0x8001, "HGST Special" }, }; printf("Write Errors Subpage:\n"); print_hgst_info_subpage_gen(buf, subtype, size, kv, nitems(kv)); } static void print_hgst_info_read_errors(void *buf, uint16_t subtype, uint8_t res __unused, uint32_t size) { static struct kv_name kv[] = { { 0x0000, "Corrected Without Delay" }, { 0x0001, "Corrected Maybe Delayed" }, { 0x0002, "Re-Reads" }, { 0x0003, "Errors Corrected" }, { 0x0004, "Correct Algorithm Used" }, { 0x0005, "Bytes Processed" }, { 0x0006, "Uncorrected Errors" }, { 0x8000, "Flash Read Commands" }, { 0x8001, "XOR Recovered" }, { 0x8002, "Total Corrected Bits" }, }; printf("Read Errors Subpage:\n"); print_hgst_info_subpage_gen(buf, subtype, size, kv, nitems(kv)); } static void print_hgst_info_verify_errors(void *buf, uint16_t subtype, uint8_t res __unused, uint32_t size) { static struct kv_name kv[] = { { 0x0000, "Corrected Without Delay" }, { 0x0001, "Corrected Maybe Delayed" }, { 0x0002, "Re-Reads" }, { 0x0003, "Errors Corrected" }, { 0x0004, "Correct Algorithm Used" }, { 0x0005, "Bytes Processed" }, { 0x0006, "Uncorrected Errors" }, { 0x8000, "Commands Processed" }, }; printf("Verify Errors Subpage:\n"); print_hgst_info_subpage_gen(buf, subtype, size, kv, nitems(kv)); } static void print_hgst_info_self_test(void *buf, uint16_t subtype __unused, uint8_t res __unused, uint32_t size) { size_t i; uint8_t *walker = buf; uint16_t code, hrs; uint32_t lba; printf("Self Test Subpage:\n"); for (i = 0; i < size / 20; i++) { /* Each entry is 20 bytes */ code = le16dec(walker); walker += 2; walker++; /* Ignore fixed flags */ if (*walker == 0) /* Last entry is zero length */ break; if (*walker++ != 0x10) { printf("Bad length for self test report\n"); return; } printf(" %-30s: %d\n", "Recent Test", code); printf(" %-28s: %#x\n", "Self-Test Results", *walker & 0xf); printf(" %-28s: %#x\n", "Self-Test Code", (*walker >> 5) & 0x7); walker++; printf(" %-28s: %#x\n", "Self-Test Number", *walker++); hrs = le16dec(walker); walker += 2; lba = le32dec(walker); walker += 4; printf(" %-28s: %u\n", "Total Power On Hrs", hrs); printf(" %-28s: %#jx (%jd)\n", "LBA", (uintmax_t)lba, (uintmax_t)lba); printf(" %-28s: %#x\n", "Sense Key", *walker++ & 0xf); printf(" %-28s: %#x\n", "Additional Sense Code", *walker++); printf(" %-28s: %#x\n", "Additional Sense Qualifier", *walker++); printf(" %-28s: %#x\n", "Vendor Specific Detail", *walker++); } } static void print_hgst_info_background_scan(void *buf, uint16_t subtype __unused, uint8_t res __unused, uint32_t size) { uint8_t *walker = buf; uint8_t status; uint16_t code, nscan, progress; uint32_t pom, nand; printf("Background Media Scan Subpage:\n"); /* Decode the header */ code = le16dec(walker); walker += 2; walker++; /* Ignore fixed flags */ if (*walker++ != 0x10) { printf("Bad 
length for background scan header\n"); return; } if (code != 0) { printf("Expceted code 0, found code %#x\n", code); return; } pom = le32dec(walker); walker += 4; walker++; /* Reserved */ status = *walker++; nscan = le16dec(walker); walker += 2; progress = le16dec(walker); walker += 2; walker += 6; /* Reserved */ printf(" %-30s: %d\n", "Power On Minutes", pom); printf(" %-30s: %x (%s)\n", "BMS Status", status, status == 0 ? "idle" : (status == 1 ? "active" : (status == 8 ? "suspended" : "unknown"))); printf(" %-30s: %d\n", "Number of BMS", nscan); printf(" %-30s: %d\n", "Progress Current BMS", progress); /* Report retirements */ if (walker - (uint8_t *)buf != 20) { printf("Coding error, offset not 20\n"); return; } size -= 20; printf(" %-30s: %d\n", "BMS retirements", size / 0x18); while (size > 0) { code = le16dec(walker); walker += 2; walker++; if (*walker++ != 0x14) { printf("Bad length parameter\n"); return; } pom = le32dec(walker); walker += 4; /* * Spec sheet says the following are hard coded, if true, just * print the NAND retirement. */ if (walker[0] == 0x41 && walker[1] == 0x0b && walker[2] == 0x01 && walker[3] == 0x00 && walker[4] == 0x00 && walker[5] == 0x00 && walker[6] == 0x00 && walker[7] == 0x00) { walker += 8; walker += 4; /* Skip reserved */ nand = le32dec(walker); walker += 4; printf(" %-30s: %d\n", "Retirement number", code); printf(" %-28s: %#x\n", "NAND (C/T)BBBPPP", nand); } else { printf("Parameter %#x entry corrupt\n", code); walker += 16; } } } static void print_hgst_info_erase_errors(void *buf, uint16_t subtype __unused, uint8_t res __unused, uint32_t size) { static struct kv_name kv[] = { { 0x0000, "Corrected Without Delay" }, { 0x0001, "Corrected Maybe Delayed" }, { 0x0002, "Re-Erase" }, { 0x0003, "Errors Corrected" }, { 0x0004, "Correct Algorithm Used" }, { 0x0005, "Bytes Processed" }, { 0x0006, "Uncorrected Errors" }, { 0x8000, "Flash Erase Commands" }, { 0x8001, "Mfg Defect Count" }, { 0x8002, "Grown Defect Count" }, { 0x8003, "Erase Count -- User" }, { 0x8004, "Erase Count -- System" }, }; printf("Erase Errors Subpage:\n"); print_hgst_info_subpage_gen(buf, subtype, size, kv, nitems(kv)); } static void print_hgst_info_erase_counts(void *buf, uint16_t subtype, uint8_t res __unused, uint32_t size) { /* My drive doesn't export this -- so not coding up */ printf("XXX: Erase counts subpage: %p, %#x %d\n", buf, subtype, size); } static void print_hgst_info_temp_history(void *buf, uint16_t subtype __unused, uint8_t res __unused, uint32_t size __unused) { uint8_t *walker = buf; uint32_t min; printf("Temperature History:\n"); printf(" %-30s: %d C\n", "Current Temperature", *walker++); printf(" %-30s: %d C\n", "Reference Temperature", *walker++); printf(" %-30s: %d C\n", "Maximum Temperature", *walker++); printf(" %-30s: %d C\n", "Minimum Temperature", *walker++); min = le32dec(walker); walker += 4; printf(" %-30s: %d:%02d:00\n", "Max Temperture Time", min / 60, min % 60); min = le32dec(walker); walker += 4; printf(" %-30s: %d:%02d:00\n", "Over Temperture Duration", min / 60, min % 60); min = le32dec(walker); walker += 4; printf(" %-30s: %d:%02d:00\n", "Min Temperture Time", min / 60, min % 60); } static void print_hgst_info_ssd_perf(void *buf, uint16_t subtype __unused, uint8_t res, uint32_t size __unused) { uint8_t *walker = buf; uint64_t val; printf("SSD Performance Subpage Type %d:\n", res); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Read Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Read Blocks", val); 
val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Cache Read Hits Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Cache Read Hits Blocks", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Read Commands Stalled", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Write Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Write Blocks", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Write Odd Start Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Write Odd End Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "Host Write Commands Stalled", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "NAND Read Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "NAND Read Blocks", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "NAND Write Commands", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "NAND Write Blocks", val); val = le64dec(walker); walker += 8; printf(" %-30s: %ju\n", "NAND Read Before Writes", val); } static void print_hgst_info_firmware_load(void *buf, uint16_t subtype __unused, uint8_t res __unused, uint32_t size __unused) { uint8_t *walker = buf; printf("Firmware Load Subpage:\n"); printf(" %-30s: %d\n", "Firmware Downloads", le32dec(walker)); } static void kv_indirect(void *buf, uint32_t subtype, uint8_t res, uint32_t size, struct subpage_print *sp, size_t nsp) { size_t i; for (i = 0; i < nsp; i++, sp++) { if (sp->key == subtype) { sp->fn(buf, subtype, res, size); return; } } printf("No handler for page type %x\n", subtype); } static void print_hgst_info_log(void *buf, uint32_t size __unused) { uint8_t *walker, *end, *subpage; int pages; uint16_t len; uint8_t subtype, res; printf("HGST Extra Info Log\n"); printf("===================\n"); walker = buf; pages = *walker++; walker++; len = le16dec(walker); walker += 2; end = walker + len; /* Length is exclusive of this header */ while (walker < end) { subpage = walker + 4; subtype = *walker++ & 0x3f; /* subtype */ res = *walker++; /* Reserved */ len = le16dec(walker); walker += len + 2; /* Length, not incl header */ if (walker > end) { printf("Ooops! Off the end of the list\n"); break; } kv_indirect(subpage, subtype, res, len, hgst_subpage, nitems(hgst_subpage)); } } /* * Table of log page printer / sizing. * * This includes Intel specific pages that are widely implemented. Not * sure how best to switch between different vendors. 
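 *
 * The dispatch rule used by logpage() below is: an entry whose vendor field
 * is NULL always matches, while a vendor-specific entry is skipped only when
 * the user passed a -v string that differs from it.  A new vendor page would
 * therefore be wired in with an entry of roughly this shape (the names here
 * are purely illustrative):
 *
 *	{ACME_LOG_DEBUG,	"acme",	print_acme_debug,	DEFAULT_SIZE},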
*/ static struct logpage_function { uint8_t log_page; const char *vendor; print_fn_t print_fn; size_t size; } logfuncs[] = { {NVME_LOG_ERROR, NULL, print_log_error, 0}, {NVME_LOG_HEALTH_INFORMATION, NULL, print_log_health, sizeof(struct nvme_health_information_page)}, {NVME_LOG_FIRMWARE_SLOT, NULL, print_log_firmware, sizeof(struct nvme_firmware_page)}, {HGST_INFO_LOG, "hgst", print_hgst_info_log, DEFAULT_SIZE}, + {HGST_INFO_LOG, "wdc", print_hgst_info_log, + DEFAULT_SIZE}, {INTEL_LOG_TEMP_STATS, "intel", print_intel_temp_stats, sizeof(struct intel_log_temp_stats)}, {INTEL_LOG_READ_LAT_LOG, "intel", print_intel_read_lat_log, DEFAULT_SIZE}, {INTEL_LOG_WRITE_LAT_LOG, "intel", print_intel_write_lat_log, DEFAULT_SIZE}, {INTEL_LOG_ADD_SMART, "intel", print_intel_add_smart, DEFAULT_SIZE}, {0, NULL, NULL, 0}, }; static void logpage_usage(void) { fprintf(stderr, "usage:\n"); fprintf(stderr, LOGPAGE_USAGE); exit(1); } void logpage(int argc, char *argv[]) { int fd, nsid; int log_page = 0, pageflag = false; - int hexflag = false, ns_specified; + int binflag = false, hexflag = false, ns_specified; char ch, *p; char cname[64]; uint32_t size; void *buf; const char *vendor = NULL; struct logpage_function *f; struct nvme_controller_data cdata; print_fn_t print_fn; - while ((ch = getopt(argc, argv, "p:xv:")) != -1) { + while ((ch = getopt(argc, argv, "bp:xv:")) != -1) { switch (ch) { + case 'b': + binflag = true; + break; case 'p': /* TODO: Add human-readable ASCII page IDs */ log_page = strtol(optarg, &p, 0); if (p != NULL && *p != '\0') { fprintf(stderr, "\"%s\" not valid log page id.\n", optarg); logpage_usage(); } pageflag = true; break; case 'x': hexflag = true; break; case 'v': vendor = optarg; break; } } if (!pageflag) { printf("Missing page_id (-p).\n"); logpage_usage(); } /* Check that a controller and/or namespace was specified. */ if (optind >= argc) logpage_usage(); if (strstr(argv[optind], NVME_NS_PREFIX) != NULL) { ns_specified = true; parse_ns_str(argv[optind], cname, &nsid); open_dev(cname, &fd, 1, 1); } else { ns_specified = false; nsid = NVME_GLOBAL_NAMESPACE_TAG; open_dev(argv[optind], &fd, 1, 1); } read_controller_data(fd, &cdata); /* * The log page attribtues indicate whether or not the controller * supports the SMART/Health information log page on a per * namespace basis. */ if (ns_specified) { if (log_page != NVME_LOG_HEALTH_INFORMATION) errx(1, "log page %d valid only at controller level", log_page); if (cdata.lpa.ns_smart == 0) errx(1, "controller does not support per namespace " "smart/health information"); } print_fn = print_hex; size = DEFAULT_SIZE; - if (!hexflag) { + if (binflag) + print_fn = print_bin; + if (!binflag && !hexflag) { /* * See if there is a pretty print function for the specified log * page. If one isn't found, we just revert to the default * (print_hex). If there was a vendor specified bt the user, and * the page is vendor specific, don't match the print function * unless the vendors match. 
*/ for (f = logfuncs; f->log_page > 0; f++) { if (f->vendor != NULL && vendor != NULL && strcmp(f->vendor, vendor) != 0) continue; if (log_page != f->log_page) continue; print_fn = f->print_fn; size = f->size; break; } } if (log_page == NVME_LOG_ERROR) { size = sizeof(struct nvme_error_information_entry); size *= (cdata.elpe + 1); } /* Read the log page */ buf = get_log_buffer(size); read_logpage(fd, log_page, nsid, buf, size); print_fn(buf, size); close(fd); exit(0); } Index: projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/nvmecontrol.8 =================================================================== --- projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/nvmecontrol.8 (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/nvmecontrol.8 (revision 313267) @@ -1,170 +1,203 @@ .\" .\" Copyright (c) 2012 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" nvmecontrol man page. .\" .\" Author: Jim Harris .\" .\" $FreeBSD$ .\" -.Dd September 10, 2016 +.Dd February 4, 2017 .Dt NVMECONTROL 8 .Os .Sh NAME .Nm nvmecontrol .Nd NVM Express control utility .Sh SYNOPSIS .Nm .Ic devlist .Nm .Ic identify .Op Fl v .Op Fl x .Aq device id .Nm .Ic perftest .Aq Fl n Ar num_threads .Aq Fl o Ar read|write .Op Fl p .Aq Fl s Ar size_in_bytes .Aq Fl t Ar time_in_sec .Aq namespace id .Nm .Ic reset .Aq controller id .Nm .Ic logpage .Aq Fl p Ar page_id .Op Fl x +.Op Fl v Ar vendor-string +.Op Fl b .Aq device id .Aq namespace id .Nm .Ic firmware .Op Fl s Ar slot .Op Fl f Ar path_to_firmware .Op Fl a .Aq device id .Nm .Ic power .Op Fl l .Op Fl p power_state -.Op fl w workload_hint +.Op Fl w workload_hint .Nm .Ic wdc cap-diag .Op Fl o path_template .Aq device id .Nm .Ic wdc drive-log .Op Fl o path_template .Aq device id .Nm .Ic wdc get-crash-dump .Op Fl o path_template .Aq device id .\" .Nm .\" .Ic wdc purge .\" .Aq device id .\" .Nm .\" .Ic wdc purge-monitor .\" .Aq device id .Sh DESCRIPTION NVM Express (NVMe) is a storage protocol standard, for SSDs and other high-speed storage devices over PCI Express. +.Pp +.Ss logpage +The logpage command knows how to print log pages of various types. 
+It also knows about vendor specific log pages from hgst/wdc and intel. +Page 0xc1 for hgst/wdc contains the advanced smart information about +the drive. +Page 0xc1 is read latency stats for intel. +Page 0xc2 is write latency stats for intel. +Page 0xc5 is temperature stats for intel. +Page 0xca is advanced smart information for intel. +.Ss wdc +The various wdc command retrieve log data from the wdc/hgst drives. +The +.Fl o +flag specifies a path template to use to output the files. +Each file takes the path template (which defaults to nothing), appends +the drive's serial number and the type of dump it is followed +by .bin. +These logs must be sent to the vendor for analysis. +This tool only provides a way to extract them. .Sh EXAMPLES .Dl nvmecontrol devlist .Pp Display a list of NVMe controllers and namespaces along with their device nodes. .Pp .Dl nvmecontrol identify nvme0 .Pp Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data. .Pp .Dl nvmecontrol identify -x -v nvme0ns1 .Pp Display an hexadecimal dump of the nvme0 IDENTIFY_NAMESPACE data for namespace 1. .Pp .Dl nvmecontrol perftest -n 32 -o read -s 512 -t 30 nvme0ns1 .Pp Run a performance test on nvme0ns1 using 32 kernel threads for 30 seconds. Each thread will issue a single 512 byte read command. Results are printed to stdout when 30 seconds expires. .Pp .Dl nvmecontrol reset nvme0 .Pp Perform a controller-level reset of the nvme0 controller. .Pp .Dl nvmecontrol logpage -p 1 nvme0 .Pp Display a human-readable summary of the nvme0 controller's Error Information Log. Log pages defined by the NVMe specification include Error Information Log (ID=1), SMART/Health Information Log (ID=2), and Firmware Slot Log (ID=3). .Pp +.Dl nvmecontrol logpage -p 0xc1 -v wdc nvme0 +.Pp +Display a human-readable summary of the nvme0's wdc-specific advanced +SMART data. +.Pp .Dl nvmecontrol logpage -p 1 -x nvme0 .Pp Display a hexadecimal dump of the nvme0 controller's Error Information Log. +.Pp +.Dl nvmecontrol logpage -p 0xcb -b nvme0 > /tmp/page-cb.bin +.Pp +Print the contents of vendor specific page 0xcb as binary data on +standard out. +Redirect it to a temporary file. .Pp .Dl nvmecontrol firmware -s 2 -f /tmp/nvme_firmware nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 2 of the nvme0 controller, but do not activate the image. .Pp .Dl nvmecontrol firmware -s 4 -a nvme0 .Pp Activate the firmware in slot 4 of the nvme0 controller on the next reset. .Pp .Dl nvmecontrol firmware -s 7 -f /tmp/nvme_firmware -a nvme0 .Pp Download the firmware image contained in "/tmp/nvme_firmware" to slot 7 of the nvme0 controller and activate it on the next reset. .Pp .Dl nvmecontrol power -l nvme0 .Pp List all the current power modes. .Pp .Dl nvmecontrol power -p 3 nvme0 .Pp Set the current power mode. .Pp .Dl nvmecontrol power nvme0 .Pp Get the current power mode. .Sh HISTORY The .Nm utility appeared in .Fx 9.2 . .Sh AUTHORS .An -nosplit .Nm was developed by Intel and originally written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . .Pp This man page was written by .An Jim Harris Aq Mt jimharris@FreeBSD.org . Index: projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/wdc.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/wdc.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sbin/nvmecontrol/wdc.c (revision 313267) @@ -1,341 +1,342 @@ /*- * Copyright (c) 2017 Netflix, Inc * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "nvmecontrol.h" #define WDC_NVME_TOC_SIZE 8 #define WDC_NVME_CAP_DIAG_OPCODE 0xe6 #define WDC_NVME_CAP_DIAG_CMD 0x0000 #define WDC_NVME_DIAG_OPCODE 0xc6 #define WDC_NVME_DRIVE_LOG_SIZE_CMD 0x0120 #define WDC_NVME_DRIVE_LOG_CMD 0x0020 #define WDC_NVME_CRASH_DUMP_SIZE_CMD 0x0320 #define WDC_NVME_CRASH_DUMP_CMD 0x0420 #define WDC_NVME_PFAIL_DUMP_SIZE_CMD 0x0520 #define WDC_NVME_PFAIL_DUMP_CMD 0x0620 #define WDC_NVME_CLEAR_DUMP_OPCODE 0xff #define WDC_NVME_CLEAR_CRASH_DUMP_CMD 0x0503 #define WDC_NVME_CLEAR_PFAIL_DUMP_CMD 0x0603 static void wdc_cap_diag(int argc, char *argv[]); static void wdc_drive_log(int argc, char *argv[]); static void wdc_get_crash_dump(int argc, char *argv[]); static void wdc_purge(int argc, char *argv[]); static void wdc_purge_monitor(int argc, char *argv[]); #define WDC_CAP_DIAG_USAGE "\tnvmecontrol wdc cap-diag [-o path-template]\n" #define WDC_DRIVE_LOG_USAGE "\tnvmecontrol wdc drive-log [-o path-template]\n" #define WDC_GET_CRASH_DUMP_USAGE "\tnvmecontrol wdc get-crash-dump [-o path-template]\n" #define WDC_PURGE_USAGE "\tnvmecontrol wdc purge [-o path-template]\n" -#define WDC_PURGE_MONITOR_USAGE "\tnvmecontrol wdc purge-montor\n" +#define WDC_PURGE_MONITOR_USAGE "\tnvmecontrol wdc purge-monitor\n" static struct nvme_function wdc_funcs[] = { {"cap-diag", wdc_cap_diag, WDC_CAP_DIAG_USAGE}, {"drive-log", wdc_drive_log, WDC_DRIVE_LOG_USAGE}, {"get-crash-dump", wdc_get_crash_dump, WDC_GET_CRASH_DUMP_USAGE}, {"purge", wdc_purge, WDC_PURGE_USAGE}, {"purge_monitor", wdc_purge_monitor, WDC_PURGE_MONITOR_USAGE}, {NULL, NULL, NULL}, }; static void wdc_append_serial_name(int fd, char *buf, size_t len, const char *suffix) { struct nvme_controller_data cdata; char sn[NVME_SERIAL_NUMBER_LENGTH + 1]; char *walker; len -= strlen(buf); buf += strlen(buf); read_controller_data(fd, &cdata); memcpy(sn, cdata.sn, NVME_SERIAL_NUMBER_LENGTH); walker = sn + NVME_SERIAL_NUMBER_LENGTH - 1; while (walker > sn && *walker == ' ') walker--; - *walker = '\0'; + *++walker = '\0'; snprintf(buf, len, "%s%s.bin", sn, suffix); } static void wdc_get_data(int fd, uint32_t opcode, uint32_t len, uint32_t off, uint32_t cmd, uint8_t 
*buffer, size_t buflen) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = opcode; pt.cmd.cdw10 = len / sizeof(uint32_t); /* - 1 like all the others ??? */ pt.cmd.cdw11 = off / sizeof(uint32_t); pt.cmd.cdw12 = cmd; pt.buf = buffer; pt.len = buflen; pt.is_read = 1; // printf("opcode %#x cdw10(len) %#x cdw11(offset?) %#x cdw12(cmd/sub) %#x buflen %zd\n", // (int)opcode, (int)cdw10, (int)cdw11, (int)cdw12, buflen); if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "wdc_get_data request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "wdc_get_data request returned error"); } static void wdc_do_dump(int fd, char *tmpl, const char *suffix, uint32_t opcode, uint32_t size_cmd, uint32_t cmd, int len_off) { int fd2; uint8_t *buf; - uint32_t len, resid, offset; + uint32_t len, offset; + ssize_t resid; wdc_append_serial_name(fd, tmpl, MAXPATHLEN, suffix); buf = aligned_alloc(PAGE_SIZE, WDC_NVME_TOC_SIZE); if (buf == NULL) errx(1, "Can't get buffer to get size"); wdc_get_data(fd, opcode, WDC_NVME_TOC_SIZE, 0, size_cmd, buf, WDC_NVME_TOC_SIZE); len = be32dec(buf + len_off); if (len == 0) errx(1, "No data for %s", suffix); printf("Dumping %d bytes to %s\n", len, tmpl); /* XXX overwrite protection? */ fd2 = open(tmpl, O_WRONLY | O_CREAT | O_TRUNC); if (fd2 < 0) err(1, "open %s", tmpl); offset = 0; buf = aligned_alloc(PAGE_SIZE, NVME_MAX_XFER_SIZE); if (buf == NULL) errx(1, "Can't get buffer to read dump"); while (len > 0) { resid = len > NVME_MAX_XFER_SIZE ? NVME_MAX_XFER_SIZE : len; wdc_get_data(fd, opcode, resid, offset, cmd, buf, resid); if (write(fd2, buf, resid) != resid) err(1, "write"); offset += resid; len -= resid; } free(buf); close(fd2); } static void wdc_do_clear_dump(int fd, uint32_t opcode, uint32_t cmd) { struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); pt.cmd.opc = opcode; pt.cmd.cdw12 = cmd; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "wdc_do_clear_dump request failed"); if (nvme_completion_is_error(&pt.cpl)) errx(1, "wdc_do_clear_dump request returned error"); } static void wdc_cap_diag_usage() { fprintf(stderr, "usage:\n"); fprintf(stderr, WDC_CAP_DIAG_USAGE); exit(1); } static void wdc_cap_diag(int argc, char *argv[]) { char path_tmpl[MAXPATHLEN]; int ch, fd; path_tmpl[0] = '\0'; while ((ch = getopt(argc, argv, "o:")) != -1) { switch ((char)ch) { case 'o': strlcpy(path_tmpl, optarg, MAXPATHLEN); break; default: wdc_cap_diag_usage(); } } /* Check that a controller was specified. */ if (optind >= argc) wdc_cap_diag_usage(); open_dev(argv[optind], &fd, 1, 1); wdc_do_dump(fd, path_tmpl, "cap_diag", WDC_NVME_CAP_DIAG_OPCODE, WDC_NVME_CAP_DIAG_CMD, WDC_NVME_CAP_DIAG_CMD, 4); close(fd); exit(1); } static void wdc_drive_log_usage() { fprintf(stderr, "usage:\n"); fprintf(stderr, WDC_DRIVE_LOG_USAGE); exit(1); } static void wdc_drive_log(int argc, char *argv[]) { char path_tmpl[MAXPATHLEN]; int ch, fd; path_tmpl[0] = '\0'; while ((ch = getopt(argc, argv, "o:")) != -1) { switch ((char)ch) { case 'o': strlcpy(path_tmpl, optarg, MAXPATHLEN); break; default: wdc_drive_log_usage(); } } /* Check that a controller was specified. 
*/ if (optind >= argc) wdc_drive_log_usage(); open_dev(argv[optind], &fd, 1, 1); wdc_do_dump(fd, path_tmpl, "drive_log", WDC_NVME_DIAG_OPCODE, WDC_NVME_DRIVE_LOG_SIZE_CMD, WDC_NVME_DRIVE_LOG_CMD, 0); close(fd); exit(1); } static void wdc_get_crash_dump_usage() { fprintf(stderr, "usage:\n"); fprintf(stderr, WDC_CAP_DIAG_USAGE); exit(1); } static void wdc_get_crash_dump(int argc, char *argv[]) { char path_tmpl[MAXPATHLEN]; int ch, fd; while ((ch = getopt(argc, argv, "o:")) != -1) { switch ((char)ch) { case 'o': strlcpy(path_tmpl, optarg, MAXPATHLEN); break; default: wdc_get_crash_dump_usage(); } } /* Check that a controller was specified. */ if (optind >= argc) wdc_get_crash_dump_usage(); open_dev(argv[optind], &fd, 1, 1); wdc_do_dump(fd, path_tmpl, "crash_dump", WDC_NVME_DIAG_OPCODE, WDC_NVME_CRASH_DUMP_SIZE_CMD, WDC_NVME_CRASH_DUMP_CMD, 0); wdc_do_clear_dump(fd, WDC_NVME_CLEAR_DUMP_OPCODE, WDC_NVME_CLEAR_CRASH_DUMP_CMD); // wdc_led_beacon_disable(fd); wdc_do_dump(fd, path_tmpl, "pfail_dump", WDC_NVME_DIAG_OPCODE, WDC_NVME_PFAIL_DUMP_SIZE_CMD, WDC_NVME_PFAIL_DUMP_CMD, 0); wdc_do_clear_dump(fd, WDC_NVME_CLEAR_DUMP_OPCODE, WDC_NVME_CLEAR_PFAIL_DUMP_CMD); close(fd); exit(1); } static void wdc_purge(int argc, char *argv[]) { char path_tmpl[MAXPATHLEN]; int ch; while ((ch = getopt(argc, argv, "o:")) != -1) { switch ((char)ch) { case 'o': strlcpy(path_tmpl, optarg, MAXPATHLEN); break; default: wdc_cap_diag_usage(); } } printf("purge has not been implemented.\n"); exit(1); } static void wdc_purge_monitor(int argc, char *argv[]) { char path_tmpl[MAXPATHLEN]; int ch; while ((ch = getopt(argc, argv, "o:")) != -1) { switch ((char)ch) { case 'o': strlcpy(path_tmpl, optarg, MAXPATHLEN); break; default: wdc_cap_diag_usage(); } } printf("purge has not been implemented.\n"); exit(1); } void wdc(int argc, char *argv[]) { dispatch(argc, argv, wdc_funcs); } Index: projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 313267) @@ -1,18352 +1,18361 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, Joyent, Inc. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* * DTrace - Dynamic Tracing for Solaris * * This is the implementation of the Solaris Dynamic Tracing framework * (DTrace). The user-visible interface to DTrace is described at length in * the "Solaris Dynamic Tracing Guide". 
The interfaces between the libdtrace * library, the in-kernel DTrace framework, and the DTrace providers are * described in the block comments in the header file. The * internal architecture of DTrace is described in the block comments in the * header file. The comments contained within the DTrace * implementation very much assume mastery of all of these sources; if one has * an unanswered question about the implementation, one should consult them * first. * * The functions here are ordered roughly as follows: * * - Probe context functions * - Probe hashing functions * - Non-probe context utility functions * - Matching functions * - Provider-to-Framework API functions * - Probe management functions * - DIF object functions * - Format functions * - Predicate functions * - ECB functions * - Buffer functions * - Enabling functions * - DOF functions * - Anonymous enabling functions * - Consumer state functions * - Helper functions * - Hook functions * - Driver cookbook functions * * Each group of functions begins with a block comment labelled the "DTrace * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ #include #ifndef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #include #ifdef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #include #include #include "strtolctype.h" /* FreeBSD includes: */ #ifndef illumos #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dtrace_cddl.h" #include "dtrace_debug.c" #endif #include "dtrace_xoroshiro128_plus.h" /* * DTrace Tunable Variables * * The following variables may be tuned by adding a line to /etc/system that * includes both the name of the DTrace module ("dtrace") and the name of the * variable. For example: * * set dtrace:dtrace_destructive_disallow = 1 * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior * is undesirable. Most of these variables are tunable on a per-consumer * basis using DTrace options, and need not be tuned on a system-wide basis. * When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not * checked comprehensively. Further, these variables should not be tuned * dynamically via "mdb -kw" or other means; they should only be tuned via * /etc/system. 
*/ int dtrace_destructive_disallow = 0; #ifndef illumos /* Positive logic version of dtrace_destructive_disallow for loader tunable */ int dtrace_allow_destructive = 1; #endif dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024); size_t dtrace_statvar_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 128; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_nspec_default = 1; dtrace_optval_t dtrace_specsize_default = 32 * 1024; dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; #ifndef illumos int dtrace_memstr_max = 4096; #endif /* * DTrace External Variables * * As dtrace(7D) is a kernel module, any DTrace variables are obviously * available to DTrace consumers via the backtick (`) syntax. One of these, * dtrace_zero, is made deliberately so: it is provided as a source of * well-known, zero-filled memory. While this variable is not documented, * it is used by some translators as an implementation detail. */ const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ /* * DTrace Internal Variables */ #ifdef illumos static dev_info_t *dtrace_devi; /* device info */ #endif #ifdef illumos static vmem_t *dtrace_arena; /* probe ID arena */ static vmem_t *dtrace_minor; /* minor number arena */ #else static taskq_t *dtrace_taskq; /* task queue */ static struct unrhdr *dtrace_arena; /* Probe ID number. 
*/ #endif static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ static int dtrace_getf; /* number of unpriv getf()s */ #ifdef illumos static void *dtrace_softstate; /* softstate pointer */ #endif static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ static int dtrace_toxranges; /* number of toxic ranges */ static int dtrace_toxranges_max; /* size of toxic range array */ static dtrace_anon_t dtrace_anon; /* anonymous enabling */ static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */ static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ static kthread_t *dtrace_panicked; /* panicking thread */ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ static int dtrace_dynvar_failclean; /* dynvars failed to clean */ #ifndef illumos static struct mtx dtrace_unr_mtx; MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF); static eventhandler_tag dtrace_kld_load_tag; static eventhandler_tag dtrace_kld_unload_try_tag; #endif /* * DTrace Locking * DTrace is protected by three (relatively coarse-grained) locks: * * (1) dtrace_lock is required to manipulate essentially any DTrace state, * including enabling state, probes, ECBs, consumer state, helper state, * etc. Importantly, dtrace_lock is _not_ required when in probe context; * probe context is lock-free -- synchronization is handled via the * dtrace_sync() cross call mechanism. * * (2) dtrace_provider_lock is required when manipulating provider state, or * when provider state must be held constant. * * (3) dtrace_meta_lock is required when manipulating meta provider state, or * when meta provider state must be held constant. * * The lock ordering between these three locks is dtrace_meta_lock before * dtrace_provider_lock before dtrace_lock. (In particular, there are * several places where dtrace_provider_lock is held by the framework as it * calls into the providers -- which then call back into the framework, * grabbing dtrace_lock.) * * There are two other locks in the mix: mod_lock and cpu_lock. With respect * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical * role as a coarse-grained lock; it is acquired before both of these locks. * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must * be acquired _between_ dtrace_meta_lock and any other DTrace locks. * mod_lock is similar with respect to dtrace_provider_lock in that it must be * acquired _between_ dtrace_provider_lock and dtrace_lock. 
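A minimal user-space sketch of the lock ordering documented above, with POSIX threads standing in for the kernel's kmutex_t; the mutex names mirror dtrace_meta_lock, dtrace_provider_lock and dtrace_lock, but the types, the with_all_dtrace_locks() wrapper, and the user-space context are hypothetical.

#include <pthread.h>

static pthread_mutex_t meta_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t provider_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t probe_lock = PTHREAD_MUTEX_INITIALIZER;

static void
with_all_dtrace_locks(void (*fn)(void))
{
	/* Acquire in the documented order: meta, then provider, then probe. */
	pthread_mutex_lock(&meta_lock);
	pthread_mutex_lock(&provider_lock);
	pthread_mutex_lock(&probe_lock);

	fn();

	/* Release in the reverse order. */
	pthread_mutex_unlock(&probe_lock);
	pthread_mutex_unlock(&provider_lock);
	pthread_mutex_unlock(&meta_lock);
}

static void
noop(void)
{
}

int
main(void)
{
	with_all_dtrace_locks(noop);
	return (0);
}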
*/ static kmutex_t dtrace_lock; /* probe state lock */ static kmutex_t dtrace_provider_lock; /* provider state lock */ static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ #ifndef illumos /* XXX FreeBSD hacks. */ #define cr_suid cr_svuid #define cr_sgid cr_svgid #define ipaddr_t in_addr_t #define mod_modname pathname #define vuprintf vprintf #define ttoproc(_a) ((_a)->td_proc) #define crgetzoneid(_a) 0 #define SNOCD 0 #define CPU_ON_INTR(_a) 0 #define PRIV_EFFECTIVE (1 << 0) #define PRIV_DTRACE_KERNEL (1 << 1) #define PRIV_DTRACE_PROC (1 << 2) #define PRIV_DTRACE_USER (1 << 3) #define PRIV_PROC_OWNER (1 << 4) #define PRIV_PROC_ZONE (1 << 5) #define PRIV_ALL ~0 SYSCTL_DECL(_debug_dtrace); SYSCTL_DECL(_kern_dtrace); #endif #ifdef illumos #define curcpu CPU->cpu_id #endif /* * DTrace Provider Variables * * These are the variables relating to DTrace as a provider (that is, the * provider of the BEGIN, END, and ERROR probes). */ static dtrace_pattr_t dtrace_provider_attr = { { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, }; static void dtrace_nullop(void) {} static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, modctl_t *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, NULL, NULL, NULL, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ static dtrace_id_t dtrace_probeid_end; /* special END probe */ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ /* * DTrace Helper Tracing Variables * * These variables should be set dynamically to enable helper tracing. The * only variables that should be set are dtrace_helptrace_enable (which should * be set to a non-zero value to allocate helper tracing buffers on the next * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a * non-zero value to deallocate helper tracing buffers on the next close of * /dev/dtrace). When (and only when) helper tracing is disabled, the * buffer size may also be set via dtrace_helptrace_bufsize. */ int dtrace_helptrace_enable = 0; int dtrace_helptrace_disable = 0; int dtrace_helptrace_bufsize = 16 * 1024 * 1024; uint32_t dtrace_helptrace_nlocals; static dtrace_helptrace_t *dtrace_helptrace_buffer; static uint32_t dtrace_helptrace_next = 0; static int dtrace_helptrace_wrapped = 0; /* * DTrace Error Hashing * * On DEBUG kernels, DTrace will track the errors that has seen in a hash * table. This is very useful for checking coverage of tests that are * expected to induce DIF or DOF processing errors, and may be useful for * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. 
*/ #ifdef DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; static kmutex_t dtrace_errlock; #endif /* * DTrace Macros and Constants * * These are various macros that are useful in various spots in the * implementation, along with a few random constants that have no meaning * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? */ #define DTRACE_HASHSTR(hash, probe) \ dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) #define DTRACE_HASHNEXT(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) #define DTRACE_HASHPREV(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) /* * The key for a thread-local variable consists of the lower 61 bits of the * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local * variables. To guarantee that they cannot collide, we must also define the * order for keying dynamic variables. That order is: * * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] * * Because the variable-key and the tls-key are in orthogonal spaces, there is * no way for a global variable key signature to match a thread-local key * signature. */ #ifdef illumos #define DTRACE_TLS_THRKEY(where) { \ uint_t intr = 0; \ uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else #define DTRACE_TLS_THRKEY(where) { \ solaris_cpu_t *_c = &solaris_cpu[curcpu]; \ uint_t intr = 0; \ uint_t actv = _c->cpu_intr_actv; \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #endif #define DT_BSWAP_8(x) ((x) & 0xff) #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) #define DT_MASK_LO 0x00000000FFFFFFFFULL #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); #ifndef __x86 #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (size - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } #else #define DTRACE_ALIGNCHECK(addr, size, flags) #endif /* * Test whether a range of memory starting at testaddr of size testsz falls * within the range of memory described by addr, sz. We take care to avoid * problems with overflow and underflow of the unsigned quantities, and * disallow all negative sizes. Ranges of size 0 are allowed. 
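The range test described above (defined just below as DTRACE_INRANGE()) relies on unsigned wrap-around rather than signed comparisons. A stand-alone sketch of the same check; the in_range() name and the sample addresses are illustrative only.

#include <stdint.h>
#include <stdio.h>

/*
 * An address below baseaddr makes (testaddr - baseaddr) wrap to a huge value,
 * failing the first comparison; the last comparison rejects a testaddr + testsz
 * that would overflow.  A zero-sized range inside the region is accepted.
 */
static int
in_range(uintptr_t testaddr, size_t testsz, uintptr_t baseaddr, size_t basesz)
{
	return (testaddr - baseaddr < basesz &&
	    testaddr + testsz - baseaddr <= basesz &&
	    testaddr + testsz >= testaddr);
}

int
main(void)
{
	uintptr_t base = 0x1000;
	size_t size = 0x100;

	printf("%d\n", in_range(0x1000, 0x100, base, size));	/* 1: exact fit */
	printf("%d\n", in_range(0x1080, 0, base, size));	/* 1: size 0 allowed */
	printf("%d\n", in_range(0x0f00, 0x10, base, size));	/* 0: below the region */
	printf("%d\n", in_range((uintptr_t)-16, 64, base, size)); /* 0: outside, and end wraps */
	return (0);
}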
*/ #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \ (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \ (testaddr) + (testsz) >= (testaddr)) #define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ do { \ if ((remp) != NULL) { \ *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ } \ _NOTE(CONSTCOND) } while (0) /* * Test whether alloc_sz bytes will fit in the scratch region. We isolate * alloc_sz on the righthand side of the comparison in order to avoid overflow * or underflow in the comparison with it. This is simpler than the INRANGE * check above, because we know that the dtms_scratch_ptr is valid in the * range. Allocations of size zero are allowed. */ #define DTRACE_INSCRATCH(mstate, alloc_sz) \ ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ (mstate)->dtms_scratch_ptr >= (alloc_sz)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t \ dtrace_load##bits(uintptr_t addr) \ { \ size_t size = bits / NBBY; \ /*CSTYLED*/ \ uint##bits##_t rval; \ int i; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[curcpu].cpuc_dtrace_flags; \ \ DTRACE_ALIGNCHECK(addr, size, flags); \ \ for (i = 0; i < dtrace_toxranges; i++) { \ if (addr >= dtrace_toxrange[i].dtt_limit) \ continue; \ \ if (addr + size <= dtrace_toxrange[i].dtt_base) \ continue; \ \ /* \ * This address falls within a toxic region; return 0. \ */ \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ *flags |= CPU_DTRACE_NOFAULT; \ /*CSTYLED*/ \ rval = *((volatile uint##bits##_t *)addr); \ *flags &= ~CPU_DTRACE_NOFAULT; \ \ return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \ } #ifdef _LP64 #define dtrace_loadptr dtrace_load64 #else #define dtrace_loadptr dtrace_load32 #endif #define DTRACE_DYNHASH_FREE 0 #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') #define DTRACE_STATE_ALIGN 64 #define DTRACE_FLAGS2FLT(flags) \ (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \ DTRACEFLT_UNKNOWN) #define DTRACEACT_ISSTRING(act) \ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) /* Function prototype definitions: */ static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); uint16_t dtrace_load16(uintptr_t); uint32_t dtrace_load32(uintptr_t); uint64_t dtrace_load64(uintptr_t); uint8_t dtrace_load8(uintptr_t); void dtrace_dynvar_clean(dtrace_dstate_t *); dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *, size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *); uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_priv_proc(dtrace_state_t *); static void dtrace_getf_barrier(void); static int dtrace_canload_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); static int dtrace_canstore_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); /* * DTrace Probe Context Functions * * These functions are called from probe context. Because probe context is * any context in which C may be called, arbitrarily locks may be held, * interrupts may be disabled, we may be in arbitrary dispatched state, etc. * As a result, functions called from probe context may only call other DTrace * support functions -- they may not interact at all with the system at large. * (Note that the ASSERT macro is made probe-context safe by redefining it in * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary * loads are to be performed from probe context, they _must_ be in terms of * the safe dtrace_load*() variants. * * Some functions in this block are not actually called from probe context; * for these functions, there will be a comment above the function reading * "Note: not called from probe context." */ void dtrace_panic(const char *format, ...) { va_list alist; va_start(alist, format); #ifdef __FreeBSD__ vpanic(format, alist); #else dtrace_vpanic(format, alist); #endif va_end(alist); } int dtrace_assfail(const char *a, const char *f, int l) { dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); /* * We just need something here that even the most clever compiler * cannot optimize away. */ return (a[(uintptr_t)f]); } /* * Atomically increment a specified error counter from probe context. */ static void dtrace_error(uint32_t *counter) { /* * Most counters stored to in probe context are per-CPU counters. * However, there are some error conditions that are sufficiently * arcane that they don't merit per-CPU storage. 
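A user-space sketch of the wrap-avoiding increment that dtrace_error() performs below, with C11 atomics standing in for dtrace_cas32(); the error_bump() name is illustrative.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Once an error has been recorded, the counter can never read back as zero,
 * even after 2^32 increments: a wrap to 0 is pinned to 1 instead.
 */
static void
error_bump(_Atomic uint32_t *counter)
{
	uint32_t oval, nval;

	do {
		oval = atomic_load(counter);
		nval = oval + 1;
		if (nval == 0)
			nval = 1;
	} while (!atomic_compare_exchange_weak(counter, &oval, nval));
}

int
main(void)
{
	_Atomic uint32_t errs = UINT32_MAX;

	error_bump(&errs);	/* would wrap to 0; pinned to 1 instead */
	printf("%u\n", (unsigned)atomic_load(&errs));
	return (0);
}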
If these counters * are incremented concurrently on different CPUs, scalability will be * adversely affected -- but we don't expect them to be white-hot in a * correctly constructed enabling... */ uint32_t oval, nval; do { oval = *counter; if ((nval = oval + 1) == 0) { /* * If the counter would wrap, set it to 1 -- assuring * that the counter is never zero when we have seen * errors. (The counter must be 32-bits because we * aren't guaranteed a 64-bit compare&swap operation.) * To save this code both the infamy of being fingered * by a priggish news story and the indignity of being * the target of a neo-puritan witch trial, we're * carefully avoiding any colorful description of the * likelihood of this condition -- but suffice it to * say that it is only slightly more likely than the * overflow of predicate cache IDs, as discussed in * dtrace_predicate_create(). */ nval = 1; } } while (dtrace_cas32(counter, oval, nval) != oval); } /* * Use the DTRACE_LOADFUNC macro to define functions for each of loading a * uint8_t, a uint16_t, a uint32_t and a uint64_t. */ /* BEGIN CSTYLED */ DTRACE_LOADFUNC(8) DTRACE_LOADFUNC(16) DTRACE_LOADFUNC(32) DTRACE_LOADFUNC(64) /* END CSTYLED */ static int dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) { if (dest < mstate->dtms_scratch_base) return (0); if (dest + size < dest) return (0); if (dest + size > mstate->dtms_scratch_ptr) return (0); return (1); } static int dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; size_t maxglobalsize, maxlocalsize; if (nsvars == 0) return (0); maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); maxlocalsize = maxglobalsize * NCPU; for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; uint8_t scope; size_t size; if (svar == NULL || (size = svar->dtsv_size) == 0) continue; scope = svar->dtsv_var.dtdv_scope; /* * We verify that our size is valid in the spirit of providing * defense in depth: we want to prevent attackers from using * DTrace to escalate an orthogonal kernel heap corruption bug * into the ability to store to arbitrary locations in memory. */ VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, svar->dtsv_size); return (1); } } return (0); } /* * Check to see if the address is within a memory region to which a store may * be issued. This includes the DTrace scratch areas, and any DTrace variable * region. The caller of dtrace_canstore() is responsible for performing any * alignment checks that are needed before stores are actually executed. */ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canstore which communicates the upper bound of the * allowed memory region. */ static int dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { /* * First, check to see if the address is in scratch space... */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, mstate->dtms_scratch_size)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, mstate->dtms_scratch_size); return (1); } /* * Now check to see if it's a dynamic variable. 
This check will pick * up both thread-local variables and any global dynamically-allocated * variables. */ if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base, vstate->dtvs_dynvars.dtds_size)) { dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; uintptr_t base = (uintptr_t)dstate->dtds_base + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); uintptr_t chunkoffs; dtrace_dynvar_t *dvar; /* * Before we assume that we can store here, we need to make * sure that it isn't in our metadata -- storing to our * dynamic variable metadata would corrupt our state. For * the range to not include any dynamic variable metadata, * it must: * * (1) Start above the hash table that is at the base of * the dynamic variable space * * (2) Have a starting chunk offset that is beyond the * dtrace_dynvar_t that is at the base of every chunk * * (3) Not span a chunk boundary * * (4) Not be in the tuple space of a dynamic variable * */ if (addr < base) return (0); chunkoffs = (addr - base) % dstate->dtds_chunksize; if (chunkoffs < sizeof (dtrace_dynvar_t)) return (0); if (chunkoffs + sz > dstate->dtds_chunksize) return (0); dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) return (0); if (chunkoffs < sizeof (dtrace_dynvar_t) + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) return (0); DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize); return (1); } /* * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); return (0); } /* * Convenience routine to check to see if the address is within a memory * region in which a load may be issued given the user's privilege level; * if not, it sets the appropriate error flags and loads 'addr' into the * illegal value slot. * * DTrace subroutines (DIF_SUBR_*) should use this helper to implement * appropriate memory access protection. */ static int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canload which communicates the uppoer bound of the * allowed memory region. */ static int dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; file_t *fp; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * You can obviously read that which you can store. */ if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) return (1); /* * We're allowed to read from our own string table. */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen); return (1); } if (vstate->dtvs_state != NULL && dtrace_priv_proc(vstate->dtvs_state)) { proc_t *p; /* * When we have privileges to the current process, there are * several context-related kernel structures that are safe to * read, even absent the privilege to read from kernel memory. 
* These reads are safe because these structures contain only * state that (1) we're permitted to read, (2) is harmless or * (3) contains pointers to additional kernel state that we're * not permitted to read (and as such, do not present an * opportunity for privilege escalation). Finally (and * critically), because of the nature of their relation with * the current thread context, the memory associated with these * structures cannot change over the duration of probe context, * and it is therefore impossible for this memory to be * deallocated and reallocated as something else while it's * being operated upon. */ if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread, sizeof (kthread_t)); return (1); } if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr, sz, curthread->t_procp, sizeof (proc_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp, sizeof (proc_t)); return (1); } if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cred, sizeof (cred_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred, sizeof (cred_t)); return (1); } #ifdef illumos if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz, &(p->p_pidp->pid_id), sizeof (pid_t))) { DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id), sizeof (pid_t)); return (1); } if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread)); return (1); } #endif } if ((fp = mstate->dtms_getf) != NULL) { uintptr_t psz = sizeof (void *); vnode_t *vp; vnodeops_t *op; /* * When getf() returns a file_t, the enabling is implicitly * granted the (transient) right to read the returned file_t * as well as the v_path and v_op->vnop_name of the underlying * vnode. These accesses are allowed after a successful * getf() because the members that they refer to cannot change * once set -- and the barrier logic in the kernel's closef() * path assures that the file_t and its referenced vode_t * cannot themselves be stale (that is, it impossible for * either dtms_getf itself or its f_vnode member to reference * freed memory). */ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) { DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t)); return (1); } if ((vp = fp->f_vnode) != NULL) { size_t slen; #ifdef illumos if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path, psz); return (1); } slen = strlen(vp->v_path) + 1; if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) { DTRACE_RANGE_REMAIN(remain, addr, vp->v_path, slen); return (1); } #endif if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op, psz); return (1); } #ifdef illumos if ((op = vp->v_op) != NULL && DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &op->vnop_name, psz); return (1); } if (op != NULL && op->vnop_name != NULL && DTRACE_INRANGE(addr, sz, op->vnop_name, (slen = strlen(op->vnop_name) + 1))) { DTRACE_RANGE_REMAIN(remain, addr, op->vnop_name, slen); return (1); } #endif } } DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); *illval = addr; return (0); } /* * Convenience routine to check to see if a given string is within a memory * region in which a load may be issued given the user's privilege level; * this exists so that we don't need to issue unnecessary dtrace_strlen() * calls in the event that the user has all privileges. 
*/ static int dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t rsize; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * Even if the caller is uninterested in querying the remaining valid * range, it is required to ensure that the access is allowed. */ if (remain == NULL) { remain = &rsize; } if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { size_t strsz; /* * Perform the strlen after determining the length of the * memory region which is accessible. This prevents timing * information from being used to find NULs in memory which is * not accessible to the caller. */ strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, MIN(sz, *remain)); if (strsz <= *remain) { return (1); } } return (0); } /* * Convenience routine to check to see if a given variable is within a memory * region in which a load may be issued given the user's privilege level. */ static int dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t sz; ASSERT(type->dtdt_flags & DIF_TF_BYREF); /* * Calculate the max size before performing any checks since even * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function * return the max length via 'remain'. */ if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_state_t *state = vstate->dtvs_state; if (state != NULL) { sz = state->dts_options[DTRACEOPT_STRSIZE]; } else { /* * In helper context, we have a NULL state; fall back * to using the system-wide default for the string size * in this case. */ sz = dtrace_strsize_default; } } else { sz = type->dtdt_size; } /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); return (1); } if (type->dtdt_kind == DIF_TYPE_STRING) { return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, vstate)); } return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, vstate)); } /* * Convert a string to a signed integer using safe loads. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. */ static int64_t dtrace_strtoll(char *input, int base, size_t limit) { uintptr_t pos = (uintptr_t)input; int64_t val = 0; int x; boolean_t neg = B_FALSE; char c, cc, ccc; uintptr_t end = pos + limit; /* * Consume any whitespace preceding digits. */ while ((c = dtrace_load8(pos)) == ' ' || c == '\t') pos++; /* * Handle an explicit sign if one is present. */ if (c == '-' || c == '+') { if (c == '-') neg = B_TRUE; c = dtrace_load8(++pos); } /* * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it * if present. */ if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { pos += 2; c = ccc; } /* * Read in contiguous digits until the first non-digit character. */ for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; c = dtrace_load8(++pos)) val = val * base + x; return (neg ? -val : val); } /* * Compare two strings using safe loads. 
*/ static int dtrace_strncmp(char *s1, char *s2, size_t limit) { uint8_t c1, c2; volatile uint16_t *flags; if (s1 == s2 || limit == 0) return (0); flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; do { if (s1 == NULL) { c1 = '\0'; } else { c1 = dtrace_load8((uintptr_t)s1++); } if (s2 == NULL) { c2 = '\0'; } else { c2 = dtrace_load8((uintptr_t)s2++); } if (c1 != c2) return (c1 - c2); } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); return (0); } /* * Compute strlen(s) for a string using safe memory accesses. The additional * len parameter is used to specify a maximum length to ensure completion. */ static size_t dtrace_strlen(const char *s, size_t lim) { uint_t len; for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; } return (len); } /* * Check if an address falls within a toxic region. */ static int dtrace_istoxic(uintptr_t kaddr, size_t size) { uintptr_t taddr, tsize; int i; for (i = 0; i < dtrace_toxranges; i++) { taddr = dtrace_toxrange[i].dtt_base; tsize = dtrace_toxrange[i].dtt_limit - taddr; if (kaddr - taddr < tsize) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = kaddr; return (1); } if (taddr - kaddr < size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = taddr; return (1); } } return (0); } /* * Copy src to dst using safe memory accesses. The src is assumed to be unsafe * memory specified by the DIF program. The dst is assumed to be safe memory * that we can store to directly because it is managed by DTrace. As with * standard bcopy, overlapping copies are handled properly. */ static void dtrace_bcopy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst; const uint8_t *s2 = src; if (s1 <= s2) { do { *s1++ = dtrace_load8((uintptr_t)s2++); } while (--len != 0); } else { s2 += len; s1 += len; do { *--s1 = dtrace_load8((uintptr_t)--s2); } while (--len != 0); } } } /* * Copy src to dst using safe memory accesses, up to either the specified * length, or the point that a nul byte is encountered. The src is assumed to * be unsafe memory specified by the DIF program. The dst is assumed to be * safe memory that we can store to directly because it is managed by DTrace. * Unlike dtrace_bcopy(), overlapping regions are not handled. */ static void dtrace_strcpy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst, c; const uint8_t *s2 = src; do { *s1++ = c = dtrace_load8((uintptr_t)s2++); } while (--len != 0 && c != '\0'); } } /* * Copy src to dst, deriving the size and type from the specified (BYREF) * variable type. The src is assumed to be unsafe memory specified by the DIF * program. The dst is assumed to be DTrace variable memory that is of the * specified type; we assume that we can store to directly. */ static void dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); } else { dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); } } /* * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be * unsafe memory specified by the DIF program. The s2 data is assumed to be * safe memory that we can access directly because it is managed by DTrace. 
*/ static int dtrace_bcmp(const void *s1, const void *s2, size_t len) { volatile uint16_t *flags; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (s1 == s2) return (0); if (s1 == NULL || s2 == NULL) return (1); if (s1 != s2 && len != 0) { const uint8_t *ps1 = s1; const uint8_t *ps2 = s2; do { if (dtrace_load8((uintptr_t)ps1++) != *ps2++) return (1); } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); } return (0); } /* * Zero the specified region using a simple byte-by-byte loop. Note that this * is for safe DTrace-managed memory only. */ static void dtrace_bzero(void *dst, size_t len) { uchar_t *cp; for (cp = dst; len != 0; len--) *cp++ = 0; } static void dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) { uint64_t result[2]; result[0] = addend1[0] + addend2[0]; result[1] = addend1[1] + addend2[1] + (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); sum[0] = result[0]; sum[1] = result[1]; } /* * Shift the 128-bit value in a by b. If b is positive, shift left. * If b is negative, shift right. */ static void dtrace_shift_128(uint64_t *a, int b) { uint64_t mask; if (b == 0) return; if (b < 0) { b = -b; if (b >= 64) { a[0] = a[1] >> (b - 64); a[1] = 0; } else { a[0] >>= b; mask = 1LL << (64 - b); mask -= 1; a[0] |= ((a[1] & mask) << (64 - b)); a[1] >>= b; } } else { if (b >= 64) { a[1] = a[0] << (b - 64); a[0] = 0; } else { a[1] <<= b; mask = a[0] >> (64 - b); a[1] |= mask; a[0] <<= b; } } } /* * The basic idea is to break the 2 64-bit values into 4 32-bit values, * use native multiplication on those, and then re-combine into the * resulting 128-bit value. * * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = * hi1 * hi2 << 64 + * hi1 * lo2 << 32 + * hi2 * lo1 << 32 + * lo1 * lo2 */ static void dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) { uint64_t hi1, hi2, lo1, lo2; uint64_t tmp[2]; hi1 = factor1 >> 32; hi2 = factor2 >> 32; lo1 = factor1 & DT_MASK_LO; lo2 = factor2 & DT_MASK_LO; product[0] = lo1 * lo2; product[1] = hi1 * hi2; tmp[0] = hi1 * lo2; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); tmp[0] = hi2 * lo1; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); } /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the * invoking ECB match the target credentials */ static int dtrace_priv_proc_common_user(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. */ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_uid == cr->cr_uid && s_cr->cr_uid == cr->cr_ruid && s_cr->cr_uid == cr->cr_suid && s_cr->cr_gid == cr->cr_gid && s_cr->cr_gid == cr->cr_rgid && s_cr->cr_gid == cr->cr_sgid) return (1); return (0); } /* * This privilege check should be used by actions and subroutines to * verify that the zone of the process that enabled the invoking ECB * matches the target credentials */ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { #ifdef illumos cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. 
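The 32-bit decomposition used by dtrace_multiply_128() above can be checked in isolation. A stand-alone sketch under these assumptions: mul64_to_128() is a hypothetical name, the result is returned as a {low, high} pair, and the cross-check against __int128 is a GCC/Clang extension.

#include <stdint.h>
#include <stdio.h>

/* Multiply two 64-bit values into a 128-bit result via 32-bit halves. */
static void
mul64_to_128(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
	uint64_t a_hi = a >> 32, a_lo = a & 0xffffffffULL;
	uint64_t b_hi = b >> 32, b_lo = b & 0xffffffffULL;
	uint64_t p0 = a_lo * b_lo;
	uint64_t p1 = a_lo * b_hi;
	uint64_t p2 = a_hi * b_lo;
	uint64_t p3 = a_hi * b_hi;
	/* Carries from the low word plus the low halves of the cross terms. */
	uint64_t mid = (p0 >> 32) + (p1 & 0xffffffffULL) + (p2 & 0xffffffffULL);

	*lo = (p0 & 0xffffffffULL) | (mid << 32);
	*hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
}

int
main(void)
{
	uint64_t a = 0xdeadbeefcafebabeULL, b = 0x0123456789abcdefULL;
	uint64_t lo, hi;

	mul64_to_128(a, b, &lo, &hi);
#ifdef __SIZEOF_INT128__
	unsigned __int128 ref = (unsigned __int128)a * b;
	printf("match: %d\n", lo == (uint64_t)ref && hi == (uint64_t)(ref >> 64));
#else
	printf("%016llx%016llx\n", (unsigned long long)hi, (unsigned long long)lo);
#endif
	return (0);
}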
*/ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone) return (1); return (0); #else return (1); #endif } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ static int dtrace_priv_proc_common_nocd(void) { proc_t *proc; if ((proc = ttoproc(curthread)) != NULL && !(proc->p_flag & SNOCD)) return (1); return (0); } static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && dtrace_priv_proc_common_user(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && dtrace_priv_proc_common_nocd() == 0) goto bad; return (1); bad: cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc_control(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); if (dtrace_priv_proc_common_zone(state) && dtrace_priv_proc_common_user(state) && dtrace_priv_proc_common_nocd()) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_kernel(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } /* * Determine if the dte_cond of the specified ECB allows for processing of * the current probe to continue. Note that this routine may allow continued * processing, but with access(es) stripped from the mstate's dtms_access * field. */ static int dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; dtrace_pops_t *pops = &prov->dtpv_pops; int mode = DTRACE_MODE_NOPRIV_DROP; ASSERT(ecb->dte_cond); #ifdef illumos if (pops->dtps_mode != NULL) { mode = pops->dtps_mode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); ASSERT((mode & DTRACE_MODE_USER) || (mode & DTRACE_MODE_KERNEL)); ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || (mode & DTRACE_MODE_NOPRIV_DROP)); } /* * If the dte_cond bits indicate that this consumer is only allowed to * see user-mode firings of this probe, call the provider's dtps_mode() * entry point to check that the probe was fired while in a user * context. If that's not the case, use the policy specified by the * provider to determine if we drop the probe or merely restrict * operation. */ if (ecb->dte_cond & DTRACE_COND_USERMODE) { ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); if (!(mode & DTRACE_MODE_USER)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; } } #endif /* * This is more subtle than it looks. We have to be absolutely certain * that CRED() isn't going to change out from under us so it's only * legit to examine that structure if we're in constrained situations. 
* Currently, the only times we'll this check is if a non-super-user * has enabled the profile or syscall providers -- providers that * allow visibility of all processes. For the profile case, the check * above will ensure that we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); #ifdef illumos mstate->dtms_access &= ~DTRACE_ACCESS_PROC; #endif } } #ifdef illumos /* * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not * in our zone, check to see if our mode policy is to restrict rather * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC * and DTRACE_ACCESS_ARGS */ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); } } #endif return (1); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in . */ void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; dtrace_dynvar_t **rinsep; int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. */ if (dcpu->dtdsc_dirty == NULL) continue; if (dcpu->dtdsc_rinsing != NULL) { /* * If the rinsing list is non-NULL, then it is because * this CPU was selected to accept another CPU's * dirty list -- and since that time, dirty buffers * have accumulated. This is a highly unlikely * condition, but we choose to ignore the dirty * buffers -- they'll be picked up a future cleanse. */ continue; } if (dcpu->dtdsc_clean != NULL) { /* * If the clean list is non-NULL, then we're in a * situation where a CPU has done deallocations (we * have a non-NULL dirty list) but no allocations (we * also have a non-NULL clean list). We can't simply * move the dirty list into the clean list on this * CPU, yet we also don't want to allow this condition * to persist, lest a short clean list prevent a * massive dirty list from being cleaned (which in * turn could lead to otherwise avoidable dynamic * drops). To deal with this, we look for some CPU * with a NULL clean list, NULL dirty list, and NULL * rinsing list -- and then we borrow this CPU to * rinse our dirty list. */ for (j = 0; j < NCPU; j++) { dtrace_dstate_percpu_t *rinser; rinser = &dstate->dtds_percpu[j]; if (rinser->dtdsc_rinsing != NULL) continue; if (rinser->dtdsc_dirty != NULL) continue; if (rinser->dtdsc_clean != NULL) continue; rinsep = &rinser->dtdsc_rinsing; break; } if (j == NCPU) { /* * We were unable to find another CPU that * could accept this dirty list -- we are * therefore unable to clean it now. */ dtrace_dynvar_failclean++; continue; } } work = 1; /* * Atomically move the dirty list aside. 
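Detaching an entire lock-free list with a single compare-and-swap, as the cleaner does when it moves a CPU's dirty list aside, can be sketched in user space as follows. C11 atomics stand in for dtrace_casptr(), the node type is hypothetical, and the real code additionally publishes the snapshot on the rinsing pointer (with a producer barrier) before swinging the dirty pointer.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int val;
};

/* Take every element currently on the list in one CAS: retry until the head
 * still matches our snapshot, then swing it to NULL. */
static struct node *
detach_all(struct node *_Atomic *headp)
{
	struct node *snap;

	do {
		snap = atomic_load(headp);
	} while (!atomic_compare_exchange_weak(headp, &snap, NULL));

	return (snap);
}

int
main(void)
{
	struct node a = { NULL, 1 }, b = { &a, 2 };
	struct node *_Atomic head = &b;
	struct node *n;

	for (n = detach_all(&head); n != NULL; n = n->next)
		printf("%d\n", n->val);
	return (0);
}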
*/ do { dirty = dcpu->dtdsc_dirty; /* * Before we zap the dirty list, set the rinsing list. * (This allows for a potential assertion in * dtrace_dynvar(): if a free dynamic variable appears * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); } if (!work) { /* * We have no work to do; we can simply return. */ return; } dtrace_sync(); for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; if (dcpu->dtdsc_rinsing == NULL) continue; /* * We are now guaranteed that no hash chain contains a pointer * into this dirty list; we can make it clean. */ ASSERT(dcpu->dtdsc_clean == NULL); dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; dcpu->dtdsc_rinsing = NULL; } /* * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make * sure that all CPUs have seen all of the dtdsc_clean pointers. * This prevents a race whereby a CPU incorrectly decides that * the state should be something other than DTRACE_DSTATE_CLEAN * after dtrace_dynvar_clean() has completed. */ dtrace_sync(); dstate->dtds_state = DTRACE_DSTATE_CLEAN; } /* * Depending on the value of the op parameter, this function looks-up, * allocates or deallocates an arbitrarily-keyed dynamic variable. If an * allocation is requested, this function will return a pointer to a * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. */ dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; processorid_t me = curcpu, cpu = me; dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; size_t bucket, ksize; size_t chunksize = dstate->dtds_chunksize; uintptr_t kdata, lock, nstate; uint_t i; ASSERT(nkeys != 0); /* * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" * algorithm. For the by-value portions, we perform the algorithm in * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a * bit, and seems to have only a minute effect on distribution. For * the by-reference data, we perform "One-at-a-time" iterating (safely) * over each referenced byte. It's painful to do this, but it's much * better than pathological hash distribution. The efficacy of the * hashing algorithm (and a comparison with other algorithms) may be * found by running the ::dtrace_dynstat MDB dcmd. */ for (i = 0; i < nkeys; i++) { if (key[i].dttk_size == 0) { uint64_t val = key[i].dttk_value; hashval += (val >> 48) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 32) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 16) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += val & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); } else { /* * This is incredibly painful, but it beats the hell * out of the alternative. 
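A stand-alone, byte-wise version of the Jenkins "one-at-a-time" hash with the same per-step and final mixing used above; the kernel code also walks by-value keys in 16-bit chunks, which this sketch omits, and the oaat_hash() name and sample key are illustrative.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static uint64_t
oaat_hash(const uint8_t *data, size_t len)
{
	uint64_t h = 0;
	size_t i;

	/* Mix one byte per step. */
	for (i = 0; i < len; i++) {
		h += data[i];
		h += (h << 10);
		h ^= (h >> 6);
	}
	/* Final avalanche, matching the mixing applied after the key loop. */
	h += (h << 3);
	h ^= (h >> 11);
	h += (h << 15);
	return (h);
}

int
main(void)
{
	const char *key = "curpsinfo->pr_psargs";

	printf("%llx\n",
	    (unsigned long long)oaat_hash((const uint8_t *)key, strlen(key)));
	return (0);
}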
*/ uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; if (!dtrace_canload(base, size, mstate, vstate)) break; for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); hashval ^= (hashval >> 6); } } } if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (NULL); hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * There is a remote chance (ideally, 1 in 2^31) that our hashval * comes out to be one of our two sentinel hash values. If this * actually happens, we set the hashval to be a value known to be a * non-sentinel value. */ if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) hashval = DTRACE_DYNHASH_VALID; /* * Yes, it's painful to do a divide here. If the cycle count becomes * important here, tricks can be pulled to reduce it. (However, it's * critical that hash collisions be kept to an absolute minimum; * they're much more painful than a divide.) It's better to have a * solution that generates few collisions and still keeps things * relatively simple. */ bucket = hashval % dstate->dtds_hashsize; if (op == DTRACE_DYNVAR_DEALLOC) { volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; for (;;) { while ((lock = *lockp) & 1) continue; if (dtrace_casptr((volatile void *)lockp, (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock) break; } dtrace_membar_producer(); } top: prev = NULL; lock = hash[bucket].dtdh_lock; dtrace_membar_consumer(); start = hash[bucket].dtdh_chain; ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || start->dtdv_hashval != DTRACE_DYNHASH_FREE || op != DTRACE_DYNVAR_DEALLOC)); for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; dtrace_key_t *dkey = &dtuple->dtt_key[0]; if (dvar->dtdv_hashval != hashval) { if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { /* * We've reached the sink, and therefore the * end of the hash chain; we can kick out of * the loop knowing that we have seen a valid * snapshot of state. */ ASSERT(dvar->dtdv_next == NULL); ASSERT(dvar == &dtrace_dynhash_sink); break; } if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { /* * We've gone off the rails: somewhere along * the line, one of the members of this hash * chain was deleted. Note that we could also * detect this by simply letting this loop run * to completion, as we would eventually hit * the end of the dirty list. However, we * want to avoid running the length of the * dirty list unnecessarily (it might be quite * long), so we catch this as early as * possible by detecting the hash marker. In * this case, we simply set dvar to NULL and * break; the conditional after the loop will * send us back to top. 
*/ dvar = NULL; break; } goto next; } if (dtuple->dtt_nkeys != nkeys) goto next; for (i = 0; i < nkeys; i++, dkey++) { if (dkey->dttk_size != key[i].dttk_size) goto next; /* size or type mismatch */ if (dkey->dttk_size != 0) { if (dtrace_bcmp( (void *)(uintptr_t)key[i].dttk_value, (void *)(uintptr_t)dkey->dttk_value, dkey->dttk_size)) goto next; } else { if (dkey->dttk_value != key[i].dttk_value) goto next; } } if (op != DTRACE_DYNVAR_DEALLOC) return (dvar); ASSERT(dvar->dtdv_next == NULL || dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); if (prev != NULL) { ASSERT(hash[bucket].dtdh_chain != dvar); ASSERT(start != dvar); ASSERT(prev->dtdv_next == dvar); prev->dtdv_next = dvar->dtdv_next; } else { if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar->dtdv_next) != start) { /* * We have failed to atomically swing the * hash table head pointer, presumably because * of a conflicting allocation on another CPU. * We need to reread the hash chain and try * again. */ goto top; } } dtrace_membar_producer(); /* * Now set the hash value to indicate that it's free. */ ASSERT(hash[bucket].dtdh_chain != dvar); dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); /* * Set the next pointer to point at the dirty list, and * atomically swing the dirty pointer to the newly freed dvar. */ do { next = dcpu->dtdsc_dirty; dvar->dtdv_next = next; } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); /* * Finally, unlock this hash bucket. */ ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; return (NULL); next: prev = dvar; continue; } if (dvar == NULL) { /* * If dvar is NULL, it is because we went off the rails: * one of the elements that we traversed in the hash chain * was deleted while we were traversing it. In this case, * we assert that we aren't doing a dealloc (deallocs lock * the hash bucket to prevent themselves from racing with * one another), and retry the hash chain traversal. */ ASSERT(op != DTRACE_DYNVAR_DEALLOC); goto top; } if (op != DTRACE_DYNVAR_ALLOC) { /* * If we are not to allocate a new variable, we want to * return NULL now. Before we return, check that the value * of the lock word hasn't changed. If it has, we may have * seen an inconsistent snapshot. */ if (op == DTRACE_DYNVAR_NOALLOC) { if (hash[bucket].dtdh_lock != lock) goto top; } else { ASSERT(op == DTRACE_DYNVAR_DEALLOC); ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; } return (NULL); } /* * We need to allocate a new dynamic variable. The size we need is the * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the * size of any auxiliary key data (rounded up to 8-byte alignment) plus * the size of any referred-to data (dsize). We then round the final * size up to the chunksize for allocation. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); /* * This should be pretty much impossible, but could happen if, say, * strange DIF specified the tuple. Ideally, this should be an * assertion and not an error condition -- but that requires that the * chunksize calculation in dtrace_difo_chunksize() be absolutely * bullet-proof. (That is, it must not be able to be fooled by * malicious DIF.) Given the lack of backwards branches in DIF, * solving this would presumably not amount to solving the Halting * Problem -- but it still seems awfully hard. 
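The size arithmetic described above can be made concrete with placeholder numbers. In this sketch dynvar_size_needed(), key_sketch_t, and all of the sizes are hypothetical stand-ins (not the real dtrace_dynvar_t / dtrace_key_t layouts); P2ROUNDUP() is the usual kernel power-of-two round-up macro, reproduced here so the example compiles on its own.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

/* Hypothetical key descriptor: dttk_size == 0 means a by-value key. */
typedef struct {
	uint64_t dttk_value;
	uint64_t dttk_size;
} key_sketch_t;

static size_t
dynvar_size_needed(const key_sketch_t *key, unsigned nkeys, size_t dsize,
    size_t hdrsize, size_t keyrecsize)
{
	size_t ksize = 0;
	unsigned i;

	/* By-reference key data, each rounded up to 8-byte alignment. */
	for (i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP((size_t)key[i].dttk_size, sizeof (uint64_t));

	/* Header + one key record per key beyond the first (nkeys != 0 is
	 * assumed, as the real code asserts) + key data + referred-to data. */
	return (hdrsize + keyrecsize * (nkeys - 1) + ksize + dsize);
}

int
main(void)
{
	key_sketch_t keys[2] = { { 0, 0 }, { 0, 13 } };
	size_t need, chunksize = 256;

	need = dynvar_size_needed(keys, 2, 32, 64, 16);
	printf("need %zu: %s\n", need,
	    need > chunksize ? "drop" : "fits in a chunk");
	return (0);
}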
*/ if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + ksize + dsize > chunksize) { dcpu->dtdsc_drops++; return (NULL); } nstate = DTRACE_DSTATE_EMPTY; do { retry: free = dcpu->dtdsc_free; if (free == NULL) { dtrace_dynvar_t *clean = dcpu->dtdsc_clean; void *rval; if (clean == NULL) { /* * We're out of dynamic variable space on * this CPU. Unless we have tried all CPUs, * we'll try to allocate from a different * CPU. */ switch (dstate->dtds_state) { case DTRACE_DSTATE_CLEAN: { void *sp = &dstate->dtds_state; if (++cpu >= NCPU) cpu = 0; if (dcpu->dtdsc_dirty != NULL && nstate == DTRACE_DSTATE_EMPTY) nstate = DTRACE_DSTATE_DIRTY; if (dcpu->dtdsc_rinsing != NULL) nstate = DTRACE_DSTATE_RINSING; dcpu = &dstate->dtds_percpu[cpu]; if (cpu != me) goto retry; (void) dtrace_cas32(sp, DTRACE_DSTATE_CLEAN, nstate); /* * To increment the correct bean * counter, take another lap. */ goto retry; } case DTRACE_DSTATE_DIRTY: dcpu->dtdsc_dirty_drops++; break; case DTRACE_DSTATE_RINSING: dcpu->dtdsc_rinsing_drops++; break; case DTRACE_DSTATE_EMPTY: dcpu->dtdsc_drops++; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); return (NULL); } /* * The clean list appears to be non-empty. We want to * move the clean list to the free list; we start by * moving the clean pointer aside. */ if (dtrace_casptr(&dcpu->dtdsc_clean, clean, NULL) != clean) { /* * We are in one of two situations: * * (a) The clean list was switched to the * free list by another CPU. * * (b) The clean list was added to by the * cleansing cyclic. * * In either of these situations, we can * just reattempt the free list allocation. */ goto retry; } ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. * Thus, it would only be possible for this to fail if * this code were racing with dtrace_dynvar_clean(). * (That is, if dtrace_dynvar_clean() updated the clean * list, and we ended up racing to update the free * list.) This race is prevented by the dtrace_sync() * in dtrace_dynvar_clean() -- which flushes the * owners of the clean lists out before resetting * the clean lists. */ dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; } dvar = free; new_free = dvar->dtdv_next; } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); /* * We have now allocated a new chunk. We copy the tuple keys into the * tuple array and copy any referenced key data into the data space * following the tuple array. As we do this, we relocate dttk_value * in the final tuple to point to the key data address in the chunk. */ kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; dvar->dtdv_data = (void *)(kdata + ksize); dvar->dtdv_tuple.dtt_nkeys = nkeys; for (i = 0; i < nkeys; i++) { dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; size_t kesize = key[i].dttk_size; if (kesize != 0) { dtrace_bcopy( (const void *)(uintptr_t)key[i].dttk_value, (void *)kdata, kesize); dkey->dttk_value = kdata; kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); } else { dkey->dttk_value = key[i].dttk_value; } dkey->dttk_size = kesize; } ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); dvar->dtdv_hashval = hashval; dvar->dtdv_next = start; if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) return (dvar); /* * The cas has failed. 
Either another CPU is adding an element to * this hash chain, or another CPU is deleting an element from this * hash chain. The simplest way to deal with both of these cases * (though not necessarily the most efficient) is to free our * allocated block and re-attempt it all. Note that the free is * to the dirty list and _not_ to the free list. This is to prevent * races with allocators, above. */ dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); do { free = dcpu->dtdsc_dirty; dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); goto top; } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval < (int64_t)*oval) *oval = nval; } /*ARGSUSED*/ static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval > (int64_t)*oval) *oval = nval; } static void dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) { int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; int64_t val = (int64_t)nval; if (val < 0) { for (i = 0; i < zero; i++) { if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i] += incr; return; } } } else { for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i - 1] += incr; return; } } quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; return; } ASSERT(0); } static void dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *lquanta++; int32_t base = DTRACE_LQUANTIZE_BASE(arg); uint16_t step = DTRACE_LQUANTIZE_STEP(arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); int32_t val = (int32_t)nval, level; ASSERT(step != 0); ASSERT(levels != 0); if (val < base) { /* * This is an underflow. */ lquanta[0] += incr; return; } level = (val - base) / step; if (level < levels) { lquanta[level + 1] += incr; return; } /* * This is an overflow. */ lquanta[levels + 1] += incr; } static int dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps, int64_t value) { int64_t this = 1, last, next; int base = 1, order; ASSERT(factor <= nsteps); ASSERT(nsteps % factor == 0); for (order = 0; order < low; order++) this *= factor; /* * If our value is less than our factor taken to the power of the * low order of magnitude, it goes into the zeroth bucket. */ if (value < (last = this)) return (0); for (this *= factor; order <= high; order++) { int nbuckets = this > nsteps ? nsteps : this; if ((next = this * factor) < this) { /* * We should not generally get log/linear quantizations * with a high magnitude that allows 64-bits to * overflow, but we nonetheless protect against this * by explicitly checking for overflow, and clamping * our value accordingly. */ value = this - 1; } if (value < this) { /* * If our value lies within this order of magnitude, * determine its position by taking the offset within * the order of magnitude, dividing by the bucket * width, and adding to our (accumulated) base. */ return (base + (value - last) / (this / nbuckets)); } base += nbuckets - (nbuckets / factor); last = this; this = next; } /* * Our value is greater than or equal to our factor taken to the * power of one plus the high magnitude -- return the top bucket. 
*/ return (base); } static void dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *llquanta++; uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; } /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) { data[0]++; data[1] += nval; } /*ARGSUSED*/ static void dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) { int64_t snval = (int64_t)nval; uint64_t tmp[2]; data[0]++; data[1] += nval; /* * What we want to say here is: * * data[2] += nval * nval; * * But given that nval is 64-bit, we could easily overflow, so * we do this as 128-bit arithmetic. */ if (snval < 0) snval = -snval; dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); dtrace_add_128(data + 2, tmp, data + 2); } /*ARGSUSED*/ static void dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval = *oval + 1; } /*ARGSUSED*/ static void dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval += nval; } /* * Aggregate given the tuple in the principal data buffer, and the aggregating * action denoted by the specified dtrace_aggregation_t. The aggregation * buffer is specified as the buf parameter. This routine does not return * failure; if there is no space in the aggregation buffer, the data will be * dropped, and a corresponding counter incremented. */ static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) { dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; uint32_t i, ndx, size, fsize; uint32_t align = sizeof (uint64_t) - 1; dtrace_aggbuffer_t *agb; dtrace_aggkey_t *key; uint32_t hashval = 0, limit, isstr; caddr_t tomax, data, kdata; dtrace_actkind_t action; dtrace_action_t *act; uintptr_t offs; if (buf == NULL) return; if (!agg->dtag_hasarg) { /* * Currently, only quantize() and lquantize() take additional * arguments, and they have the same semantics: an increment * value that defaults to 1 when not present. If additional * aggregating actions take arguments, the setting of the * default argument value will presumably have to become more * sophisticated... */ arg = 1; } action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; size = rec->dtrd_offset - agg->dtag_base; fsize = size + rec->dtrd_size; ASSERT(dbuf->dtb_tomax != NULL); data = dbuf->dtb_tomax + offset + agg->dtag_base; if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return; } /* * The metastructure is always at the bottom of the buffer. */ agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size - sizeof (dtrace_aggbuffer_t)); if (buf->dtb_offset == 0) { /* * We just kludge up approximately 1/8th of the size to be * buckets. If this guess ends up being routinely * off-the-mark, we may need to dynamically readjust this * based on past performance. */ uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t); if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) < (uintptr_t)tomax || hashsize == 0) { /* * We've been given a ludicrously small buffer; * increment our drop count and leave. */ dtrace_buffer_drop(buf); return; } /* * And now, a pathetic attempt to try to get a an odd (or * perchance, a prime) hash size for better hash distribution. 
*/ if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) hashsize -= DTRACE_AGGHASHSIZE_SLEW; agb->dtagb_hashsize = hashsize; agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb - agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *)); agb->dtagb_free = (uintptr_t)agb->dtagb_hash; for (i = 0; i < agb->dtagb_hashsize; i++) agb->dtagb_hash[i] = NULL; } ASSERT(agg->dtag_first != NULL); ASSERT(agg->dtag_first->dta_intuple); /* * Calculate the hash value based on the key. Note that we _don't_ * include the aggid in the hashing (but we will store it as part of * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time" * algorithm: a simple, quick algorithm that has no known funnels, and * gets good distribution in practice. The efficacy of the hashing * algorithm (and a comparison with other algorithms) may be found by * running the ::dtrace_aggstat MDB dcmd. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { hashval += data[i]; hashval += (hashval << 10); hashval ^= (hashval >> 6); if (isstr && data[i] == '\0') break; } } hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * Yes, the divide here is expensive -- but it's generally the least * of the performance issues given the amount of data that we iterate * over to compute hash values, compare data, etc. */ ndx = hashval % agb->dtagb_hashsize; for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { ASSERT((caddr_t)key >= tomax); ASSERT((caddr_t)key < tomax + buf->dtb_size); if (hashval != key->dtak_hashval || key->dtak_size != size) continue; kdata = key->dtak_data; ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { if (kdata[i] != data[i]) goto next; if (isstr && data[i] == '\0') break; } } if (action != key->dtak_action) { /* * We are aggregating on the same value in the same * aggregation with two different aggregating actions. * (This should have been picked up in the compiler, * so we may be dealing with errant or devious DIF.) * This is an error condition; we indicate as much, * and return. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } /* * This is a hit: we need to apply the aggregator to * the value at this key. */ agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); return; next: continue; } /* * We didn't find it. We need to allocate some zero-filled space, * link it into the hash table appropriately, and apply the aggregator * to the (zero-filled) value. */ offs = buf->dtb_offset; while (offs & (align - 1)) offs += sizeof (uint32_t); /* * If we don't have enough room to both allocate a new key _and_ * its associated data, increment the drop count and return. */ if ((uintptr_t)tomax + offs + fsize > agb->dtagb_free - sizeof (dtrace_aggkey_t)) { dtrace_buffer_drop(buf); return; } /*CONSTCOND*/ ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1))); key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t)); agb->dtagb_free -= sizeof (dtrace_aggkey_t); key->dtak_data = kdata = tomax + offs; buf->dtb_offset = offs + fsize; /* * Now copy the data across. 
*/ *((dtrace_aggid_t *)kdata) = agg->dtag_id; for (i = sizeof (dtrace_aggid_t); i < size; i++) kdata[i] = data[i]; /* * Because strings are not zeroed out by default, we need to iterate * looking for actions that store strings, and we need to explicitly * pad these strings out with zeroes. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { int nul; if (!DTRACEACT_ISSTRING(act)) continue; i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); for (nul = 0; i < limit; i++) { if (nul) { kdata[i] = '\0'; continue; } if (data[i] != '\0') continue; nul = 1; } } for (i = size; i < fsize; i++) kdata[i] = 0; key->dtak_hashval = hashval; key->dtak_size = size; key->dtak_action = action; key->dtak_next = agb->dtagb_hash[ndx]; agb->dtagb_hash[ndx] = key; /* * Finally, apply the aggregator. */ *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); } /* * Given consumer state, this routine finds a speculation in the INACTIVE * state and transitions it into the ACTIVE state. If there is no speculation * in the INACTIVE state, 0 is returned. In this case, no error counter is * incremented -- it is up to the caller to take appropriate action. */ static int dtrace_speculation(dtrace_state_t *state) { int i = 0; dtrace_speculation_state_t current; uint32_t *stat = &state->dts_speculations_unavail, count; while (i < state->dts_nspeculations) { dtrace_speculation_t *spec = &state->dts_speculations[i]; current = spec->dtsp_state; if (current != DTRACESPEC_INACTIVE) { if (current == DTRACESPEC_COMMITTINGMANY || current == DTRACESPEC_COMMITTING || current == DTRACESPEC_DISCARDING) stat = &state->dts_speculations_busy; i++; continue; } if (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, DTRACESPEC_ACTIVE) == current) return (i + 1); } /* * We couldn't find a speculation. If we found as much as a single * busy speculation buffer, we'll attribute this failure as "busy" * instead of "unavail". */ do { count = *stat; } while (dtrace_cas32(stat, count, count + 1) != count); return (0); } /* * This routine commits an active speculation. If the specified speculation * is not in a valid state to perform a commit(), this routine will silently do * nothing. The state of the specified speculation is transitioned according * to the state transition diagram outlined in */ static void dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; uintptr_t daddr, saddr, dlimit, slimit; dtrace_speculation_state_t current, new = 0; intptr_t offs; uint64_t timestamp; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; do { current = spec->dtsp_state; if (current == DTRACESPEC_COMMITTINGMANY) break; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_COMMITTING: /* * This is only possible if we are (a) commit()'ing * without having done a prior speculate() on this CPU * and (b) racing with another commit() on a different * CPU. There's nothing to do -- we just assert that * our offset is 0. */ ASSERT(src->dtb_offset == 0); return; case DTRACESPEC_ACTIVE: new = DTRACESPEC_COMMITTING; break; case DTRACESPEC_ACTIVEONE: /* * This speculation is active on one CPU. 
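/*
 * Illustrative sketch (assumption: C11 atomics standing in for
 * dtrace_cas32, state names invented): the compare-and-swap retry idiom
 * that the speculation code above uses for all of its state transitions.
 */
#include <stdatomic.h>
#include <stdint.h>

enum { EX_SPEC_INACTIVE, EX_SPEC_ACTIVE, EX_SPEC_COMMITTING };

static int
ex_spec_try_commit(_Atomic uint32_t *statep)
{
	uint32_t current;

	do {
		current = atomic_load(statep);
		if (current != EX_SPEC_ACTIVE)
			return (0);		/* not ours to commit */
		/*
		 * If another CPU changed the state after we loaded it,
		 * the exchange fails and we simply re-read and retry,
		 * exactly as the dtrace_cas32() loops above do.
		 */
	} while (!atomic_compare_exchange_weak(statep, &current,
	    (uint32_t)EX_SPEC_COMMITTING));

	return (1);
}

int
main(void)
{
	_Atomic uint32_t state = EX_SPEC_ACTIVE;

	return (ex_spec_try_commit(&state) ? 0 : 1);
}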
If our * buffer offset is non-zero, we know that the one CPU * must be us. Otherwise, we are committing on a * different CPU from the speculate(), and we must * rely on being asynchronously cleaned. */ if (src->dtb_offset != 0) { new = DTRACESPEC_COMMITTING; break; } /*FALLTHROUGH*/ case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_COMMITTINGMANY; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); /* * We have set the state to indicate that we are committing this * speculation. Now reserve the necessary space in the destination * buffer. */ if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset, sizeof (uint64_t), state, NULL)) < 0) { dtrace_buffer_drop(dest); goto out; } /* * We have sufficient space to copy the speculative buffer into the * primary buffer. First, modify the speculative buffer, filling * in the timestamp of all entries with the current time. The data * must have the commit() time rather than the time it was traced, * so that all entries in the primary buffer are in timestamp order. */ timestamp = dtrace_gethrtime(); saddr = (uintptr_t)src->dtb_tomax; slimit = saddr + src->dtb_offset; while (saddr < slimit) { size_t size; dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr; if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { saddr += sizeof (dtrace_epid_t); continue; } ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs); size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size; ASSERT3U(saddr + size, <=, slimit); ASSERT3U(size, >=, sizeof (dtrace_rechdr_t)); ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX); DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp); saddr += size; } /* * Copy the buffer across. (Note that this is a * highly subobtimal bcopy(); in the unlikely event that this becomes * a serious performance issue, a high-performance DTrace-specific * bcopy() should obviously be invented.) */ daddr = (uintptr_t)dest->dtb_tomax + offs; dlimit = daddr + src->dtb_offset; saddr = (uintptr_t)src->dtb_tomax; /* * First, the aligned portion. */ while (dlimit - daddr >= sizeof (uint64_t)) { *((uint64_t *)daddr) = *((uint64_t *)saddr); daddr += sizeof (uint64_t); saddr += sizeof (uint64_t); } /* * Now any left-over bit... */ while (dlimit - daddr) *((uint8_t *)daddr++) = *((uint8_t *)saddr++); /* * Finally, commit the reserved space in the destination buffer. */ dest->dtb_offset = offs + src->dtb_offset; out: /* * If we're lucky enough to be the only active CPU on this speculation * buffer, we can just set the state back to DTRACESPEC_INACTIVE. */ if (current == DTRACESPEC_ACTIVE || (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); ASSERT(rval == DTRACESPEC_COMMITTING); } src->dtb_offset = 0; src->dtb_xamot_drops += src->dtb_drops; src->dtb_drops = 0; } /* * This routine discards an active speculation. If the specified speculation * is not in a valid state to perform a discard(), this routine will silently * do nothing. 
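/*
 * Illustrative sketch (function and state names invented): the
 * commit-side transitions implemented above, written out as a plain
 * mapping.  ACTIVE becomes COMMITTING; ACTIVEONE becomes COMMITTING when
 * the committing CPU owns the buffer (non-zero offset) and
 * COMMITTINGMANY otherwise; ACTIVEMANY always becomes COMMITTINGMANY;
 * INACTIVE, DISCARDING and COMMITTING are left alone by commit().
 */
#include <stdio.h>

typedef enum {
	EX_INACTIVE, EX_ACTIVE, EX_ACTIVEONE, EX_ACTIVEMANY,
	EX_COMMITTING, EX_COMMITTINGMANY, EX_DISCARDING, EX_NOCHANGE
} ex_spec_state_t;

static ex_spec_state_t
ex_commit_next(ex_spec_state_t current, int my_offset_nonzero)
{
	switch (current) {
	case EX_ACTIVE:
		return (EX_COMMITTING);
	case EX_ACTIVEONE:
		return (my_offset_nonzero ? EX_COMMITTING : EX_COMMITTINGMANY);
	case EX_ACTIVEMANY:
		return (EX_COMMITTINGMANY);
	default:
		return (EX_NOCHANGE);	/* nothing for commit() to do */
	}
}

int
main(void)
{
	printf("ACTIVEONE, offset != 0 -> %d\n", ex_commit_next(EX_ACTIVEONE, 1));
	printf("ACTIVEMANY             -> %d\n", ex_commit_next(EX_ACTIVEMANY, 0));
	return (0);
}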
The state of the specified speculation is transitioned * according to the state transition diagram outlined in */ static void dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpu]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_COMMITTING: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_ACTIVE: case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_DISCARDING; break; case DTRACESPEC_ACTIVEONE: if (buf->dtb_offset != 0) { new = DTRACESPEC_INACTIVE; } else { new = DTRACESPEC_DISCARDING; } break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); buf->dtb_offset = 0; buf->dtb_drops = 0; } /* * Note: not called from probe context. This function is called * asynchronously from cross call context to clean any speculations that are * in the COMMITTINGMANY or DISCARDING states. These speculations may not be * transitioned back to the INACTIVE state until all CPUs have cleaned the * speculation. */ static void dtrace_speculation_clean_here(dtrace_state_t *state) { dtrace_icookie_t cookie; processorid_t cpu = curcpu; dtrace_buffer_t *dest = &state->dts_buffer[cpu]; dtrace_specid_t i; cookie = dtrace_interrupt_disable(); if (dest->dtb_tomax == NULL) { dtrace_interrupt_enable(cookie); return; } for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; if (src->dtb_tomax == NULL) continue; if (spec->dtsp_state == DTRACESPEC_DISCARDING) { src->dtb_offset = 0; continue; } if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; if (src->dtb_offset == 0) continue; dtrace_speculation_commit(state, cpu, i + 1); } dtrace_interrupt_enable(cookie); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) to clean any speculations that * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there * is work to be done, it cross calls all CPUs to perform that work; * COMMITMANY and DISCARDING speculations may not be transitioned back to the * INACTIVE state until they have been cleaned by all CPUs. */ static void dtrace_speculation_clean(dtrace_state_t *state) { int work = 0, rv; dtrace_specid_t i; for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); if (spec->dtsp_state != DTRACESPEC_DISCARDING && spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; work++; spec->dtsp_cleaning = 1; } if (!work) return; dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_speculation_clean_here, state); /* * We now know that all CPUs have committed or discarded their * speculation buffers, as appropriate. We can now set the state * to inactive. 
*/ for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; if (!spec->dtsp_cleaning) continue; current = spec->dtsp_state; ASSERT(current == DTRACESPEC_DISCARDING || current == DTRACESPEC_COMMITTINGMANY); new = DTRACESPEC_INACTIVE; rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); ASSERT(rv == current); spec->dtsp_cleaning = 0; } } /* * Called as part of a speculate() to get the speculative buffer associated * with a given speculation. Returns NULL if the specified speculation is not * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and * the active CPU is not the specified CPU -- the speculation will be * atomically transitioned into the ACTIVEMANY state. */ static dtrace_buffer_t * dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return (NULL); if (which > state->dts_nspeculations) { cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpuid]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_DISCARDING: return (NULL); case DTRACESPEC_COMMITTING: ASSERT(buf->dtb_offset == 0); return (NULL); case DTRACESPEC_ACTIVEONE: /* * This speculation is currently active on one CPU. * Check the offset in the buffer; if it's non-zero, * that CPU must be us (and we leave the state alone). * If it's zero, assume that we're starting on a new * CPU -- and change the state to indicate that the * speculation is active on more than one CPU. */ if (buf->dtb_offset != 0) return (buf); new = DTRACESPEC_ACTIVEMANY; break; case DTRACESPEC_ACTIVEMANY: return (buf); case DTRACESPEC_ACTIVE: new = DTRACESPEC_ACTIVEONE; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); return (buf); } /* * Return a string. In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execname' and 'probefunc.' */ uintptr_t dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, dtrace_mstate_t *mstate) { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t ret; size_t strsz; /* * The easy case: this probe is allowed to read all of memory, so * we can just return this as a vanilla pointer. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) return (addr); /* * This is the tougher case: we copy the string in question from * kernel memory into scratch memory and return it that way: this * ensures that we won't trip up when access checking tests the * BYREF return value. */ strsz = dtrace_strlen((char *)addr, size) + 1; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * Return a string from a memoy address which is known to have one or * more concatenated, individually zero terminated, sub-strings. 
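/*
 * Illustrative sketch (user-level stand-in, names invented): the
 * scratch-copy pattern used by dtrace_dif_varstr()/dtrace_dif_varstrz()
 * above -- check that the scratch region has room, copy the bytes in,
 * convert the embedded terminators of a packed argument string to
 * spaces, and hand back a pointer into scratch.
 */
#include <stdio.h>
#include <string.h>

static char ex_scratch[64];
static size_t ex_scratch_ptr;

static char *
ex_varstrz(const char *src, size_t strsz)
{
	char *p;
	size_t i;

	if (ex_scratch_ptr + strsz > sizeof (ex_scratch))
		return (NULL);			/* no scratch space left */

	memcpy(ex_scratch + ex_scratch_ptr, src, strsz);

	/* Replace the sub-string terminators with spaces. */
	for (p = ex_scratch + ex_scratch_ptr, i = 0; i < strsz - 1; p++, i++)
		if (*p == '\0')
			*p = ' ';

	p = ex_scratch + ex_scratch_ptr;
	ex_scratch_ptr += strsz;
	return (p);
}

int
main(void)
{
	/* Concatenated, individually terminated sub-strings, execargs style. */
	const char args[] = "ls\0-l\0/tmp";

	printf("%s\n", ex_varstrz(args, sizeof (args)));	/* "ls -l /tmp" */
	return (0);
}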
* In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execargs'. */ static uintptr_t dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state, dtrace_mstate_t *mstate) { char *p; size_t i; uintptr_t ret; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); /* Replace sub-string termination characters with a space. */ for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1; p++, i++) if (*p == '\0') *p = ' '; ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * This function implements the DIF emulator's variable lookups. The emulator * passes a reserved variable identifier and optional built-in array index. */ static uint64_t dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, uint64_t ndx) { /* * If we're accessing one of the uncached arguments, we'll turn this * into a reference in the args array. */ if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { ndx = v - DIF_VAR_ARG0; v = DIF_VAR_ARGS; } switch (v) { case DIF_VAR_ARGS: ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; dtrace_provider_t *pv; uint64_t val; pv = mstate->dtms_probe->dtpr_provider; if (pv->dtpv_pops.dtps_getargval != NULL) val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); else val = dtrace_getarg(ndx, aframes); /* * This is regrettably required to keep the compiler * from tail-optimizing the call to dtrace_getarg(). * The condition always evaluates to true, but the * compiler has no way of figuring that out a priori. * (None of this would be necessary if the compiler * could be relied upon to _always_ tail-optimize * the call to dtrace_getarg() -- but it can't.) 
*/ if (mstate->dtms_probe != NULL) return (val); ASSERT(0); } return (mstate->dtms_arg[ndx]); #ifdef illumos case DIF_VAR_UREGS: { klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); if ((lwp = curthread->t_lwp) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = NULL; return (0); } return (dtrace_getreg(lwp->lwp_regs, ndx)); return (0); } #else case DIF_VAR_UREGS: { struct trapframe *tframe; if (!dtrace_priv_proc(state)) return (0); if ((tframe = curthread->td_frame) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = 0; return (0); } return (dtrace_getreg(tframe, ndx)); } #endif case DIF_VAR_CURTHREAD: if (!dtrace_priv_proc(state)) return (0); return ((uint64_t)(uintptr_t)curthread); case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate->dtms_timestamp = dtrace_gethrtime(); mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; } return (mstate->dtms_timestamp); case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (curthread->t_dtrace_vtime); case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { mstate->dtms_walltimestamp = dtrace_gethrestime(); mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; } return (mstate->dtms_walltimestamp); #ifdef illumos case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { mstate->dtms_ipl = dtrace_getipl(); mstate->dtms_present |= DTRACE_MSTATE_IPL; } return (mstate->dtms_ipl); #endif case DIF_VAR_EPID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); return (mstate->dtms_epid); case DIF_VAR_ID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (mstate->dtms_probe->dtpr_id); case DIF_VAR_STACKDEPTH: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; } return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) { mstate->dtms_ustackdepth = 0; } else { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); mstate->dtms_ustackdepth = dtrace_getustackdepth(); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; } return (mstate->dtms_ustackdepth); case DIF_VAR_CALLER: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* * If this is an unanchored probe, we are * required to go through the slow path: * dtrace_caller() only guarantees correct * results for anchored probes. */ pc_t caller[2] = {0, 0}; dtrace_getpcstack(caller, 2, aframes, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = dtrace_caller(aframes)) == -1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of * calling dtrace_getpcstack(). 
*/ pc_t caller = 0; dtrace_getpcstack(&caller, 1, aframes, NULL); mstate->dtms_caller = caller; } mstate->dtms_present |= DTRACE_MSTATE_CALLER; } return (mstate->dtms_caller); case DIF_VAR_UCALLER: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { uint64_t ustack[3]; /* * dtrace_getupcstack() fills in the first uint64_t * with the current PID. The second uint64_t will * be the program counter at user-level. The third * uint64_t will contain the caller, which is what * we're after. */ ustack[2] = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } return (mstate->dtms_ucaller); case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_mod, state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_func, state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_name, state, mstate)); case DIF_VAR_PID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * Note that we are assuming that an unanchored probe is * always due to a high-level interrupt. (And we're assuming * that there is only a single high level interrupt.) */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * Further, it is always safe to dereference the p_pidp member * of one's own proc structure. (These are truisms becuase * threads and processes don't clean up their own state -- * they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_pidp->pid_id); #else return ((uint64_t)curproc->p_pid); #endif case DIF_VAR_PPID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_ppid); #else if (curproc->p_pid == proc0.p_pid) return (curproc->p_pid); else return (curproc->p_pptr->p_pid); #endif case DIF_VAR_TID: #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); #endif return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECARGS: { struct pargs *p_args = curthread->td_proc->p_args; if (p_args == NULL) return(0); return (dtrace_dif_varstrz( (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate)); } case DIF_VAR_EXECNAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_user.u_comm); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. 
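/*
 * Illustrative sketch (structure and names invented): the "present"
 * bitmask idiom used by the variable lookups above.  Expensive values
 * such as the timestamp or caller are computed at most once per probe
 * firing; a flag records that the cached copy is valid thereafter.
 */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define	EX_MSTATE_TIMESTAMP	0x01

struct ex_mstate {
	uint32_t	present;	/* bitmask of cached fields */
	uint64_t	timestamp;
};

static uint64_t
ex_get_timestamp(struct ex_mstate *m)
{
	if (!(m->present & EX_MSTATE_TIMESTAMP)) {
		/* First use in this firing: compute and cache. */
		m->timestamp = (uint64_t)clock();
		m->present |= EX_MSTATE_TIMESTAMP;
	}
	return (m->timestamp);
}

int
main(void)
{
	struct ex_mstate m = { 0, 0 };

	/* Both calls return the same cached value. */
	printf("%ju %ju\n", (uintmax_t)ex_get_timestamp(&m),
	    (uintmax_t)ex_get_timestamp(&m));
	return (0);
}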
* (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_user.u_comm, state, mstate)); #else return (dtrace_dif_varstr( (uintptr_t) curthread->td_proc->p_comm, state, mstate)); #endif case DIF_VAR_ZONENAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_zone->zone_name, state, mstate)); #else return (0); #endif case DIF_VAR_UID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_uid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_uid); #else return ((uint64_t)curthread->td_ucred->cr_uid); #endif case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_gid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_gid); #else return ((uint64_t)curthread->td_ucred->cr_gid); #endif case DIF_VAR_ERRNO: { #ifdef illumos klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); /* * It is always safe to dereference one's own t_lwp pointer in * the event that this pointer is non-NULL. (This is true * because threads and lwps don't clean up their own state -- * they leave that task to whomever reaps them.) */ if ((lwp = curthread->t_lwp) == NULL) return (0); return ((uint64_t)lwp->lwp_errno); #else return (curthread->td_errno); #endif } #ifndef illumos case DIF_VAR_CPU: { return curcpu; } #endif default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } } typedef enum dtrace_json_state { DTRACE_JSON_REST = 1, DTRACE_JSON_OBJECT, DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, DTRACE_JSON_STRING_ESCAPE_UNICODE, DTRACE_JSON_COLON, DTRACE_JSON_COMMA, DTRACE_JSON_VALUE, DTRACE_JSON_IDENTIFIER, DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP, DTRACE_JSON_COLLECT_OBJECT } dtrace_json_state_t; /* * This function possesses just enough knowledge about JSON to extract a single * value from a JSON string and store it in the scratch buffer. It is able * to extract nested object values, and members of arrays by index. 
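/*
 * Illustrative sketch (stand-alone, selector strings invented): the
 * packed element-selector format that dtrace_json() consumes, as
 * described in the comment that follows.  A selector such as
 * "foo[0].bar" is passed as the bytes "foo" NUL "0" NUL "bar" NUL with
 * nelems = 3, and the parser advances by skipping past each NUL.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char elemlist[] = "foo\0" "0\0" "bar";	/* foo[0].bar */
	const char *elem = elemlist;
	int nelems = 3, i;

	for (i = 0; i < nelems; i++) {
		printf("selector %d: %s\n", i, elem);
		elem += strlen(elem) + 1;	/* skip the inter-element NUL */
	}
	return (0);
}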
* * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to * be looked up as we descend into the object tree. e.g. * * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL * with nelems = 5. * * The run time of this function must be bounded above by strsize to limit the * amount of work done in probe context. As such, it is implemented as a * simple state machine, reading one character at a time using safe loads * until we find the requested element, hit a parsing error or run off the * end of the object or string. * * As there is no way for a subroutine to return an error without interrupting * clause execution, we simply return NULL in the event of a missing key or any * other error condition. Each NULL return in this function is commented with * the error condition it represents -- parsing or otherwise. * * The set of states for the state machine closely matches the JSON * specification (http://json.org/). Briefly: * * DTRACE_JSON_REST: * Skip whitespace until we find either a top-level Object, moving * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. * * DTRACE_JSON_OBJECT: * Locate the next key String in an Object. Sets a flag to denote * the next String as a key string and moves to DTRACE_JSON_STRING. * * DTRACE_JSON_COLON: * Skip whitespace until we find the colon that separates key Strings * from their values. Once found, move to DTRACE_JSON_VALUE. * * DTRACE_JSON_VALUE: * Detects the type of the next value (String, Number, Identifier, Object * or Array) and routes to the states that process that type. Here we also * deal with the element selector list if we are requested to traverse down * into the object tree. * * DTRACE_JSON_COMMA: * Skip whitespace until we find the comma that separates key-value pairs * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays * (similarly DTRACE_JSON_VALUE). All following literal value processing * states return to this state at the end of their value, unless otherwise * noted. * * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: * Processes a Number literal from the JSON, including any exponent * component that may be present. Numbers are returned as strings, which * may be passed to strtoll() if an integer is required. * * DTRACE_JSON_IDENTIFIER: * Processes a "true", "false" or "null" literal in the JSON. * * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, * DTRACE_JSON_STRING_ESCAPE_UNICODE: * Processes a String literal from the JSON, whether the String denotes * a key, a value or part of a larger Object. Handles all escape sequences * present in the specification, including four-digit unicode characters, * but merely includes the escape sequence without converting it to the * actual escaped character. If the String is flagged as a key, we * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. * * DTRACE_JSON_COLLECT_OBJECT: * This state collects an entire Object (or Array), correctly handling * embedded strings. If the full element selector list matches this nested * object, we return the Object in full as a string. If not, we use this * state to skip to the next value at this level and continue processing. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. 
*/ static char * dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, char *dest) { dtrace_json_state_t state = DTRACE_JSON_REST; int64_t array_elem = INT64_MIN; int64_t array_pos = 0; uint8_t escape_unicount = 0; boolean_t string_is_key = B_FALSE; boolean_t collect_object = B_FALSE; boolean_t found_key = B_FALSE; boolean_t in_array = B_FALSE; uint32_t braces = 0, brackets = 0; char *elem = elemlist; char *dd = dest; uintptr_t cur; for (cur = json; cur < json + size; cur++) { char cc = dtrace_load8(cur); if (cc == '\0') return (NULL); switch (state) { case DTRACE_JSON_REST: if (isspace(cc)) break; if (cc == '{') { state = DTRACE_JSON_OBJECT; break; } if (cc == '[') { in_array = B_TRUE; array_pos = 0; array_elem = dtrace_strtoll(elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected to find a top-level object or array. */ return (NULL); case DTRACE_JSON_OBJECT: if (isspace(cc)) break; if (cc == '"') { state = DTRACE_JSON_STRING; string_is_key = B_TRUE; break; } /* * ERROR: either the object did not start with a key * string, or we've run off the end of the object * without finding the requested key. */ return (NULL); case DTRACE_JSON_STRING: if (cc == '\\') { *dd++ = '\\'; state = DTRACE_JSON_STRING_ESCAPE; break; } if (cc == '"') { if (collect_object) { /* * We don't reset the dest here, as * the string is part of a larger * object being collected. */ *dd++ = cc; collect_object = B_FALSE; state = DTRACE_JSON_COLLECT_OBJECT; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (string_is_key) { if (dtrace_strncmp(dest, elem, size) == 0) found_key = B_TRUE; } else if (found_key) { if (nelems > 1) { /* * We expected an object, not * this string. */ return (NULL); } return (dest); } state = string_is_key ? DTRACE_JSON_COLON : DTRACE_JSON_COMMA; string_is_key = B_FALSE; break; } *dd++ = cc; break; case DTRACE_JSON_STRING_ESCAPE: *dd++ = cc; if (cc == 'u') { escape_unicount = 0; state = DTRACE_JSON_STRING_ESCAPE_UNICODE; } else { state = DTRACE_JSON_STRING; } break; case DTRACE_JSON_STRING_ESCAPE_UNICODE: if (!isxdigit(cc)) { /* * ERROR: invalid unicode escape, expected * four valid hexidecimal digits. */ return (NULL); } *dd++ = cc; if (++escape_unicount == 4) state = DTRACE_JSON_STRING; break; case DTRACE_JSON_COLON: if (isspace(cc)) break; if (cc == ':') { state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected a colon. */ return (NULL); case DTRACE_JSON_COMMA: if (isspace(cc)) break; if (cc == ',') { if (in_array) { state = DTRACE_JSON_VALUE; if (++array_pos == array_elem) found_key = B_TRUE; } else { state = DTRACE_JSON_OBJECT; } break; } /* * ERROR: either we hit an unexpected character, or * we reached the end of the object or array without * finding the requested key. */ return (NULL); case DTRACE_JSON_IDENTIFIER: if (islower(cc)) { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (dtrace_strncmp(dest, "true", 5) == 0 || dtrace_strncmp(dest, "false", 6) == 0 || dtrace_strncmp(dest, "null", 5) == 0) { if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, * not this identifier. */ return (NULL); } return (dest); } else { cur--; state = DTRACE_JSON_COMMA; break; } } /* * ERROR: we did not recognise the identifier as one * of those in the JSON specification. 
*/ return (NULL); case DTRACE_JSON_NUMBER: if (cc == '.') { *dd++ = cc; state = DTRACE_JSON_NUMBER_FRAC; break; } if (cc == 'x' || cc == 'X') { /* * ERROR: specification explicitly excludes * hexidecimal or octal numbers. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_FRAC: if (cc == 'e' || cc == 'E') { *dd++ = cc; state = DTRACE_JSON_NUMBER_EXP; break; } if (cc == '+' || cc == '-') { /* * ERROR: expect sign as part of exponent only. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_EXP: if (isdigit(cc) || cc == '+' || cc == '-') { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, not * this number. */ return (NULL); } return (dest); } cur--; state = DTRACE_JSON_COMMA; break; case DTRACE_JSON_VALUE: if (isspace(cc)) break; if (cc == '{' || cc == '[') { if (nelems > 1 && found_key) { in_array = cc == '[' ? B_TRUE : B_FALSE; /* * If our element selector directs us * to descend into this nested object, * then move to the next selector * element in the list and restart the * state machine. */ while (*elem != '\0') elem++; elem++; /* skip the inter-element NUL */ nelems--; dd = dest; if (in_array) { state = DTRACE_JSON_VALUE; array_pos = 0; array_elem = dtrace_strtoll( elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; } else { found_key = B_FALSE; state = DTRACE_JSON_OBJECT; } break; } /* * Otherwise, we wish to either skip this * nested object or return it in full. */ if (cc == '[') brackets = 1; else braces = 1; *dd++ = cc; state = DTRACE_JSON_COLLECT_OBJECT; break; } if (cc == '"') { state = DTRACE_JSON_STRING; break; } if (islower(cc)) { /* * Here we deal with true, false and null. */ *dd++ = cc; state = DTRACE_JSON_IDENTIFIER; break; } if (cc == '-' || isdigit(cc)) { *dd++ = cc; state = DTRACE_JSON_NUMBER; break; } /* * ERROR: unexpected character at start of value. */ return (NULL); case DTRACE_JSON_COLLECT_OBJECT: if (cc == '\0') /* * ERROR: unexpected end of input. */ return (NULL); *dd++ = cc; if (cc == '"') { collect_object = B_TRUE; state = DTRACE_JSON_STRING; break; } if (cc == ']') { if (brackets-- == 0) { /* * ERROR: unbalanced brackets. */ return (NULL); } } else if (cc == '}') { if (braces-- == 0) { /* * ERROR: unbalanced braces. */ return (NULL); } } else if (cc == '{') { braces++; } else if (cc == '[') { brackets++; } if (brackets == 0 && braces == 0) { if (found_key) { *dd = '\0'; return (dest); } dd = dest; /* reset string buffer */ state = DTRACE_JSON_COMMA; } break; } } return (NULL); } /* * Emulate the execution of DTrace ID subroutines invoked by the call opcode. * Notice that we don't bother validating the proper number of arguments or * their types in the tuple stack. This isn't needed because all argument * interpretation is safe because of our load safety -- the worst that can * happen is that a bogus program can obtain bogus results. 
*/ static void dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_key_t *tupregs, int nargs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos union { mutex_impl_t mi; uint64_t mx; } m; union { krwlock_t ri; uintptr_t rw; } r; #else struct thread *lowner; union { struct lock_object *li; uintptr_t lx; } l; #endif switch (subr) { case DIF_SUBR_RAND: regs[rd] = dtrace_xoroshiro128_plus_next( state->dts_rstate[curcpu]); break; #ifdef illumos case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; else regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); else regs[rd] = 0; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else /* !illumos */ case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (uintptr_t)lowner; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SLEEPLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_MUTEX_TYPE_SPIN: if 
(!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_READ_HELD: case DIF_SUBR_SX_SHARED_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner == NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_WRITE_HELD: case DIF_SUBR_SX_EXCLUSIVE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner != NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_ISWRITER: case DIF_SUBR_SX_ISEXCLUSIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (lowner == curthread); break; #endif /* illumos */ case DIF_SUBR_BCOPY: { /* * We need to be sure that the destination is in the scratch * region -- no other region is allowed. */ uintptr_t src = tupregs[0].dttk_value; uintptr_t dest = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(src, size, mstate, vstate)) { regs[rd] = 0; break; } dtrace_bcopy((void *)src, (void *)dest, size); break; } case DIF_SUBR_ALLOCA: case DIF_SUBR_COPYIN: { uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); uint64_t size = tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ /* * Rounding up the user allocation size could have overflowed * a large, bogus allocation (like -1ULL) to 0. */ if (scratch_size < size || !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_scratch_ptr += scratch_size; regs[rd] = dest; break; } case DIF_SUBR_COPYINTO: { uint64_t size = tupregs[1].dttk_value; uintptr_t dest = tupregs[2].dttk_value; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. 
*/ if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } case DIF_SUBR_COPYINSTR: { uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; if (nargs > 1 && tupregs[1].dttk_value < size) size = tupregs[1].dttk_value + 1; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = dest; break; } #ifdef illumos case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { uintptr_t baddr = tupregs[0].dttk_value, daddr; uintptr_t wptr, rptr; size_t count = 0; int cont = 0; while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) { if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, vstate)) { regs[rd] = 0; break; } wptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_wptr)); rptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_rptr)); if (wptr < rptr) { *flags |= CPU_DTRACE_BADADDR; *illval = tupregs[0].dttk_value; break; } daddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_datap)); baddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_cont)); /* * We want to prevent against denial-of-service here, * so we're only going to search the list for * dtrace_msgdsize_max mblks. */ if (cont++ > dtrace_msgdsize_max) { *flags |= CPU_DTRACE_ILLOP; break; } if (subr == DIF_SUBR_MSGDSIZE) { if (dtrace_load8(daddr + offsetof(dblk_t, db_type)) != M_DATA) continue; } count += wptr - rptr; } if (!(*flags & CPU_DTRACE_FAULT)) regs[rd] = count; break; } #endif case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; proc_t *p; int rval = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); for (p = curthread->t_procp; p != NULL; p = p->p_parent) { #ifdef illumos if (p->p_pidp->pid_id == pid) { #else if (p->p_pid == pid) { #endif rval = 1; break; } } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = rval; break; } case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_COPYOUTSTR: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_STRLEN: { size_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; size_t lim; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } regs[rd] = 
dtrace_strlen((char *)addr, lim); break; } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { /* * We're going to iterate over the string looking for the * specified character. We will iterate until we have reached * the string length or we have found the character. If this * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. */ uintptr_t addr = tupregs[0].dttk_value; uintptr_t addr_limit; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; char c, target = (char)tupregs[1].dttk_value; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } addr_limit = addr + lim; for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; if (subr == DIF_SUBR_STRCHR) break; } if (c == '\0') break; } break; } case DIF_SUBR_STRSTR: case DIF_SUBR_INDEX: case DIF_SUBR_RINDEX: { /* * We're going to iterate over the string looking for the * specified string. We will iterate until we have reached * the string length or we have found the string. (Yes, this * is done in the most naive way possible -- but considering * that the string we're searching for is likely to be * relatively short, the complexity of Rabin-Karp or similar * hardly seems merited.) */ char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t len = dtrace_strlen(addr, size); size_t sublen = dtrace_strlen(substr, size); char *limit = addr + len, *orig = addr; int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; int inc = 1; regs[rd] = notfound; if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, vstate)) { regs[rd] = 0; break; } /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a * pointer to the (empty) string, and index() and rindex() * both return index 0 (regardless of any position argument). */ if (sublen == 0 && len == 0) { if (subr == DIF_SUBR_STRSTR) regs[rd] = (uintptr_t)addr; else regs[rd] = 0; break; } if (subr != DIF_SUBR_STRSTR) { if (subr == DIF_SUBR_RINDEX) { limit = orig - 1; addr += len; inc = -1; } /* * Both index() and rindex() take an optional position * argument that denotes the starting position. */ if (nargs == 3) { int64_t pos = (int64_t)tupregs[2].dttk_value; /* * If the position argument to index() is * negative, Perl implicitly clamps it at * zero. This semantic is a little surprising * given the special meaning of negative * positions to similar Perl functions like * substr(), but it appears to reflect a * notion that index() can start from a * negative index and increment its way up to * the string. Given this notion, Perl's * rindex() is at least self-consistent in * that it implicitly clamps positions greater * than the string length to be the string * length. Where Perl completely loses * coherence, however, is when the specified * substring is the empty string (""). In * this case, even if the position is * negative, rindex() returns 0 -- and even if * the position is greater than the length, * index() returns the string length. These * semantics violate the notion that index() * should never return a value less than the * specified position and that rindex() should * never return a value greater than the * specified position. 
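/*
 * Illustrative sketch (user-level, delimiter set invented): the 256-bit
 * character membership map that the strtok() case further below builds
 * from its token string -- one bit per possible byte value, tested with
 * the same (c >> 3) / (c & 0x7) indexing.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	uint8_t tokmap[32];			/* 256 bits / 8 */
	const char *tok = ", ;";		/* hypothetical delimiter set */
	const char *s = "foo, bar;baz";
	const unsigned char *p;

	memset(tokmap, 0, sizeof (tokmap));
	for (p = (const unsigned char *)tok; *p != '\0'; p++)
		tokmap[*p >> 3] |= 1 << (*p & 0x7);

	/* Echo only the characters that are not in the delimiter set. */
	for (p = (const unsigned char *)s; *p != '\0'; p++)
		if (!(tokmap[*p >> 3] & (1 << (*p & 0x7))))
			putchar(*p);
	putchar('\n');
	return (0);
}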
(One assumes that * these semantics are artifacts of Perl's * implementation and not the results of * deliberate design -- it beggars belief that * even Larry Wall could desire such oddness.) * While in the abstract one would wish for * consistent position semantics across * substr(), index() and rindex() -- or at the * very least self-consistent position * semantics for index() and rindex() -- we * instead opt to keep with the extant Perl * semantics, in all their broken glory. (Do * we have more desire to maintain Perl's * semantics than Perl does? Probably.) */ if (subr == DIF_SUBR_RINDEX) { if (pos < 0) { if (sublen == 0) regs[rd] = 0; break; } if (pos > len) pos = len; } else { if (pos < 0) pos = 0; if (pos >= len) { if (sublen == 0) regs[rd] = len; break; } } addr = orig + pos; } } for (regs[rd] = notfound; addr != limit; addr += inc) { if (dtrace_strncmp(addr, substr, sublen) == 0) { if (subr != DIF_SUBR_STRSTR) { /* * As D index() and rindex() are * modeled on Perl (and not on awk), * we return a zero-based (and not a * one-based) index. (For you Perl * weenies: no, we're not going to add * $[ -- and shouldn't you be at a con * or something?) */ regs[rd] = (uintptr_t)(addr - orig); break; } ASSERT(subr == DIF_SUBR_STRSTR); regs[rd] = (uintptr_t)addr; break; } } break; } case DIF_SUBR_STRTOK: { uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t limit, toklimit; size_t clim; uint8_t c = 0, tokmap[32]; /* 256 / 8 */ char *dest = (char *)mstate->dtms_scratch_ptr; int i; /* * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. */ if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } toklimit = tokaddr + clim; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this * means that the saved strtok pointer is _only_ * valid within multiple enablings of the same probe -- * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; limit = mstate->dtms_strtok_limit; } else { /* * If the user-specified address is non-NULL we must * access check it. This is the only time we have * a chance to do so, since this address may reside * in the string table of this clause-- future calls * (when we fetch addr from mstate->dtms_strtok) * would fail this access check. */ if (!dtrace_strcanload(addr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } limit = addr + clim; } /* * First, zero the token map, and then process the token * string -- setting a bit in the map for every character * found in the token string. */ for (i = 0; i < sizeof (tokmap); i++) tokmap[i] = 0; for (; tokaddr < toklimit; tokaddr++) { if ((c = dtrace_load8(tokaddr)) == '\0') break; ASSERT((c >> 3) < sizeof (tokmap)); tokmap[c >> 3] |= (1 << (c & 0x7)); } for (; addr < limit; addr++) { /* * We're looking for a character that is _not_ * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) break; } if (c == '\0') { /* * We reached the end of the string without finding * any character that was not in the token string. * We return NULL in this case, and we set the saved * address to NULL as well. 
*/ regs[rd] = 0; mstate->dtms_strtok = 0; mstate->dtms_strtok_limit = 0; break; } /* * From here on, we're copying into the destination string. */ for (i = 0; addr < limit && i < size - 1; addr++) { if ((c = dtrace_load8(addr)) == '\0') break; if (tokmap[c >> 3] & (1 << (c & 0x7))) break; ASSERT(i < size); dest[i++] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; mstate->dtms_strtok_limit = limit; break; } case DIF_SUBR_SUBSTR: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *d = (char *)mstate->dtms_scratch_ptr; int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (nargs <= 2) remaining = (int64_t)size; if (index < 0) { index += len; if (index < 0 && index + remaining > 0) { remaining += index; index = 0; } } if (index >= len || index < 0) { remaining = 0; } else if (remaining < 0) { remaining += len - index; } else if (index + remaining > size) { remaining = size - index; } for (i = 0; i < remaining; i++) { if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; break; } case DIF_SUBR_JSON: { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t json = tupregs[0].dttk_value; size_t jsonlen = dtrace_strlen((char *)json, size); uintptr_t elem = tupregs[1].dttk_value; size_t elemlen = dtrace_strlen((char *)elem, size); char *dest = (char *)mstate->dtms_scratch_ptr; char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; char *ee = elemlist; int nelems = 1; uintptr_t cur; if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Read the element selector and split it up into a packed list * of strings. */ for (cur = elem; cur < elem + elemlen; cur++) { char cc = dtrace_load8(cur); if (cur == elem && cc == '[') { /* * If the first element selector key is * actually an array index then ignore the * bracket. */ continue; } if (cc == ']') continue; if (cc == '.' 
|| cc == '[') { nelems++; cc = '\0'; } *ee++ = cc; } *ee++ = '\0'; if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, nelems, dest)) != 0) mstate->dtms_scratch_ptr += jsonlen + 1; break; } case DIF_SUBR_TOUPPER: case DIF_SUBR_TOLOWER: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *dest = (char *)mstate->dtms_scratch_ptr, c; size_t len = dtrace_strlen((char *)s, size); char lower, upper, convert; int64_t i; if (subr == DIF_SUBR_TOUPPER) { lower = 'a'; upper = 'z'; convert = 'A'; } else { lower = 'A'; upper = 'Z'; convert = 'a'; } if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (i = 0; i < size - 1; i++) { if ((c = dtrace_load8(s + i)) == '\0') break; if (c >= lower && c <= upper) c = convert + (c - lower); dest[i] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } #ifdef illumos case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; #endif break; case DIF_SUBR_GETMINOR: #ifdef _LP64 regs[rd] = tupregs[0].dttk_value & MAXMIN64; #else regs[rd] = tupregs[0].dttk_value & MAXMIN; #endif break; case DIF_SUBR_DDI_PATHNAME: { /* * This one is a galactic mess. We are going to roughly * emulate ddi_pathname(), but it's made more complicated * by the fact that we (a) want to include the minor name and * (b) must proceed iteratively instead of recursively. */ uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *start = (char *)dest, *end = start + size - 1; uintptr_t daddr = tupregs[0].dttk_value; int64_t minor = (int64_t)tupregs[1].dttk_value; char *s; int i, len, depth = 0; /* * Due to all the pointer jumping we do and context we must * rely upon, we just mandate that the user must have kernel * read privileges to use this routine. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { *flags |= CPU_DTRACE_KPRIV; *illval = daddr; regs[rd] = 0; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } *end = '\0'; /* * We want to have a name for the minor. In order to do this, * we need to walk the minor list from the devinfo. We want * to be sure that we don't infinitely walk a circular list, * so we check for circularity by sending a scout pointer * ahead two elements for every element that we iterate over; * if the list is circular, these will ultimately point to the * same element. You may recognize this little trick as the * answer to a stupid interview question -- one that always * seems to be asked by those who had to have it laboriously * explained to them, and who can't even concisely describe * the conditions under which one would be forced to resort to * this technique. Needless to say, those conditions are * found here -- and probably only here. Is this the only use * of this infamous trick in shipping, production code? If it * isn't, it probably should be... 
*/ if (minor != -1) { uintptr_t maddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_minor)); uintptr_t next = offsetof(struct ddi_minor_data, next); uintptr_t name = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, name); uintptr_t dev = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, dev); uintptr_t scout; if (maddr != NULL) scout = dtrace_loadptr(maddr + next); while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { uint64_t m; #ifdef _LP64 m = dtrace_load64(maddr + dev) & MAXMIN64; #else m = dtrace_load32(maddr + dev) & MAXMIN; #endif if (m != minor) { maddr = dtrace_loadptr(maddr + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; if (scout == maddr) { *flags |= CPU_DTRACE_ILLOP; break; } continue; } /* * We have the minor data. Now we need to * copy the minor's name into the end of the * pathname. */ s = (char *)dtrace_loadptr(maddr + name); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = ':'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); break; } } while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { ddi_node_state_t devi_state; devi_state = dtrace_load32(daddr + offsetof(struct dev_info, devi_node_state)); if (*flags & CPU_DTRACE_FAULT) break; if (devi_state >= DS_INITIALIZED) { s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_addr)); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = '@'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); } /* * Now for the node name... */ s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_node_name)); daddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_parent)); /* * If our parent is NULL (that is, if we're the root * node), we're going to use the special path * "devices". */ if (daddr == 0) s = "devices"; len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if ((end -= (len + 1)) < start) break; for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); *end = '/'; if (depth++ > dtrace_devdepth_max) { *flags |= CPU_DTRACE_ILLOP; break; } } if (end < start) DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); if (daddr == 0) { regs[rd] = (uintptr_t)end; mstate->dtms_scratch_ptr += size; } break; } #endif case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; int i = 0, j = 0; size_t lim1, lim2; char c; if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (i >= lim1) ? '\0' : dtrace_load8(s1++); if ((d[i++] = c) == '\0') { i--; break; } } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (j++ >= lim2) ? 
'\0' : dtrace_load8(s2++); if ((d[i++] = c) == '\0') break; } if (i < size) { mstate->dtms_scratch_ptr += i; regs[rd] = (uintptr_t)d; } break; } case DIF_SUBR_STRTOLL: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { regs[rd] = INT64_MIN; break; } regs[rd] = dtrace_strtoll((char *)s, base, lim); break; } case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; uint64_t val, digit; uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (*end-- = '\0'; val; val /= base) { if ((digit = val % base) <= '9' - '0') { *end-- = '0' + digit; } else { *end-- = 'a' + (digit - ('9' - '0') - 1); } } if (i == 0 && base == 16) *end-- = '0'; if (base == 16) *end-- = 'x'; if (i == 0 || base == 8 || base == 16) *end-- = '0'; if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_HTONS: case DIF_SUBR_NTOHS: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint16_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONL: case DIF_SUBR_NTOHL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint32_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONLL: case DIF_SUBR_NTOHLL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint64_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; int i, j, len = dtrace_strlen((char *)src, size); int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; if (!dtrace_canload(src, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * The basename and dirname for a zero-length string is * defined to be "." */ if (len == 0) { len = 1; src = (uintptr_t)"."; } /* * Start from the back of the string, moving back toward the * front until we see a character that isn't a slash. That * character is the last character in the basename. */ for (i = len - 1; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastbase = i; /* * Starting from the last character in the basename, move * towards the front until we find a slash. The character * that we processed immediately before that is the first * character in the basename. */ for (; i >= 0; i--) { if (dtrace_load8(src + i) == '/') break; } if (i >= 0) firstbase = i + 1; /* * Now keep going until we find a non-slash character. That * character is the last character in the dirname. 
*/ for (; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastdir = i; ASSERT(!(lastbase == -1 && firstbase != -1)); ASSERT(!(firstbase == -1 && lastdir != -1)); if (lastbase == -1) { /* * We didn't find a non-slash character. We know that * the length is non-zero, so the whole string must be * slashes. In either the dirname or the basename * case, we return '/'. */ ASSERT(firstbase == -1); firstbase = lastbase = lastdir = 0; } if (firstbase == -1) { /* * The entire string consists only of a basename * component. If we're looking for dirname, we need * to change our string to be just "."; if we're * looking for a basename, we'll just set the first * character of the basename to be 0. */ if (subr == DIF_SUBR_DIRNAME) { ASSERT(lastdir == -1); src = (uintptr_t)"."; lastdir = 0; } else { firstbase = 0; } } if (subr == DIF_SUBR_DIRNAME) { if (lastdir == -1) { /* * We know that we have a slash in the name -- * or lastdir would be set to 0, above. And * because lastdir is -1, we know that this * slash must be the first character. (That * is, the full string must be of the form * "/basename".) In this case, the last * character of the directory name is 0. */ lastdir = 0; } start = 0; end = lastdir; } else { ASSERT(subr == DIF_SUBR_BASENAME); ASSERT(firstbase != -1 && lastbase != -1); start = firstbase; end = lastbase; } for (i = start, j = 0; i <= end && j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); dest[j] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_GETF: { uintptr_t fd = tupregs[0].dttk_value; struct filedesc *fdp; file_t *fp; if (!dtrace_priv_proc(state)) { regs[rd] = 0; break; } fdp = curproc->p_fd; FILEDESC_SLOCK(fdp); fp = fget_locked(fdp, fd); mstate->dtms_getf = fp; regs[rd] = (uintptr_t)fp; FILEDESC_SUNLOCK(fdp); break; } case DIF_SUBR_CLEANPATH: { char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; size_t lim; int i = 0, j = 0; #ifdef illumos zone_t *z; #endif if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Move forward, loading each character. */ do { c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: if (j + 5 >= size) /* 5 = strlen("/..c\0") */ break; if (c != '/') { dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * We have two slashes -- we can just advance * to the next character. */ goto next; } if (c != '.') { /* * This is not "." and it's not ".." -- we can * just store the "/" and this character and * drive on. */ dest[j++] = '/'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * This is a "/./" component. We're not going * to store anything in the destination buffer; * we're just going to go to the next component. */ goto next; } if (c != '.') { /* * This is not ".." -- we can just store the * "/." and this character and continue * processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* * This is not ".." -- it's "..[mumble]". * We'll store the "/.." and this character * and continue processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = '.'; dest[j++] = c; continue; } /* * This is "/../" or "/..\0". We need to back up * our destination pointer until we find a "/". 
*/ i--; while (j != 0 && dest[--j] != '/') continue; if (c == '\0') dest[++j] = '/'; } while (c != '\0'); dest[j] = '\0'; #ifdef illumos if (mstate->dtms_getf != NULL && !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) && (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) { /* * If we've done a getf() as a part of this ECB and we * don't have kernel access (and we're not in the global * zone), check if the path we cleaned up begins with * the zone's root path, and trim it off if so. Note * that this is an output cleanliness issue, not a * security issue: knowing one's zone root path does * not enable privilege escalation. */ if (strstr(dest, z->zone_rootpath) == dest) dest += strlen(z->zone_rootpath) - 1; } #endif regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_INET_NTOA: case DIF_SUBR_INET_NTOA6: case DIF_SUBR_INET_NTOP: { size_t size; int af, argi, i; char *base, *end; if (subr == DIF_SUBR_INET_NTOP) { af = (int)tupregs[0].dttk_value; argi = 1; } else { af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; argi = 0; } if (af == AF_INET) { ipaddr_t ip4; uint8_t *ptr8, val; if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (ipaddr_t), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv4 address. */ ip4 = dtrace_load32(tupregs[argi].dttk_value); /* * Check an IPv4 string will fit in scratch. */ size = INET_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; /* * Stringify as a dotted decimal quad. */ *end-- = '\0'; ptr8 = (uint8_t *)&ip4; for (i = 3; i >= 0; i--) { val = ptr8[i]; if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + (val % 10); } } if (i > 0) *end-- = '.'; } ASSERT(end + 1 >= base); } else if (af == AF_INET6) { struct in6_addr ip6; int firstzero, tryzero, numzero, v6end; uint16_t val; const char digits[] = "0123456789abcdef"; /* * Stringify using RFC 1884 convention 2 - 16 bit * hexadecimal values with a zero-run compression. * Lower case hexadecimal digits are used. * eg, fe80::214:4fff:fe0b:76c8. * The IPv4 embedded form is returned for inet_ntop, * just the IPv4 string is returned for inet_ntoa6. */ if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (struct in6_addr), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv6 address. */ dtrace_bcopy( (void *)(uintptr_t)tupregs[argi].dttk_value, (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); /* * Check an IPv6 string will fit in scratch. */ size = INET6_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; *end-- = '\0'; /* * Find the longest run of 16 bit zero values * for the single allowed zero compression - "::". 
*/ firstzero = -1; tryzero = -1; numzero = 1; for (i = 0; i < sizeof (struct in6_addr); i++) { #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif tryzero == -1 && i % 2 == 0) { tryzero = i; continue; } if (tryzero != -1 && #ifdef illumos (ip6._S6_un._S6_u8[i] != 0 || #else (ip6.__u6_addr.__u6_addr8[i] != 0 || #endif i == sizeof (struct in6_addr) - 1)) { if (i - tryzero <= numzero) { tryzero = -1; continue; } firstzero = tryzero; numzero = i - i % 2 - tryzero; tryzero = -1; #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif i == sizeof (struct in6_addr) - 1) numzero += 2; } } ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); /* * Check for an IPv4 embedded address. */ v6end = sizeof (struct in6_addr) - 2; if (IN6_IS_ADDR_V4MAPPED(&ip6) || IN6_IS_ADDR_V4COMPAT(&ip6)) { for (i = sizeof (struct in6_addr) - 1; i >= DTRACE_V4MAPPED_OFFSET; i--) { ASSERT(end >= base); #ifdef illumos val = ip6._S6_un._S6_u8[i]; #else val = ip6.__u6_addr.__u6_addr8[i]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + val % 10; } } if (i > DTRACE_V4MAPPED_OFFSET) *end-- = '.'; } if (subr == DIF_SUBR_INET_NTOA6) goto inetout; /* * Set v6end to skip the IPv4 address that * we have already stringified. */ v6end = 10; } /* * Build the IPv6 string by working through the * address in reverse. */ for (i = v6end; i >= 0; i -= 2) { ASSERT(end >= base); if (i == firstzero + numzero - 2) { *end-- = ':'; *end-- = ':'; i -= numzero - 2; continue; } if (i < 14 && i != firstzero - 2) *end-- = ':'; #ifdef illumos val = (ip6._S6_un._S6_u8[i] << 8) + ip6._S6_un._S6_u8[i + 1]; #else val = (ip6.__u6_addr.__u6_addr8[i] << 8) + ip6.__u6_addr.__u6_addr8[i + 1]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 16) { *end-- = digits[val % 16]; } } } ASSERT(end + 1 >= base); } else { /* * The user didn't use AH_INET or AH_INET6. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } inetout: regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_MEMREF: { uintptr_t size = 2 * sizeof(uintptr_t); uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t)); size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size; /* address and length */ memref[0] = tupregs[0].dttk_value; memref[1] = tupregs[1].dttk_value; regs[rd] = (uintptr_t) memref; mstate->dtms_scratch_ptr += scratch_size; break; } #ifndef illumos case DIF_SUBR_MEMSTR: { char *str = (char *)mstate->dtms_scratch_ptr; uintptr_t mem = tupregs[0].dttk_value; char c = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; uint8_t n; int i; regs[rd] = 0; if (size == 0) break; if (!dtrace_canload(mem, size - 1, mstate, vstate)) break; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); break; } if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) { *flags |= CPU_DTRACE_ILLOP; break; } for (i = 0; i < size - 1; i++) { n = dtrace_load8(mem++); str[i] = (n == 0) ? c : n; } str[size - 1] = 0; regs[rd] = (uintptr_t)str; mstate->dtms_scratch_ptr += size; break; } #endif } } /* * Emulate the execution of DTrace IR instructions specified by the given * DIF object. This function is deliberately void of assertions as all of * the necessary checks are handled by a call to dtrace_difo_validate(). 
*/ static uint64_t dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, dtrace_state_t *state) { const dif_instr_t *text = difo->dtdo_buf; const uint_t textlen = difo->dtdo_len; const char *strtab = difo->dtdo_strtab; const uint64_t *inttab = difo->dtdo_inttab; uint64_t rval = 0; dtrace_statvar_t *svar; dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; uint64_t *tmp; uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; uint_t pc = 0, id, opc = 0; uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; /* * We stash the current DIF object into the machine state: we need it * for subsequent access checking. */ mstate->dtms_difo = difo; regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { opc = pc; instr = text[pc++]; r1 = DIF_INSTR_R1(instr); r2 = DIF_INSTR_R2(instr); rd = DIF_INSTR_RD(instr); switch (DIF_INSTR_OP(instr)) { case DIF_OP_OR: regs[rd] = regs[r1] | regs[r2]; break; case DIF_OP_XOR: regs[rd] = regs[r1] ^ regs[r2]; break; case DIF_OP_AND: regs[rd] = regs[r1] & regs[r2]; break; case DIF_OP_SLL: regs[rd] = regs[r1] << regs[r2]; break; case DIF_OP_SRL: regs[rd] = regs[r1] >> regs[r2]; break; case DIF_OP_SUB: regs[rd] = regs[r1] - regs[r2]; break; case DIF_OP_ADD: regs[rd] = regs[r1] + regs[r2]; break; case DIF_OP_MUL: regs[rd] = regs[r1] * regs[r2]; break; case DIF_OP_SDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] / (int64_t)regs[r2]; } break; case DIF_OP_UDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] / regs[r2]; } break; case DIF_OP_SREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] % (int64_t)regs[r2]; } break; case DIF_OP_UREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] % regs[r2]; } break; case DIF_OP_NOT: regs[rd] = ~regs[r1]; break; case DIF_OP_MOV: regs[rd] = regs[r1]; break; case DIF_OP_CMP: cc_r = regs[r1] - regs[r2]; cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = 0; cc_c = regs[r1] < regs[r2]; break; case DIF_OP_TST: cc_n = cc_v = cc_c = 0; cc_z = regs[r1] == 0; break; case DIF_OP_BA: pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BE: if (cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BNE: if (cc_z == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BG: if ((cc_z | (cc_n ^ cc_v)) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGU: if ((cc_c | cc_z) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGE: if ((cc_n ^ cc_v) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGEU: if (cc_c == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BL: if (cc_n ^ cc_v) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLU: if (cc_c) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLE: if (cc_z | (cc_n ^ cc_v)) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLEU: if (cc_c | cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_RLDSB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSB: regs[rd] = (int8_t)dtrace_load8(regs[r1]); break; case DIF_OP_RLDSH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSH: regs[rd] = 
(int16_t)dtrace_load16(regs[r1]); break; case DIF_OP_RLDSW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSW: regs[rd] = (int32_t)dtrace_load32(regs[r1]); break; case DIF_OP_RLDUB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUB: regs[rd] = dtrace_load8(regs[r1]); break; case DIF_OP_RLDUH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUH: regs[rd] = dtrace_load16(regs[r1]); break; case DIF_OP_RLDUW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUW: regs[rd] = dtrace_load32(regs[r1]); break; case DIF_OP_RLDX: if (!dtrace_canload(regs[r1], 8, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; case DIF_OP_ULDSB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int8_t) dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int16_t) dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int32_t) dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDX: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword64((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_RET: rval = regs[rd]; pc = textlen; break; case DIF_OP_NOP: break; case DIF_OP_SETX: regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; break; case DIF_OP_SETS: regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; case DIF_OP_SCMP: { size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; size_t lim1, lim2; if (s1 != 0 && !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) break; if (s2 != 0 && !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) break; cc_r = dtrace_strncmp((char *)s1, (char *)s2, MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); break; case DIF_OP_LDGS: id = DIF_INSTR_VAR(instr); if (id >= DIF_VAR_OTHER_UBASE) { uintptr_t a; id -= DIF_VAR_OTHER_UBASE; svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { regs[rd] = svar->dtsv_data; break; } a = (uintptr_t)svar->dtsv_data; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. 
*/ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } regs[rd] = dtrace_dif_variable(mstate, state, id, 0); break; case DIF_OP_STGS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t lim; ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } svar->dtsv_data = regs[rd]; break; case DIF_OP_LDTA: /* * There are no DTrace built-in thread-local arrays at * present. This opcode is saved for future work. */ *flags |= CPU_DTRACE_ILLOP; regs[rd] = 0; break; case DIF_OP_LDLS: id = DIF_INSTR_VAR(instr); if (id < DIF_VAR_OTHER_UBASE) { /* * For now, this has no meaning. */ regs[rd] = 0; break; } id -= DIF_VAR_OTHER_UBASE; ASSERT(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. */ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; regs[rd] = tmp[curcpu]; break; case DIF_OP_STLS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; tmp[curcpu] = regs[rd]; break; case DIF_OP_LDTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; v = &vstate->dtvs_tlocals[id]; key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < 
vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; v = &vstate->dtvs_tlocals[id]; dvar = dtrace_dynvar(dstate, 2, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ curthread->t_predcache = 0; if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_SRA: regs[rd] = (int64_t)regs[r1] >> regs[r2]; break; case DIF_OP_CALL: dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, regs, tupregs, ttop, mstate, state); break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } if (r1 == DIF_TYPE_STRING) { /* * If this is a string type and the size is 0, * we'll use the system-wide default string * size. Note that we are _not_ looking at * the value of the DTRACEOPT_STRSIZE option; * had this been set, we would expect to have * a non-zero size value in the "pushtr". */ tupregs[ttop].dttk_size = dtrace_strlen((char *)(uintptr_t)regs[rd], regs[r2] ? regs[r2] : dtrace_strsize_default) + 1; } else { if (regs[r2] > LONG_MAX) { *flags |= CPU_DTRACE_ILLOP; break; } tupregs[ttop].dttk_size = regs[r2]; } tupregs[ttop++].dttk_value = regs[rd]; break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } tupregs[ttop].dttk_value = regs[rd]; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_LDGAA: case DIF_OP_LDTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STGAA: case DIF_OP_STTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? 
DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_ALLOCS: { uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; /* * Rounding up the user allocation size could have * overflowed large, bogus allocations (like -1ULL) to * 0. */ if (size < regs[r1] || !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; break; } case DIF_OP_COPYS: if (!dtrace_canstore(regs[rd], regs[r2], mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) break; dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; case DIF_OP_STB: if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; break; case DIF_OP_STH: if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 1) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; break; case DIF_OP_STW: if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; break; case DIF_OP_STX: if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 7) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; break; } } if (!(*flags & CPU_DTRACE_FAULT)) return (rval); mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; return (0); } static void dtrace_action_breakpoint(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; char *msg = "dtrace: breakpoint action at probe "; char *ecbmsg = " (ecb "; uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; if (dtrace_destructive_disallow) return; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); /* * This is a poor man's (destitute man's?) sprintf(): we want to * print the provider name, module name, function name and name of * the probe, along with the hex address of the ECB with the breakpoint * action -- all of which we must place in the character buffer by * hand. 
*/ while (*msg != '\0') c[i++] = *msg++; for (str = prov->dtpv_name; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_mod; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_func; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_name; *str != '\0'; str++) c[i++] = *str; while (*ecbmsg != '\0') c[i++] = *ecbmsg++; while (shift >= 0) { mask = (uintptr_t)0xf << shift; if (val >= ((uintptr_t)1 << shift)) c[i++] = "0123456789abcdef"[(val & mask) >> shift]; shift -= 4; } c[i++] = ')'; c[i] = '\0'; #ifdef illumos debug_enter(c); #else kdb_enter(KDB_WHY_DTRACE, "breakpoint action"); #endif } static void dtrace_action_panic(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); if (dtrace_destructive_disallow) return; if (dtrace_panicked != NULL) return; if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) return; /* * We won the right to panic. (We want to be sure that only one * thread calls panic() from dtrace_probe(), and that panic() is * called exactly once.) */ dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); } static void dtrace_action_raise(uint64_t sig) { if (dtrace_destructive_disallow) return; if (sig >= NSIG) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } #ifdef illumos /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ if (curthread->t_dtrace_sig == 0) curthread->t_dtrace_sig = (uint8_t)sig; curthread->t_sig_check = 1; aston(curthread); #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, sig); PROC_UNLOCK(p); #endif } static void dtrace_action_stop(void) { if (dtrace_destructive_disallow) return; #ifdef illumos if (!curthread->t_dtrace_stop) { curthread->t_dtrace_stop = 1; curthread->t_sig_check = 1; aston(curthread); } #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, SIGSTOP); PROC_UNLOCK(p); #endif } static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { hrtime_t now; volatile uint16_t *flags; #ifdef illumos cpu_t *cpu = CPU; #else cpu_t *cpu = &solaris_cpu[curcpu]; #endif if (dtrace_destructive_disallow) return; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; now = dtrace_gethrtime(); if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { /* * We need to advance the mark to the current time. */ cpu->cpu_dtrace_chillmark = now; cpu->cpu_dtrace_chilled = 0; } /* * Now check to see if the requested chill time would take us over * the maximum amount of time allowed in the chill interval. (Or * worse, if the calculation itself induces overflow.) */ if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { *flags |= CPU_DTRACE_ILLOP; return; } while (dtrace_gethrtime() - now < val) continue; /* * Normally, we assure that the value of the variable "timestamp" does * not change within an ECB. The presence of chill() represents an * exception to this rule, however. 
*/ mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; cpu->cpu_dtrace_chilled += val; } static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) { int nframes = DTRACE_USTACK_NFRAMES(arg); int strsize = DTRACE_USTACK_STRSIZE(arg); uint64_t *pcs = &buf[1], *fps; char *str = (char *)&pcs[nframes]; int size, offs = 0, i, j; size_t rem; uintptr_t old = mstate->dtms_scratch_ptr, saved; uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; char *sym; /* * Should be taking a faster path if string space has not been * allocated. */ ASSERT(strsize != 0); /* * We will first allocate some temporary space for the frame pointers. */ fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); if (!DTRACE_INSCRATCH(mstate, size)) { /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return; } mstate->dtms_scratch_ptr += size; saved = mstate->dtms_scratch_ptr; /* * Now get a stack with both program counters and frame pointers. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getufpstack(buf, fps, nframes + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); /* * If that faulted, we're cooked. */ if (*flags & CPU_DTRACE_FAULT) goto out; /* * Now we want to walk up the stack, calling the USTACK helper. For * each iteration, we restore the scratch pointer. */ for (i = 0; i < nframes; i++) { mstate->dtms_scratch_ptr = saved; if (offs >= strsize) break; sym = (char *)(uintptr_t)dtrace_helper( DTRACE_HELPER_ACTION_USTACK, mstate, state, pcs[i], fps[i]); /* * If we faulted while running the helper, we're going to * clear the fault and null out the corresponding string. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; str[offs++] = '\0'; continue; } if (sym == NULL) { str[offs++] = '\0'; continue; } if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate, &(state->dts_vstate))) { str[offs++] = '\0'; continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); /* * Now copy in the string that the helper returned to us. */ for (j = 0; offs + j < strsize && j < rem; j++) { if ((str[offs + j] = sym[j]) == '\0') break; } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); offs += j + 1; } if (offs >= strsize) { /* * If we didn't have room for all of the strings, we don't * abort processing -- this needn't be a fatal error -- but we * still want to increment a counter (dts_stkstroverflows) to * allow this condition to be warned about. (If this is from * a jstack() action, it is easily tuned via jstackstrsize.) */ dtrace_error(&state->dts_stkstroverflows); } while (offs < strsize) str[offs++] = '\0'; out: mstate->dtms_scratch_ptr = old; } static void dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) { volatile uint16_t *flags; uint64_t val = *valp; size_t valoffs = *valoffsp; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); /* * If this is a string, we're going to only load until we find the zero * byte -- after which we'll store zero bytes. 
*/ if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { char c = '\0' + 1; size_t s; for (s = 0; s < size; s++) { if (c != '\0' && dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); if (c == '\0' && intuple) break; } } else { uint8_t c; while (valoffs < end) { if (dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); } } *valp = val; *valoffsp = valoffs; } /* * If you're looking for the epicenter of DTrace, you just found it. This * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. */ void dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) { processorid_t cpuid; dtrace_icookie_t cookie; dtrace_probe_t *probe; dtrace_mstate_t mstate; dtrace_ecb_t *ecb; dtrace_action_t *act; intptr_t offs; size_t size; int vtime, onintr; volatile uint16_t *flags; hrtime_t now; if (panicstr != NULL) return; #ifdef illumos /* * Kick out immediately if this CPU is still being born (in which case * curthread will be set to -1) or the current thread can't allow * probes in its current context. */ if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) return; #endif cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; cpuid = curcpu; onintr = CPU_ON_INTR(CPU); if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == curthread->t_predcache) { /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. */ dtrace_interrupt_enable(cookie); return; } #ifdef illumos if (panic_quiesce) { #else if (panicstr != NULL) { #endif /* * We don't trace anything if we're panicking. */ dtrace_interrupt_enable(cookie); return; } now = mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; vtime = dtrace_vtime_references != 0; if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; mstate.dtms_difo = NULL; mstate.dtms_probe = probe; mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; mstate.dtms_arg[3] = arg3; mstate.dtms_arg[4] = arg4; flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { dtrace_predicate_t *pred = ecb->dte_predicate; dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; /* * A little subtlety with the following (seemingly innocuous) * declaration of the automatic 'val': by looking at the * code, you might think that it could be declared in the * action processing loop, below. (That is, it's only used in * the action processing loop.) 
However, it must be declared * out of that scope because in the case of DIF expression * arguments to aggregating actions, one iteration of the * action loop will use the last iteration's value. */ uint64_t val = 0; mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; mstate.dtms_getf = NULL; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { /* * If dtrace itself is the provider of this probe, * we're only going to continue processing the ECB if * arg0 (the dtrace_state_t) is equal to the ECB's * creating state. (This prevents disjoint consumers * from seeing one another's metaprobes.) */ if (arg0 != (uint64_t)(uintptr_t)state) continue; } if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { /* * We're not currently active. If our provider isn't * the dtrace pseudo provider, we're not interested. */ if (prov != dtrace_provider) continue; /* * Now we must further check if we are in the BEGIN * probe. If we are, we will only continue processing * if we're still in WARMUP -- if one BEGIN enabling * has invoked the exit() action, we don't want to * evaluate subsequent BEGIN enablings. */ if (probe->dtpr_id == dtrace_probeid_begin && state->dts_activity != DTRACE_ACTIVITY_WARMUP) { ASSERT(state->dts_activity == DTRACE_ACTIVITY_DRAINING); continue; } } if (ecb->dte_cond) { /* * If the dte_cond bits indicate that this * consumer is only allowed to see user-mode firings * of this probe, call the provider's dtps_usermode() * entry point to check that the probe was fired * while in a user context. Skip this ECB if that's * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; #ifdef illumos /* * This is more subtle than it looks. We have to be * absolutely certain that CRED() isn't going to * change out from under us so it's only legit to * examine that structure if we're in constrained * situations. Currently, the only times we'll this * check is if a non-super-user has enabled the * profile or syscall providers -- providers that * allow visibility of all processes. For the * profile case, the check above will ensure that * we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) continue; } if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; } #endif } if (now - state->dts_alive > dtrace_deadman_timeout) { /* * We seem to be dead. Unless we (a) have kernel * destructive permissions (b) have explicitly enabled * destructive actions and (c) destructive actions have * not been disabled, we're going to transition into * the KILLED state, from which no further processing * on this state will be performed. 
*/ if (!dtrace_priv_kernel_destructive(state) || !state->dts_cred.dcr_destructive || dtrace_destructive_disallow) { void *activity = &state->dts_activity; dtrace_activity_t current; do { current = state->dts_activity; } while (dtrace_cas32(activity, current, DTRACE_ACTIVITY_KILLED) != current); continue; } } if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, &mstate)) < 0) continue; tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size != 0) { dtrace_rechdr_t dtrh; if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; } ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh.dtrh_epid = ecb->dte_epid; DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp); *((dtrace_rechdr_t *)(tomax + offs)) = dtrh; } mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) mstate.dtms_access = DTRACE_ACCESS_KERNEL; else mstate.dtms_access = 0; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); if (!(*flags & CPU_DTRACE_ERROR) && !rval) { dtrace_cacheid_t cid = probe->dtpr_predcache; if (cid != DTRACE_CACHEIDNONE && !onintr) { /* * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); curthread->t_predcache = cid; } continue; } } for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && act != NULL; act = act->dta_next) { size_t valoffs; dtrace_difo_t *dp; dtrace_recdesc_t *rec = &act->dta_rec; size = rec->dtrd_size; valoffs = offs + rec->dtrd_offset; if (DTRACEACT_ISAGG(act->dta_kind)) { uint64_t v = 0xbad; dtrace_aggregation_t *agg; agg = (dtrace_aggregation_t *)act; if ((dp = act->dta_difo) != NULL) v = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; /* * Note that we always pass the expression * value from the previous iteration of the * action loop. This value will only be used * if there is an expression argument to the * aggregating action, denoted by the * dtag_hasarg field. */ dtrace_aggregate(agg, buf, offs, aggbuf, v, val); continue; } switch (act->dta_kind) { case DTRACEACT_STOP: if (dtrace_priv_proc_destructive(state)) dtrace_action_stop(); continue; case DTRACEACT_BREAKPOINT: if (dtrace_priv_kernel_destructive(state)) dtrace_action_breakpoint(ecb); continue; case DTRACEACT_PANIC: if (dtrace_priv_kernel_destructive(state)) dtrace_action_panic(ecb); continue; case DTRACEACT_STACK: if (!dtrace_priv_kernel(state)) continue; dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : (uint32_t *)arg0); continue; case DTRACEACT_JSTACK: case DTRACEACT_USTACK: if (!dtrace_priv_proc(state)) continue; /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate.dtms_probe) && CPU_ON_INTR(CPU)) { int depth = DTRACE_USTACK_NFRAMES( rec->dtrd_arg) + 1; dtrace_bzero((void *)(tomax + valoffs), DTRACE_USTACK_STRSIZE(rec->dtrd_arg) + depth * sizeof (uint64_t)); continue; } if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && curproc->p_dtrace_helpers != NULL) { /* * This is the slow path -- we have * allocated string space, and we're * getting the stack of a process that * has helpers. Call into a separate * routine to perform this processing. 
*/ dtrace_action_ustack(&mstate, state, (uint64_t *)(tomax + valoffs), rec->dtrd_arg); continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); continue; default: break; } dp = act->dta_difo; ASSERT(dp != NULL); val = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; switch (act->dta_kind) { case DTRACEACT_SPECULATE: { dtrace_rechdr_t *dtrh; ASSERT(buf == &state->dts_buffer[cpuid]); buf = dtrace_speculation_buffer(state, cpuid, val); if (buf == NULL) { *flags |= CPU_DTRACE_DROP; continue; } offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, NULL); if (offs < 0) { *flags |= CPU_DTRACE_DROP; continue; } tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size == 0) continue; ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh = ((void *)(tomax + offs)); dtrh->dtrh_epid = ecb->dte_epid; /* * When the speculation is committed, all of * the records in the speculative buffer will * have their timestamps set to the commit * time. Until then, it is set to a sentinel * value, for debugability. */ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); continue; } case DTRACEACT_PRINTM: { /* The DIF returns a 'memref'. */ uintptr_t *memref = (uintptr_t *)(uintptr_t) val; /* Get the size from the memref. */ size = memref[1]; /* * Check if the size exceeds the allocated * buffer size. */ if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) { /* Flag a drop! */ *flags |= CPU_DTRACE_DROP; continue; } /* Store the size in the buffer first. */ DTRACE_STORE(uintptr_t, tomax, valoffs, size); /* * Offset the buffer address to the start * of the data. */ valoffs += sizeof(uintptr_t); /* * Reset to the memory address rather than * the memref array, then let the BYREF * code below do the work to store the * memory data in the buffer. */ val = memref[0]; break; } case DTRACEACT_CHILL: if (dtrace_priv_kernel_destructive(state)) dtrace_action_chill(&mstate, val); continue; case DTRACEACT_RAISE: if (dtrace_priv_proc_destructive(state)) dtrace_action_raise(val); continue; case DTRACEACT_COMMIT: ASSERT(!committed); /* * We need to commit our buffer state. */ if (ecb->dte_size) buf->dtb_offset = offs + ecb->dte_size; buf = &state->dts_buffer[cpuid]; dtrace_speculation_commit(state, cpuid, val); committed = 1; continue; case DTRACEACT_DISCARD: dtrace_speculation_discard(state, cpuid, val); continue; case DTRACEACT_DIFEXPR: case DTRACEACT_LIBACT: case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_TRACEMEM: break; case DTRACEACT_TRACEMEM_DYNSIZE: tracememsize = val; break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (!dtrace_priv_kernel(state)) continue; break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { #ifdef illumos struct pid *pid = curthread->t_procp->p_pidp; #endif if (!dtrace_priv_proc(state)) continue; DTRACE_STORE(uint64_t, tomax, #ifdef illumos valoffs, (uint64_t)pid->pid_id); #else valoffs, (uint64_t) curproc->p_pid); #endif DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); continue; } case DTRACEACT_EXIT: { /* * For the exit action, we are going to attempt * to atomically set our activity to be * draining. If this fails (either because * another CPU has beat us to the exit action, * or because our current activity is something * other than ACTIVE or WARMUP), we will * continue. 
This assures that the exit action * can be successfully recorded at most once * when we're in the ACTIVE state. If we're * encountering the exit() action while in * COOLDOWN, however, we want to honor the new * status code. (We know that we're the only * thread in COOLDOWN, so there is no race.) */ void *activity = &state->dts_activity; dtrace_activity_t current = state->dts_activity; if (current == DTRACE_ACTIVITY_COOLDOWN) break; if (current != DTRACE_ACTIVITY_WARMUP) current = DTRACE_ACTIVITY_ACTIVE; if (dtrace_cas32(activity, current, DTRACE_ACTIVITY_DRAINING) != current) { *flags |= CPU_DTRACE_DROP; continue; } break; } default: ASSERT(0); } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF || dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) { uintptr_t end = valoffs + size; if (tracememsize != 0 && valoffs + tracememsize < end) { end = valoffs + tracememsize; tracememsize = 0; } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && !dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, NULL, &mstate, vstate)) continue; dtrace_store_by_ref(dp, tomax, size, &valoffs, &val, end, act->dta_intuple, dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? DIF_TF_BYREF: DIF_TF_BYUREF); continue; } switch (size) { case 0: break; case sizeof (uint8_t): DTRACE_STORE(uint8_t, tomax, valoffs, val); break; case sizeof (uint16_t): DTRACE_STORE(uint16_t, tomax, valoffs, val); break; case sizeof (uint32_t): DTRACE_STORE(uint32_t, tomax, valoffs, val); break; case sizeof (uint64_t): DTRACE_STORE(uint64_t, tomax, valoffs, val); break; default: /* * Any other size should have been returned by * reference, not by value. */ ASSERT(0); break; } } if (*flags & CPU_DTRACE_DROP) continue; if (*flags & CPU_DTRACE_FAULT) { int ndx; dtrace_action_t *err; buf->dtb_errors++; if (probe->dtpr_id == dtrace_probeid_error) { /* * There's nothing we can do -- we had an * error on the error probe. We bump an * error counter to at least indicate that * this condition happened. */ dtrace_error(&state->dts_dblerrors); continue; } if (vtime) { /* * Before recursing on dtrace_probe(), we * need to explicitly clear out our start * time to prevent it from being accumulated * into t_dtrace_vtime. */ curthread->t_dtrace_start = 0; } /* * Iterate over the actions to figure out which action * we were processing when we experienced the error. * Note that act points _past_ the faulting action; if * act is ecb->dte_action, the fault was in the * predicate, if it's ecb->dte_action->dta_next it's * in action #1, and so on. */ for (err = ecb->dte_action, ndx = 0; err != act; err = err->dta_next, ndx++) continue; dtrace_probe_error(state, ecb->dte_epid, ndx, (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), cpu_core[cpuid].cpuc_dtrace_illval); continue; } if (!committed) buf->dtb_offset = offs + ecb->dte_size; } if (vtime) curthread->t_dtrace_start = dtrace_gethrtime(); dtrace_interrupt_enable(cookie); } /* * DTrace Probe Hashing Functions * * The functions in this section (and indeed, the functions in remaining * sections) are not _called_ from probe context. (Any exceptions to this are * marked with a "Note:".) Rather, they are called from elsewhere in the * DTrace framework to look-up probes in, add probes to and remove probes from * the DTrace probe hashes. (Each probe is hashed by each element of the * probe tuple -- allowing for fast lookups, regardless of what was * specified.) 
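 *
 * As a purely illustrative sketch (not code from this file), a lookup for
 * "syscall::read:entry" only needs to walk the by-function chain for
 * "read" rather than every probe:
 *
 *	template.dtpr_func = "read";
 *	for (probe = dtrace_hash_lookup(dtrace_byfunc, &template);
 *	    probe != NULL; probe = *(DTRACE_HASHNEXT(dtrace_byfunc, probe)))
 *		(void) dtrace_match_probe(probe, &pkey, priv, uid, zoneid);
 *
 * The functions below build and maintain exactly these per-element chains;
 * dtrace_match(), further down, is their consumer.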
*/ static uint_t dtrace_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } static dtrace_hash_t * dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) { dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); hash->dth_stroffs = stroffs; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; hash->dth_size = 1; hash->dth_mask = hash->dth_size - 1; hash->dth_tab = kmem_zalloc(hash->dth_size * sizeof (dtrace_hashbucket_t *), KM_SLEEP); return (hash); } static void dtrace_hash_destroy(dtrace_hash_t *hash) { #ifdef DEBUG int i; for (i = 0; i < hash->dth_size; i++) ASSERT(hash->dth_tab[i] == NULL); #endif kmem_free(hash->dth_tab, hash->dth_size * sizeof (dtrace_hashbucket_t *)); kmem_free(hash, sizeof (dtrace_hash_t)); } static void dtrace_hash_resize(dtrace_hash_t *hash) { int size = hash->dth_size, i, ndx; int new_size = hash->dth_size << 1; int new_mask = new_size - 1; dtrace_hashbucket_t **new_tab, *bucket, *next; ASSERT((new_size & new_mask) == 0); new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { dtrace_probe_t *probe = bucket->dthb_chain; ASSERT(probe != NULL); ndx = DTRACE_HASHSTR(hash, probe) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; new_tab[ndx] = bucket; } } kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); hash->dth_tab = new_tab; hash->dth_size = new_size; hash->dth_mask = new_mask; } static void dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) goto add; } if ((hash->dth_nbuckets >> 1) > hash->dth_size) { dtrace_hash_resize(hash); dtrace_hash_add(hash, new); return; } bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); bucket->dthb_next = hash->dth_tab[ndx]; hash->dth_tab[ndx] = bucket; hash->dth_nbuckets++; add: nextp = DTRACE_HASHNEXT(hash, new); ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); *nextp = bucket->dthb_chain; if (bucket->dthb_chain != NULL) { prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); ASSERT(*prevp == NULL); *prevp = new; } bucket->dthb_chain = new; bucket->dthb_len++; } static dtrace_probe_t * dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_chain); } return (NULL); } static int dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_len); } return (0); } static void dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) { int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 
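	/*
	 * DTRACE_HASHPREV()/DTRACE_HASHNEXT() yield the addresses of the
	 * link pointers embedded in the probe itself (located via the
	 * hash's dth_prevoffs/dth_nextoffs), so the unlinking below updates
	 * the probe's own per-hash chain links rather than separate nodes.
	 */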
dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); /* * Find the bucket that we're removing this probe from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) break; } ASSERT(bucket != NULL); if (*prevp == NULL) { if (*nextp == NULL) { /* * The removed probe was the only probe on this * bucket; we need to remove the bucket. */ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; ASSERT(bucket->dthb_chain == probe); ASSERT(b != NULL); if (b == bucket) { hash->dth_tab[ndx] = bucket->dthb_next; } else { while (b->dthb_next != bucket) b = b->dthb_next; b->dthb_next = bucket->dthb_next; } ASSERT(hash->dth_nbuckets > 0); hash->dth_nbuckets--; kmem_free(bucket, sizeof (dtrace_hashbucket_t)); return; } bucket->dthb_chain = *nextp; } else { *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; } if (*nextp != NULL) *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; } /* * DTrace Utility Functions * * These are random utility functions that are _not_ called from probe context. */ static int dtrace_badattr(const dtrace_attribute_t *a) { return (a->dtat_name > DTRACE_STABILITY_MAX || a->dtat_data > DTRACE_STABILITY_MAX || a->dtat_class > DTRACE_CLASS_MAX); } /* * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. */ static char * dtrace_strdup(const char *str) { char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); if (str != NULL) (void) strcpy(new, str); return (new); } #define DTRACE_ISALPHA(c) \ (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) static int dtrace_badname(const char *s) { char c; if (s == NULL || (c = *s++) == '\0') return (0); if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') return (1); while ((c = *s++) != '\0') { if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && c != '-' && c != '_' && c != '.' && c != '`') return (1); } return (0); } static void dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) { uint32_t priv; #ifdef illumos if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { /* * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. */ priv = DTRACE_PRIV_ALL; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); priv = 0; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) priv |= DTRACE_PRIV_USER; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) priv |= DTRACE_PRIV_PROC; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) priv |= DTRACE_PRIV_OWNER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) priv |= DTRACE_PRIV_ZONEOWNER; } #else priv = DTRACE_PRIV_ALL; #endif *privp = priv; } #ifdef DTRACE_ERRDEBUG static void dtrace_errdebug(const char *str) { int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; int occupied = 0; mutex_enter(&dtrace_errlock); dtrace_errlast = str; dtrace_errthread = curthread; while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { dtrace_errhash[hval].dter_count++; goto out; } if (dtrace_errhash[hval].dter_msg != NULL) { hval = (hval + 1) % DTRACE_ERRHASHSZ; continue; } dtrace_errhash[hval].dter_msg = str; dtrace_errhash[hval].dter_count = 1; goto out; } panic("dtrace: undersized error hash"); out: mutex_exit(&dtrace_errlock); } #endif /* * DTrace Matching Functions * * These functions are used to match groups of probes, given some elements of * a probe tuple, or some globbed expressions for elements of a probe tuple. 
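 *
 * For example (illustrative only), the description "fbt:kernel:vn_open:*"
 * compiles to a key whose provider, module and function elements use
 * dtrace_match_string() and whose name element uses dtrace_match_glob(),
 * while a completely empty description falls back to dtrace_match_nonzero()
 * for the function element so that only anchored probes are matched.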
*/ static int dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid, zoneid_t zoneid) { if (priv != DTRACE_PRIV_ALL) { uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags; uint32_t match = priv & ppriv; /* * No PRIV_DTRACE_* privileges... */ if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | DTRACE_PRIV_KERNEL)) == 0) return (0); /* * No matching bits, but there were bits to match... */ if (match == 0 && ppriv != 0) return (0); /* * Need to have permissions to the process, but don't... */ if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) { return (0); } /* * Need to be in the same zone unless we possess the * privilege to examine all zones. */ if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 && zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) { return (0); } } return (1); } /* * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which * consists of input pattern strings and an ops-vector to evaluate them. * This function returns >0 for match, 0 for no match, and <0 for error. */ static int dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid) { dtrace_provider_t *pvp = prp->dtpr_provider; int rv; if (pvp->dtpv_defunct) return (0); if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0) return (rv); if (dtrace_match_priv(prp, priv, uid, zoneid) == 0) return (0); return (rv); } /* * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN) * interface for matching a glob pattern 'p' to an input string 's'. Unlike * libc's version, the kernel version only applies to 8-bit ASCII strings. * In addition, all of the recursion cases except for '*' matching have been * unwound. For '*', we still implement recursive evaluation, but a depth * counter is maintained and matching is aborted if we recurse too deep. * The function returns 0 if no match, >0 if match, and <0 if recursion error. 
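 *
 * A few illustrative cases:
 *
 *	dtrace_match_glob("read", "re*d", 0)	> 0	(match)
 *	dtrace_match_glob("read", "w*", 0)	== 0	(no match)
 *	dtrace_match_glob("", "*", 0)		> 0	(match)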
*/ static int dtrace_match_glob(const char *s, const char *p, int depth) { const char *olds; char s1, c; int gs; if (depth > DTRACE_PROBEKEY_MAXDEPTH) return (-1); if (s == NULL) s = ""; /* treat NULL as empty string */ top: olds = s; s1 = *s++; if (p == NULL) return (0); if ((c = *p++) == '\0') return (s1 == '\0'); switch (c) { case '[': { int ok = 0, notflag = 0; char lc = '\0'; if (s1 == '\0') return (0); if (*p == '!') { notflag = 1; p++; } if ((c = *p++) == '\0') return (0); do { if (c == '-' && lc != '\0' && *p != ']') { if ((c = *p++) == '\0') return (0); if (c == '\\' && (c = *p++) == '\0') return (0); if (notflag) { if (s1 < lc || s1 > c) ok++; else return (0); } else if (lc <= s1 && s1 <= c) ok++; } else if (c == '\\' && (c = *p++) == '\0') return (0); lc = c; /* save left-hand 'c' for next iteration */ if (notflag) { if (s1 != c) ok++; else return (0); } else if (s1 == c) ok++; if ((c = *p++) == '\0') return (0); } while (c != ']'); if (ok) goto top; return (0); } case '\\': if ((c = *p++) == '\0') return (0); /*FALLTHRU*/ default: if (c != s1) return (0); /*FALLTHRU*/ case '?': if (s1 != '\0') goto top; return (0); case '*': while (*p == '*') p++; /* consecutive *'s are identical to a single one */ if (*p == '\0') return (1); for (s = olds; *s != '\0'; s++) { if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0) return (gs); } return (0); } } /*ARGSUSED*/ static int dtrace_match_string(const char *s, const char *p, int depth) { return (s != NULL && strcmp(s, p) == 0); } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { return (1); /* always match the empty pattern */ } /*ARGSUSED*/ static int dtrace_match_nonzero(const char *s, const char *p, int depth) { return (s != NULL && s[0] != '\0'); } static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; int len, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * If the probe ID is specified in the key, just lookup by ID and * invoke the match callback once if a matching probe is found. */ if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { (void) (*matched)(probe, arg); nmatched++; } return (nmatched); } template.dtpr_mod = (char *)pkp->dtpk_mod; template.dtpr_func = (char *)pkp->dtpk_func; template.dtpr_name = (char *)pkp->dtpk_name; /* * We want to find the most distinct of the module name, function * name, and name. So for each one that is not a glob pattern or * empty string, we perform a lookup in the corresponding hash and * use the hash table with the fewest collisions to do our search. */ if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; hash = dtrace_bymod; } if (pkp->dtpk_fmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) { best = len; hash = dtrace_byfunc; } if (pkp->dtpk_nmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) { best = len; hash = dtrace_byname; } /* * If we did not select a hash table, iterate over every probe and * invoke our callback for each one that matches our input probe key. 
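 * (This is the case whenever none of the module, function and name
 * elements is a plain string -- e.g. for a description like "syscall:::*"
 * -- since only elements matched via dtrace_match_string() are eligible
 * for a hashed lookup above.)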
*/ if (hash == NULL) { for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * If we selected a hash table, iterate over each probe of the same key * name and invoke the callback for every probe that matches the other * attributes of our input probe key. */ for (probe = dtrace_hash_lookup(hash, &template); probe != NULL; probe = *(DTRACE_HASHNEXT(hash, probe))) { if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * Return the function pointer dtrace_probecmp() should use to compare the * specified pattern with a string. For NULL or empty patterns, we select * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob(). * For non-empty non-glob strings, we use dtrace_match_string(). */ static dtrace_probekey_f * dtrace_probekey_func(const char *p) { char c; if (p == NULL || *p == '\0') return (&dtrace_match_nul); while ((c = *p++) != '\0') { if (c == '[' || c == '?' || c == '*' || c == '\\') return (&dtrace_match_glob); } return (&dtrace_match_string); } /* * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. By convention, a null key only matches anchored * probes: if each field is the empty string, reset dtpk_fmatch to * dtrace_match_nonzero(). */ static void dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { pkp->dtpk_prov = pdp->dtpd_provider; pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); pkp->dtpk_mod = pdp->dtpd_mod; pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); pkp->dtpk_func = pdp->dtpd_func; pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); pkp->dtpk_name = pdp->dtpd_name; pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; if (pkp->dtpk_id == DTRACE_IDNONE && pkp->dtpk_pmatch == &dtrace_match_nul && pkp->dtpk_mmatch == &dtrace_match_nul && pkp->dtpk_fmatch == &dtrace_match_nul && pkp->dtpk_nmatch == &dtrace_match_nul) pkp->dtpk_fmatch = &dtrace_match_nonzero; } /* * DTrace Provider-to-Framework API Functions * * These functions implement much of the Provider-to-Framework API, as * described in . The parts of the API not in this section are * the functions in the API for probe management (found below), and * dtrace_probe() itself (found above). */ /* * Register the calling provider with the DTrace framework. This should * generally be called by DTrace providers in their attach(9E) entry point. */ int dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp) { dtrace_provider_t *provider; if (name == NULL || pap == NULL || pops == NULL || idp == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "arguments", name ? 
name : ""); return (EINVAL); } if (name[0] == '\0' || dtrace_badname(name)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider name", name); return (EINVAL); } if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) || pops->dtps_enable == NULL || pops->dtps_disable == NULL || pops->dtps_destroy == NULL || ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider ops", name); return (EINVAL); } if (dtrace_badattr(&pap->dtpa_provider) || dtrace_badattr(&pap->dtpa_mod) || dtrace_badattr(&pap->dtpa_func) || dtrace_badattr(&pap->dtpa_name) || dtrace_badattr(&pap->dtpa_args)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider attributes", name); return (EINVAL); } if (priv & ~DTRACE_PRIV_ALL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "privilege attributes", name); return (EINVAL); } if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && pops->dtps_usermode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " "dtps_usermode() op for given privilege attributes", name); return (EINVAL); } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(provider->dtpv_name, name); provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; if (cr != NULL) { provider->dtpv_priv.dtpp_uid = crgetuid(cr); provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr); } provider->dtpv_pops = *pops; if (pops->dtps_provide == NULL) { ASSERT(pops->dtps_provide_module != NULL); provider->dtpv_pops.dtps_provide = (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop; } if (pops->dtps_provide_module == NULL) { ASSERT(pops->dtps_provide != NULL); provider->dtpv_pops.dtps_provide_module = (void (*)(void *, modctl_t *))dtrace_nullop; } if (pops->dtps_suspend == NULL) { ASSERT(pops->dtps_resume == NULL); provider->dtpv_pops.dtps_suspend = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; provider->dtpv_pops.dtps_resume = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; } provider->dtpv_arg = arg; *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dtrace_anon.dta_enabling == NULL); /* * We make sure that the DTrace provider is at the head of * the provider chain. */ provider->dtpv_next = dtrace_provider; dtrace_provider = provider; return (0); } mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * If there is at least one provider registered, we'll add this * provider after the first provider. */ if (dtrace_provider != NULL) { provider->dtpv_next = dtrace_provider->dtpv_next; dtrace_provider->dtpv_next = provider; } else { dtrace_provider = provider; } if (dtrace_retained != NULL) { dtrace_enabling_provide(provider); /* * Now we need to call dtrace_enabling_matchall() -- which * will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); dtrace_enabling_matchall(); return (0); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * Unregister the specified provider from the DTrace framework. This should * generally be called by DTrace providers in their detach(9E) entry point. 
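 *
 * A provider's detach(9E) entry point would typically do something like the
 * following (illustrative sketch; "my_provider_id" is a hypothetical handle
 * saved at dtrace_register() time), failing the detach while probes remain
 * enabled (EBUSY) or while a deferred reap is pending (EAGAIN):
 *
 *	if (dtrace_unregister(my_provider_id) != 0)
 *		return (DDI_FAILURE);
 *	return (DDI_SUCCESS);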
*/ int dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; int i, self = 0, noreap = 0; dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. */ ASSERT(old == dtrace_provider); #ifdef illumos ASSERT(dtrace_devi != NULL); #endif ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); self = 1; if (dtrace_provider->dtpv_next != NULL) { /* * There's another provider here; return failure. */ return (EBUSY); } } else { mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); } /* * If anyone has /dev/dtrace open, or if there are anonymous enabled * probes, we refuse to let providers slither away, unless this * provider has already been explicitly invalidated. */ if (!old->dtpv_defunct && (dtrace_opens || (dtrace_anon.dta_state != NULL && dtrace_anon.dta_state->dts_necbs > 0))) { if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } return (EBUSY); } /* * Attempt to destroy the probes associated with this provider. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; if (probe->dtpr_ecb == NULL) continue; /* * If we are trying to unregister a defunct provider, and the * provider was made defunct within the interval dictated by * dtrace_unregister_defunct_reap, we'll (asynchronously) * attempt to reap our enablings. To denote that the provider * should reattempt to unregister itself at some point in the * future, we will return a differentiable error code (EAGAIN * instead of EBUSY) in this case. */ if (dtrace_gethrtime() - old->dtpv_defunct > dtrace_unregister_defunct_reap) noreap = 1; if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } if (noreap) return (EBUSY); (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); return (EAGAIN); } /* * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * The provider's probes have been removed from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
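 * (dtrace_sync() does not return until every CPU has been observed outside
 * of probe context, so it is safe to hand the probes back to the provider
 * and free them once it completes.)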
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } if ((prev = dtrace_provider) == old) { #ifdef illumos ASSERT(self || dtrace_devi == NULL); ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL); #endif dtrace_provider = old->dtpv_next; } else { while (prev != NULL && prev->dtpv_next != old) prev = prev->dtpv_next; if (prev == NULL) { panic("attempt to unregister non-existent " "dtrace provider %p\n", (void *)id); } prev->dtpv_next = old->dtpv_next; } if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof (dtrace_provider_t)); return (0); } /* * Invalidate the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. */ void dtrace_invalidate(dtrace_provider_id_t id) { dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); pvp->dtpv_defunct = dtrace_gethrtime(); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); } /* * Indicate whether or not DTrace has attached. */ int dtrace_attached(void) { /* * dtrace_provider will be non-NULL iff the DTrace driver has * attached. (It's non-NULL because DTrace is always itself a * provider.) */ return (dtrace_provider != NULL); } /* * Remove all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the provider * -- just as many of its associated probes as it can. */ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; int i; dtrace_probe_t *probe; /* * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * Attempt to destroy the probes associated with this provider. 
*/ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != prov) continue; if (probe->dtpr_ecb != NULL) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); kmem_free(probe, sizeof (dtrace_probe_t)); #ifdef illumos vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); #else free_unr(dtrace_arena, i + 1); #endif } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * DTrace Probe Management Functions * * The functions in this section perform the DTrace probe management, * including functions to create probes, look-up probes, and call into the * providers to request that probes be provided. Some of these functions are * in the Provider-to-Framework API; these functions can be identified by the * fact that they are not declared "static". */ /* * Create a probe with the specified module name, function name, and name. */ dtrace_id_t dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, const char *func, const char *name, int aframes, void *arg) { dtrace_probe_t *probe, **probes; dtrace_provider_t *provider = (dtrace_provider_t *)prov; dtrace_id_t id; if (provider == dtrace_provider) { ASSERT(MUTEX_HELD(&dtrace_lock)); } else { mutex_enter(&dtrace_lock); } #ifdef illumos id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); #else id = alloc_unr(dtrace_arena); #endif probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; probe->dtpr_mod = dtrace_strdup(mod); probe->dtpr_func = dtrace_strdup(func); probe->dtpr_name = dtrace_strdup(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); if (id - 1 >= dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; if (nsize == 0) { ASSERT(osize == 0); ASSERT(dtrace_probes == NULL); nsize = sizeof (dtrace_probe_t *); } probes = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_probes == NULL) { ASSERT(osize == 0); dtrace_probes = probes; dtrace_nprobes = 1; } else { dtrace_probe_t **oprobes = dtrace_probes; bcopy(oprobes, probes, osize); dtrace_membar_producer(); dtrace_probes = probes; dtrace_sync(); /* * All CPUs are now seeing the new probes array; we can * safely free the old array. */ kmem_free(oprobes, osize); dtrace_nprobes <<= 1; } ASSERT(id - 1 < dtrace_nprobes); } ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; if (provider != dtrace_provider) mutex_exit(&dtrace_lock); return (id); } static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > dtrace_nprobes) return (NULL); return (dtrace_probes[id - 1]); } static int dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) { *((dtrace_id_t *)arg) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } /* * Look up a probe based on provider and one or more of module name, function * name and probe name. 
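 *
 * For example (illustrative only), a provider that creates its probes
 * lazily might check for an existing probe before creating one; "my_id"
 * and "my_arg" below are hypothetical:
 *
 *	if (dtrace_probe_lookup(my_id, "genunix", "open", "entry") == 0)
 *		(void) dtrace_probe_create(my_id, "genunix", "open",
 *		    "entry", 0, my_arg);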
*/ dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod, char *func, char *name) { dtrace_probekey_t pkey; dtrace_id_t id; int match; pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; pkey.dtpk_pmatch = &dtrace_match_string; pkey.dtpk_mod = mod; pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_func = func; pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_name = name; pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; mutex_enter(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, dtrace_probe_lookup_match, &id); mutex_exit(&dtrace_lock); ASSERT(match == 1 || match == 0); return (match ? id : 0); } /* * Returns the probe argument associated with the specified probe. */ void * dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) { dtrace_probe_t *probe; void *rval = NULL; mutex_enter(&dtrace_lock); if ((probe = dtrace_probe_lookup_id(pid)) != NULL && probe->dtpr_provider == (dtrace_provider_t *)id) rval = probe->dtpr_arg; mutex_exit(&dtrace_lock); return (rval); } /* * Copy a probe into a probe description. */ static void dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) { bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; (void) strncpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); } /* * Called to indicate that a probe -- or probes -- should be provided by a * specfied provider. If the specified description is NULL, the provider will * be told to provide all of its probes. (This is done whenever a new * consumer comes along, or whenever a retained enabling is to be matched.) If * the specified description is non-NULL, the provider is given the * opportunity to dynamically provide the specified probe, allowing providers * to support the creation of probes on-the-fly. (So-called _autocreated_ * probes.) If the provider is NULL, the operations will be applied to all * providers; if the provider is non-NULL the operations will only be applied * to the specified provider. The dtrace_provider_lock must be held, and the * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation * will need to grab the dtrace_lock when it reenters the framework through * dtrace_probe_lookup(), dtrace_probe_create(), etc. */ static void dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) { #ifdef illumos modctl_t *ctl; #endif int all = 0; ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { /* * First, call the blanket provide operation. */ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); #ifdef illumos /* * Now call the per-module provide operation. We will grab * mod_lock to prevent the list from being modified. Note * that this also prevents the mod_busy bits from changing. * (mod_busy can only be changed with mod_lock held.) 
*/ mutex_enter(&mod_lock); ctl = &modules; do { if (ctl->mod_busy || ctl->mod_mp == NULL) continue; prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); } while ((ctl = ctl->mod_next) != &modules); mutex_exit(&mod_lock); #endif } while (all && (prv = prv->dtpv_next) != NULL); } #ifdef illumos /* * Iterate over each probe, and call the Framework-to-Provider API function * denoted by offs. */ static void dtrace_probe_foreach(uintptr_t offs) { dtrace_provider_t *prov; void (*func)(void *, dtrace_id_t, void *); dtrace_probe_t *probe; dtrace_icookie_t cookie; int i; /* * We disable interrupts to walk through the probe array. This is * safe -- the dtrace_sync() in dtrace_unregister() assures that we * won't see stale data. */ cookie = dtrace_interrupt_disable(); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) { /* * This probe isn't enabled -- don't call the function. */ continue; } prov = probe->dtpr_provider; func = *((void(**)(void *, dtrace_id_t, void *)) ((uintptr_t)&prov->dtpv_pops + offs)); func(prov->dtpv_arg, i + 1, probe->dtpr_arg); } dtrace_interrupt_enable(cookie); } #endif static int dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; ASSERT(MUTEX_HELD(&dtrace_lock)); dtrace_ecb_create_cache = NULL; if (desc == NULL) { /* * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. */ (void) dtrace_ecb_create_enable(NULL, enab); return (0); } dtrace_probekey(desc, &pkey); dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab)); } /* * DTrace Helper Provider Functions */ static void dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr) { attr->dtat_name = DOF_ATTR_NAME(dofattr); attr->dtat_data = DOF_ATTR_DATA(dofattr); attr->dtat_class = DOF_ATTR_CLASS(dofattr); } static void dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, const dof_provider_t *dofprov, char *strtab) { hprov->dthpv_provname = strtab + dofprov->dofpv_name; dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, dofprov->dofpv_provattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, dofprov->dofpv_modattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, dofprov->dofpv_funcattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, dofprov->dofpv_nameattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, dofprov->dofpv_argsattr); } static void dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint32_t *off, *enoff; uint8_t *arg; char *strtab; uint_t i, nprobes; dtrace_helper_provdesc_t dhpv; dtrace_helper_probedesc_t dhpb; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; void *parg; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_probes * dof->dofh_secsize); arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prargs * dof->dofh_secsize); off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_proffs 
* dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); enoff = NULL; /* * See dtrace_helper_provider_validate(). */ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE) { enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prenoffs * dof->dofh_secsize); enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); } nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Create the provider. */ dtrace_dofprov2hprov(&dhpv, provider, strtab); if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) return; meta->dtm_count++; /* * Create the probes. */ for (i = 0; i < nprobes; i++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + i * prb_sec->dofs_entsize); /* See the check in dtrace_helper_provider_validate(). */ if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) continue; dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; dhpb.dthpb_base = probe->dofpr_addr; dhpb.dthpb_offs = off + probe->dofpr_offidx; dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; dhpb.dthpb_nenoffs = 0; } dhpb.dthpb_args = arg + probe->dofpr_argidx; dhpb.dthpb_nargc = probe->dofpr_nargc; dhpb.dthpb_xargc = probe->dofpr_xargc; dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } } static void dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provide_one(dhp, sec, pid); } /* * We may have just created probes, so we must now rematch against * any retained enablings. Note that this call will acquire both * cpu_lock and dtrace_lock; the fact that we are holding * dtrace_meta_lock now is what defines the ordering with respect to * these three locks. */ dtrace_enabling_matchall(); } static void dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec; dof_provider_t *provider; char *strtab; dtrace_helper_provdesc_t dhpv; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); /* * Create the provider. 
*/ dtrace_dofprov2hprov(&dhpv, provider, strtab); mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); meta->dtm_count--; } static void dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provider_remove_one(dhp, sec, pid); } } /* * DTrace Meta Provider-to-Framework API Functions * * These functions implement the Meta Provider-to-Framework API, as described * in . */ int dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, dtrace_meta_provider_id_t *idp) { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; int i; *idp = DTRACE_METAPROVNONE; /* * We strictly don't need the name, but we hold onto it for * debuggability. All hail error queues! */ if (name == NULL) { cmn_err(CE_WARN, "failed to register meta-provider: " "invalid name"); return (EINVAL); } if (mops == NULL || mops->dtms_create_probe == NULL || mops->dtms_provide_pid == NULL || mops->dtms_remove_pid == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); } meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(meta->dtm_name, name); meta->dtm_arg = arg; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (dtrace_meta_pid != NULL) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-register %s: " "user-land meta-provider exists", name); kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; /* * If there are providers and probes ready to go, pass them * off to the new meta provider now. */ help = dtrace_deferred_pid; dtrace_deferred_pid = NULL; mutex_exit(&dtrace_lock); while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, help->dthps_pid); } next = help->dthps_next; help->dthps_next = NULL; help->dthps_prev = NULL; help->dthps_deferred = 0; help = next; } mutex_exit(&dtrace_meta_lock); return (0); } int dtrace_meta_unregister(dtrace_meta_provider_id_t id) { dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (old == dtrace_meta_pid) { pp = &dtrace_meta_pid; } else { panic("attempt to unregister non-existent " "dtrace meta-provider %p\n", (void *)old); } if (old->dtm_count != 0) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); return (EBUSY); } *pp = NULL; mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); } /* * DTrace DIF Object Functions */ static int dtrace_difo_err(uint_t pc, const char *format, ...) { if (dtrace_err_verbose) { va_list alist; (void) uprintf("dtrace DIF object error: [%u]: ", pc); va_start(alist, format); (void) vuprintf(format, alist); va_end(alist); } #ifdef DTRACE_ERRDEBUG dtrace_errdebug(format); #endif return (1); } /* * Validate a DTrace DIF object by checking the IR instructions. The following * rules are currently enforced by dtrace_difo_validate(): * * 1. 
Each instruction must have a valid opcode * 2. Each register, string, variable, or subroutine reference must be valid * 3. No instruction can modify register %r0 (must be zero) * 4. All instruction reserved bits must be set to zero * 5. The last instruction must be a "ret" instruction * 6. All branch targets must reference a valid instruction _after_ the branch */ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { int err = 0, i; int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; int maxglobal = -1, maxlocal = -1, maxtlocal = -1; kcheckload = cr == NULL || (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t r1 = DIF_INSTR_R1(instr); uint_t r2 = DIF_INSTR_R2(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t rs = DIF_INSTR_RS(instr); uint_t label = DIF_INSTR_LABEL(instr); uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t type = DIF_INSTR_TYPE(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_ALLOCS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDSB: case DIF_OP_LDSH: case DIF_OP_LDSW: case DIF_OP_LDUB: case DIF_OP_LDUH: case DIF_OP_LDUW: case DIF_OP_LDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to 0 address\n"); break; case DIF_OP_CMP: case 
DIF_OP_SCMP: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_TST: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: if (label >= dp->dtdo_len) { err += efunc(pc, "invalid branch target %u\n", label); } if (label <= pc) { err += efunc(pc, "backward branch to %u\n", label); } break; case DIF_OP_RET: if (r1 != 0 || r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: if (r1 != 0 || r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_SETX: if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { err += efunc(pc, "invalid integer ref %u\n", DIF_INSTR_INTEGER(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_SETS: if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { err += efunc(pc, "invalid string ref %u\n", DIF_INSTR_STRING(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGA: case DIF_OP_LDTA: if (r1 > DIF_VAR_ARRAY_MAX) err += efunc(pc, "invalid array %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGS: case DIF_OP_LDTS: case DIF_OP_LDLS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STGS: case DIF_OP_STTS: case DIF_OP_STLS: case DIF_OP_STGAA: case DIF_OP_STTAA: if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: if (subr > DIF_SUBR_MAX) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (subr == DIF_SUBR_COPYOUT || subr == DIF_SUBR_COPYOUTSTR) { dp->dtdo_destructive = 1; } if (subr == DIF_SUBR_GETF) { /* * If we have a getf() we need to record that * in our state. Note that our state can be * NULL if this is a helper -- but in that * case, the call to getf() is itself illegal, * and will be caught (slightly later) when * the helper is validated. 
*/ if (vstate->dtvs_state != NULL) vstate->dtvs_state->dts_getf++; } break; case DIF_OP_PUSHTR: if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) err += efunc(pc, "invalid ref type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; case DIF_OP_PUSHTV: if (type != DIF_TYPE_CTF) err += efunc(pc, "invalid val type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } if (dp->dtdo_len != 0 && DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { err += efunc(dp->dtdo_len - 1, "expected 'ret' as last DIF instruction\n"); } if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. */ switch (dp->dtdo_rtype.dtdt_size) { case 0: case sizeof (uint8_t): case sizeof (uint16_t): case sizeof (uint32_t): case sizeof (uint64_t): break; default: err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; uint_t id, ndx; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && v->dtdv_scope != DIFV_SCOPE_LOCAL) { err += efunc(i, "unrecognized variable scope %d\n", v->dtdv_scope); break; } if (v->dtdv_kind != DIFV_KIND_ARRAY && v->dtdv_kind != DIFV_KIND_SCALAR) { err += efunc(i, "unrecognized variable type %d\n", v->dtdv_kind); break; } if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { err += efunc(i, "%d exceeds variable id limit\n", id); break; } if (id < DIF_VAR_OTHER_UBASE) continue; /* * For user-defined variables, we need to check that this * definition is identical to any previous definition that we * encountered. 
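 * (For instance, if one enabling defines user variable id N as a scalar
 * and another enabling defines the same id as an array, or changes its
 * type flags or size, the checks below reject the new DIF object.)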
*/ ndx = id - DIF_VAR_OTHER_UBASE; switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: if (maxglobal == -1 || ndx > maxglobal) maxglobal = ndx; if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_globals[ndx]) != NULL) existing = &svar->dtsv_var; } break; case DIFV_SCOPE_THREAD: if (maxtlocal == -1 || ndx > maxtlocal) maxtlocal = ndx; if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: if (maxlocal == -1 || ndx > maxlocal) maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[ndx]) != NULL) existing = &svar->dtsv_var; } break; } vt = &v->dtdv_type; if (vt->dtdt_flags & DIF_TF_BYREF) { if (vt->dtdt_size == 0) { err += efunc(i, "zero-sized variable\n"); break; } if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || v->dtdv_scope == DIFV_SCOPE_LOCAL) && vt->dtdt_size > dtrace_statvar_maxsize) { err += efunc(i, "oversized by-ref static\n"); break; } } if (existing == NULL || existing->dtdv_id == 0) continue; ASSERT(existing->dtdv_id == v->dtdv_id); ASSERT(existing->dtdv_scope == v->dtdv_scope); if (existing->dtdv_kind != v->dtdv_kind) err += efunc(i, "%d changed variable kind\n", id); et = &existing->dtdv_type; if (vt->dtdt_flags != et->dtdt_flags) { err += efunc(i, "%d changed variable type flags\n", id); break; } if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { err += efunc(i, "%d changed variable type size\n", id); break; } } for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_LDGS: case DIF_OP_LDGAA: case DIF_OP_STGS: case DIF_OP_STGAA: if (v > DIF_VAR_OTHER_UBASE + maxglobal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDTS: case DIF_OP_LDTAA: case DIF_OP_STTS: case DIF_OP_STTAA: if (v > DIF_VAR_OTHER_UBASE + maxtlocal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDLS: case DIF_OP_STLS: if (v > DIF_VAR_OTHER_UBASE + maxlocal) err += efunc(pc, "invalid variable %u\n", v); break; default: break; } } return (err); } /* * Validate a DTrace DIF object that it is to be used as a helper. Helpers * are much more constrained than normal DIFOs. Specifically, they may * not: * * 1. Make calls to subroutines other than copyin(), copyinstr() or * miscellaneous string routines * 2. Access DTrace variables other than the args[] array, and the * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. * 3. Have thread-local variables. * 4. Have dynamic variables. */ static int dtrace_difo_validate_helper(dtrace_difo_t *dp) { int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; int err = 0; uint_t pc; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: case DIF_OP_ALLOCS: case DIF_OP_CMP: case DIF_OP_SCMP: case DIF_OP_TST: case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: case DIF_OP_RET: case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: case DIF_OP_SETX: case DIF_OP_SETS: case DIF_OP_LDGA: case DIF_OP_LDLS: case DIF_OP_STGS: case DIF_OP_STLS: case DIF_OP_PUSHTR: case DIF_OP_PUSHTV: break; case DIF_OP_LDGS: if (v >= DIF_VAR_OTHER_UBASE) break; if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) break; if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || v == DIF_VAR_PPID || v == DIF_VAR_TID || v == DIF_VAR_EXECARGS || v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || v == DIF_VAR_UID || v == DIF_VAR_GID) break; err += efunc(pc, "illegal variable %u\n", v); break; case DIF_OP_LDTA: case DIF_OP_LDTS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: err += efunc(pc, "illegal dynamic variable load\n"); break; case DIF_OP_STTS: case DIF_OP_STGAA: case DIF_OP_STTAA: err += efunc(pc, "illegal dynamic variable store\n"); break; case DIF_OP_CALL: if (subr == DIF_SUBR_ALLOCA || subr == DIF_SUBR_BCOPY || subr == DIF_SUBR_COPYIN || subr == DIF_SUBR_COPYINTO || subr == DIF_SUBR_COPYINSTR || subr == DIF_SUBR_INDEX || subr == DIF_SUBR_INET_NTOA || subr == DIF_SUBR_INET_NTOA6 || subr == DIF_SUBR_INET_NTOP || subr == DIF_SUBR_JSON || subr == DIF_SUBR_LLTOSTR || subr == DIF_SUBR_STRTOLL || subr == DIF_SUBR_RINDEX || subr == DIF_SUBR_STRCHR || subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || subr == DIF_SUBR_HTONS || subr == DIF_SUBR_HTONL || subr == DIF_SUBR_HTONLL || subr == DIF_SUBR_NTOHS || subr == DIF_SUBR_NTOHL || subr == DIF_SUBR_NTOHLL || subr == DIF_SUBR_MEMREF) break; #ifdef __FreeBSD__ if (subr == DIF_SUBR_MEMSTR) break; #endif err += efunc(pc, "invalid subr %u\n", subr); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } return (err); } /* * Returns 1 if the expression in the DIF object can be cached on a per-thread * basis; 0 if not. */ static int dtrace_difo_cacheable(dtrace_difo_t *dp) { int i; if (dp == NULL) return (0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) continue; switch (v->dtdv_id) { case DIF_VAR_CURTHREAD: case DIF_VAR_PID: case DIF_VAR_TID: case DIF_VAR_EXECARGS: case DIF_VAR_EXECNAME: case DIF_VAR_ZONENAME: break; default: return (0); } } /* * This DIF object may be cacheable. Now we need to look for any * array loading instructions, any memory loading instructions, or * any stores to thread-local variables. 
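The helper validator above is a whitelist: any subroutine not explicitly listed (copyin(), copyinstr(), the string helpers, the byte-order helpers, and so on) is rejected. The sketch below restates that idea with subroutine names instead of DIF_SUBR_* constants; the allowed[] list is abbreviated and illustrative, not the authoritative set.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* An abbreviated, illustrative whitelist of helper-callable subroutines. */
static const char *allowed[] = {
	"alloca", "bcopy", "copyin", "copyinstr", "index", "rindex",
	"strchr", "strrchr", "strstr", "strjoin", "lltostr",
	"htonl", "ntohl",
};

static int
toy_helper_subr_ok(const char *subr)
{
	size_t i;

	for (i = 0; i < sizeof (allowed) / sizeof (allowed[0]); i++) {
		if (strcmp(subr, allowed[i]) == 0)
			return (1);
	}
	return (0);
}

int
main(void)
{
	(void) printf("copyin:  %d\n", toy_helper_subr_ok("copyin"));
	(void) printf("copyout: %d\n", toy_helper_subr_ok("copyout"));
	return (0);
}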
*/ for (i = 0; i < dp->dtdo_len; i++) { uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || op == DIF_OP_LDGA || op == DIF_OP_STTS) return (0); } return (1); } static void dtrace_difo_hold(dtrace_difo_t *dp) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); /* * We need to check this DIF object for references to the variable * DIF_VAR_VTIMESTAMP. */ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; if (dtrace_vtime_references++ == 0) dtrace_vtime_enable(); } } /* * This routine calculates the dynamic variable chunksize for a given DIF * object. The calculation is not fool-proof, and can probably be tricked by * malicious DIF -- but it works for all compiler-generated DIF. Because this * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail * if a dynamic variable size exceeds the chunksize. */ static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { uint64_t sval = 0; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; uint_t ttop = 0; size_t size, ksize; uint_t id, i; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = text[pc]; uint_t op = DIF_INSTR_OP(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t r1 = DIF_INSTR_R1(instr); uint_t nkeys = 0; uchar_t scope = 0; dtrace_key_t *key = tupregs; switch (op) { case DIF_OP_SETX: sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; srd = rd; continue; case DIF_OP_STTS: key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_size = 0; key[1].dttk_size = 0; nkeys = 2; scope = DIFV_SCOPE_THREAD; break; case DIF_OP_STGAA: case DIF_OP_STTAA: nkeys = ttop; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) key[nkeys++].dttk_size = 0; key[nkeys++].dttk_size = 0; if (op == DIF_OP_STTAA) { scope = DIFV_SCOPE_THREAD; } else { scope = DIFV_SCOPE_GLOBAL; } break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) return; if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { /* * If the register for the size of the "pushtr" * is %r0 (or the value is 0) and the type is * a string, we'll use the system-wide default * string size. */ tupregs[ttop++].dttk_size = dtrace_strsize_default; } else { if (srd == 0) return; if (sval > LONG_MAX) return; tupregs[ttop++].dttk_size = sval; } break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) return; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; } sval = 0; srd = 0; if (nkeys == 0) continue; /* * We have a dynamic variable allocation; calculate its size. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); size = sizeof (dtrace_dynvar_t); size += sizeof (dtrace_key_t) * (nkeys - 1); size += ksize; /* * Now we need to determine the size of the stored data. */ id = DIF_INSTR_VAR(instr); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id == id && v->dtdv_scope == scope) { size += v->dtdv_type.dtdt_size; break; } } if (i == dp->dtdo_varlen) return; /* * We have the size. If this is larger than the chunk size * for our dynamic variable state, reset the chunk size. */ size = P2ROUNDUP(size, sizeof (uint64_t)); /* * Before setting the chunk size, check that we're not going * to set it to a negative value... 
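The chunk-size arithmetic above adds a dynamic-variable header, one key record per tuple key beyond the first, the key data itself (each key rounded up to 8 bytes), and the stored value, then rounds the total up to 8 bytes. A worked user-space example of that arithmetic; the header and key-record sizes (48 and 32) are made-up stand-ins, not the real sizeof (dtrace_dynvar_t) and sizeof (dtrace_key_t).

#include <stdint.h>
#include <stdio.h>

/* Round x up to the next multiple of a power-of-two alignment. */
#define	TOY_P2ROUNDUP(x, a) \
	((((uint64_t)(x)) + ((uint64_t)(a) - 1)) & ~((uint64_t)(a) - 1))

int
main(void)
{
	uint64_t hdr = 48, keyrec = 32;	/* hypothetical struct sizes */
	uint64_t keysz[2] = { 13, 8 };	/* e.g. a short string key, an int key */
	uint64_t nkeys = 2, valsz = 24;
	uint64_t ksize = 0, size, i;

	for (i = 0; i < nkeys; i++)
		ksize += TOY_P2ROUNDUP(keysz[i], sizeof (uint64_t));

	size = hdr + keyrec * (nkeys - 1) + ksize + valsz;
	size = TOY_P2ROUNDUP(size, sizeof (uint64_t));

	/* Prints "ksize = 24, chunk = 128" for these inputs. */
	(void) printf("ksize = %llu, chunk = %llu\n",
	    (unsigned long long)ksize, (unsigned long long)size);
	return (0);
}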
*/ if (size > LONG_MAX) return; /* * ...and make certain that we didn't badly overflow. */ if (size < ksize || size < sizeof (dtrace_dynvar_t)) return; if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } } static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i, oldsvars, osz, nsz, otlocals, ntlocals; uint_t id; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, ***svarp = NULL; size_t dsize = 0; uint8_t scope = v->dtdv_scope; int *np = NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; switch (scope) { case DIFV_SCOPE_THREAD: while (id >= (otlocals = vstate->dtvs_ntlocals)) { dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) ntlocals = 1; osz = otlocals * sizeof (dtrace_difv_t); nsz = ntlocals * sizeof (dtrace_difv_t); tlocals = kmem_zalloc(nsz, KM_SLEEP); if (osz != 0) { bcopy(vstate->dtvs_tlocals, tlocals, osz); kmem_free(vstate->dtvs_tlocals, osz); } vstate->dtvs_tlocals = tlocals; vstate->dtvs_ntlocals = ntlocals; } vstate->dtvs_tlocals[id] = *v; continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = &vstate->dtvs_locals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = NCPU * (v->dtdv_type.dtdt_size + sizeof (uint64_t)); else dsize = NCPU * sizeof (uint64_t); break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = &vstate->dtvs_globals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = v->dtdv_type.dtdt_size + sizeof (uint64_t); break; default: ASSERT(0); } while (id >= (oldsvars = *np)) { dtrace_statvar_t **statics; int newsvars, oldsize, newsize; if ((newsvars = (oldsvars << 1)) == 0) newsvars = 1; oldsize = oldsvars * sizeof (dtrace_statvar_t *); newsize = newsvars * sizeof (dtrace_statvar_t *); statics = kmem_zalloc(newsize, KM_SLEEP); if (oldsize != 0) { bcopy(*svarp, statics, oldsize); kmem_free(*svarp, oldsize); } *svarp = statics; *np = newsvars; } if ((svar = (*svarp)[id]) == NULL) { svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); svar->dtsv_var = *v; if ((svar->dtsv_size = dsize) != 0) { svar->dtsv_data = (uint64_t)(uintptr_t) kmem_zalloc(dsize, KM_SLEEP); } (*svarp)[id] = svar; } svar->dtsv_refcnt++; } dtrace_difo_chunksize(dp, vstate); dtrace_difo_hold(dp); } static dtrace_difo_t * dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { dtrace_difo_t *new; size_t sz; ASSERT(dp->dtdo_buf != NULL); ASSERT(dp->dtdo_refcnt != 0); new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); ASSERT(dp->dtdo_buf != NULL); sz = dp->dtdo_len * sizeof (dif_instr_t); new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_buf, new->dtdo_buf, sz); new->dtdo_len = dp->dtdo_len; if (dp->dtdo_strtab != NULL) { ASSERT(dp->dtdo_strlen != 0); new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); new->dtdo_strlen = dp->dtdo_strlen; } if (dp->dtdo_inttab != NULL) { ASSERT(dp->dtdo_intlen != 0); sz = dp->dtdo_intlen * sizeof (uint64_t); new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); new->dtdo_intlen = dp->dtdo_intlen; } if (dp->dtdo_vartab != NULL) { ASSERT(dp->dtdo_varlen != 0); sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); new->dtdo_varlen = dp->dtdo_varlen; } dtrace_difo_init(new, vstate); return 
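The variable tables above grow by doubling (1, 2, 4, ...) until the requested index fits, copying the old contents each time and freeing the old array. A user-space sketch of the same pattern with calloc() standing in for kmem_zalloc(); grow() is an invented name.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Double the table until index id fits, preserving existing entries. */
static int *
grow(int *tab, size_t *np, size_t id)
{
	while (id >= *np) {
		size_t n = (*np == 0) ? 1 : *np * 2;
		int *ntab = calloc(n, sizeof (int));

		if (ntab == NULL)
			abort();
		if (*np != 0) {
			memcpy(ntab, tab, *np * sizeof (int));
			free(tab);
		}
		tab = ntab;
		*np = n;
	}
	return (tab);
}

int
main(void)
{
	int *tab = NULL;
	size_t ntab = 0;

	tab = grow(tab, &ntab, 5);	/* table doubles up to 8 slots */
	tab[5] = 42;
	(void) printf("%zu slots, tab[5] = %d\n", ntab, tab[5]);
	free(tab);
	return (0);
}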
(new); } static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; int *np = NULL; switch (scope) { case DIFV_SCOPE_THREAD: continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = vstate->dtvs_locals; break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = vstate->dtvs_globals; break; default: ASSERT(0); } if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; ASSERT(id < *np); svar = svarp[id]; ASSERT(svar != NULL); ASSERT(svar->dtsv_refcnt > 0); if (--svar->dtsv_refcnt > 0) continue; if (svar->dtsv_size != 0) { ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } kmem_free(svar, sizeof (dtrace_statvar_t)); svarp[id] = NULL; } if (dp->dtdo_buf != NULL) kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); if (dp->dtdo_inttab != NULL) kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); if (dp->dtdo_strtab != NULL) kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); if (dp->dtdo_vartab != NULL) kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); } static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; ASSERT(dtrace_vtime_references > 0); if (--dtrace_vtime_references == 0) dtrace_vtime_disable(); } if (--dp->dtdo_refcnt == 0) dtrace_difo_destroy(dp, vstate); } /* * DTrace Format Functions */ static uint16_t dtrace_format_add(dtrace_state_t *state, char *str) { char *fmt, **new; uint16_t ndx, len = strlen(str) + 1; fmt = kmem_zalloc(len, KM_SLEEP); bcopy(str, fmt, len); for (ndx = 0; ndx < state->dts_nformats; ndx++) { if (state->dts_formats[ndx] == NULL) { state->dts_formats[ndx] = fmt; return (ndx + 1); } } if (state->dts_nformats == USHRT_MAX) { /* * This is only likely if a denial-of-service attack is being * attempted. As such, it's okay to fail silently here. */ kmem_free(fmt, len); return (0); } /* * For simplicity, we always resize the formats array to be exactly the * number of formats. 
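Format handles returned above are 1-based so that zero can mean "no format", and a freed slot is reused before the table is grown. A fixed-size user-space sketch of that slot policy; toy_fmt_add(), toy_fmt_remove() and TOY_NFORMATS are inventions for the example, and the real table grows on demand rather than capping out.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	TOY_NFORMATS	8

static char *formats[TOY_NFORMATS];

static unsigned short
toy_fmt_add(const char *str)
{
	unsigned short ndx;

	for (ndx = 0; ndx < TOY_NFORMATS; ndx++) {
		if (formats[ndx] == NULL) {
			formats[ndx] = strdup(str);
			return (ndx + 1);	/* 1-based handle */
		}
	}
	return (0);				/* table full: no format */
}

static void
toy_fmt_remove(unsigned short fmt)
{
	free(formats[fmt - 1]);
	formats[fmt - 1] = NULL;
}

int
main(void)
{
	unsigned short a = toy_fmt_add("%d %s\n");
	unsigned short b = toy_fmt_add("%x\n");

	(void) printf("handles: %u %u\n", a, b);
	toy_fmt_remove(a);
	(void) printf("reused: %u\n", toy_fmt_add("%p\n"));	/* slot 1 again */
	return (0);
}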
*/ ndx = state->dts_nformats++; new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); if (state->dts_formats != NULL) { ASSERT(ndx != 0); bcopy(state->dts_formats, new, ndx * sizeof (char *)); kmem_free(state->dts_formats, ndx * sizeof (char *)); } state->dts_formats = new; state->dts_formats[ndx] = fmt; return (ndx + 1); } static void dtrace_format_remove(dtrace_state_t *state, uint16_t format) { char *fmt; ASSERT(state->dts_formats != NULL); ASSERT(format <= state->dts_nformats); ASSERT(state->dts_formats[format - 1] != NULL); fmt = state->dts_formats[format - 1]; kmem_free(fmt, strlen(fmt) + 1); state->dts_formats[format - 1] = NULL; } static void dtrace_format_destroy(dtrace_state_t *state) { int i; if (state->dts_nformats == 0) { ASSERT(state->dts_formats == NULL); return; } ASSERT(state->dts_formats != NULL); for (i = 0; i < state->dts_nformats; i++) { char *fmt = state->dts_formats[i]; if (fmt == NULL) continue; kmem_free(fmt, strlen(fmt) + 1); } kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); state->dts_nformats = 0; state->dts_formats = NULL; } /* * DTrace Predicate Functions */ static dtrace_predicate_t * dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); pred->dtp_difo = dp; pred->dtp_refcnt = 1; if (!dtrace_difo_cacheable(dp)) return (pred); if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { /* * This is only theoretically possible -- we have had 2^32 * cacheable predicates on this machine. We cannot allow any * more predicates to become cacheable: as unlikely as it is, * there may be a thread caching a (now stale) predicate cache * ID. (N.B.: the temptation is being successfully resisted to * have this cmn_err() "Holy shit -- we executed this code!") */ return (pred); } pred->dtp_cacheid = dtrace_predcache_id++; return (pred); } static void dtrace_predicate_hold(dtrace_predicate_t *pred) { ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); pred->dtp_refcnt++; } static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); if (--pred->dtp_refcnt == 0) { dtrace_difo_release(pred->dtp_difo, vstate); kmem_free(pred, sizeof (dtrace_predicate_t)); } } /* * DTrace Action Description Functions */ static dtrace_actdesc_t * dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, uint64_t uarg, uint64_t arg) { dtrace_actdesc_t *act; #ifdef illumos ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); #endif act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; act->dtad_ntuple = ntuple; act->dtad_uarg = uarg; act->dtad_arg = arg; act->dtad_refcnt = 1; return (act); } static void dtrace_actdesc_hold(dtrace_actdesc_t *act) { ASSERT(act->dtad_refcnt >= 1); act->dtad_refcnt++; } static void dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) { dtrace_actkind_t kind = act->dtad_kind; dtrace_difo_t *dp; ASSERT(act->dtad_refcnt >= 1); if (--act->dtad_refcnt != 0) return; if ((dp = act->dtad_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; #ifdef illumos ASSERT((str != NULL && 
(uintptr_t)str >= KERNELBASE) || (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); #endif if (str != NULL) kmem_free(str, strlen(str) + 1); } kmem_free(act, sizeof (dtrace_actdesc_t)); } /* * DTrace ECB Functions */ static dtrace_ecb_t * dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) { dtrace_ecb_t *ecb; dtrace_epid_t epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; ecb->dte_probe = probe; /* * The default size is the size of the default action: recording * the header. */ ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); epid = state->dts_epid++; if (epid - 1 >= state->dts_necbs) { dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; ASSERT(epid == state->dts_necbs + 1); if (necbs == 0) { ASSERT(oecbs == NULL); necbs = 1; } ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); if (oecbs != NULL) bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); dtrace_membar_producer(); state->dts_ecbs = ecbs; if (oecbs != NULL) { /* * If this state is active, we must dtrace_sync() * before we can free the old dts_ecbs array: we're * coming in hot, and there may be active ring * buffer processing (which indexes into the dts_ecbs * array) on another CPU. */ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) dtrace_sync(); kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); } dtrace_membar_producer(); state->dts_necbs = necbs; } ecb->dte_state = state; ASSERT(state->dts_ecbs[epid - 1] == NULL); dtrace_membar_producer(); state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; return (ecb); } static void dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { /* * This is the NULL probe -- there's nothing to do. */ return; } if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; /* * We're the first ECB on this probe. */ probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; prov->dtpv_pops.dtps_enable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); } else { /* * This probe is already active. Swing the last pointer to * point to the new ECB, and issue a dtrace_sync() to assure * that all CPUs have seen the change. */ ASSERT(probe->dtpr_ecb_last != NULL); probe->dtpr_ecb_last->dte_next = ecb; probe->dtpr_ecb_last = ecb; probe->dtpr_predcache = 0; dtrace_sync(); } } static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { dtrace_action_t *act; uint32_t curneeded = UINT32_MAX; uint32_t aggbase = UINT32_MAX; /* * If we record anything, we always record the dtrace_rechdr_t. (And * we always record it first.) 
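When the ECB table grows above, the new array is published before the new size, with a producer memory barrier between the stores, so a consumer that observes the larger dts_necbs is guaranteed to also see the new dts_ecbs array. A loose C11-atomics analogy of that publish-then-size ordering; this is an illustration only, not the kernel's mechanism, which relies on dtrace_membar_producer() and dtrace_sync().

#include <stdatomic.h>
#include <stdio.h>

struct toy_table {
	_Atomic(int *)	slots;		/* the backing array */
	atomic_size_t	nslots;		/* how many slots are valid */
};

static void
toy_publish(struct toy_table *t, int *newslots, size_t newn)
{
	/* Writer: make the array visible before the size that covers it. */
	atomic_store_explicit(&t->slots, newslots, memory_order_release);
	atomic_store_explicit(&t->nslots, newn, memory_order_release);
}

static int
toy_read(struct toy_table *t, size_t idx, int *out)
{
	/* Reader: a size that admits idx implies the array is visible. */
	size_t n = atomic_load_explicit(&t->nslots, memory_order_acquire);

	if (idx >= n)
		return (0);
	*out = atomic_load_explicit(&t->slots, memory_order_acquire)[idx];
	return (1);
}

int
main(void)
{
	static int a[4] = { 10, 20, 30, 40 };
	struct toy_table t = { NULL, 0 };
	int v;

	toy_publish(&t, a, 4);
	if (toy_read(&t, 2, &v))
		(void) printf("slot 2 = %d\n", v);
	return (0);
}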
*/ ecb->dte_size = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { dtrace_recdesc_t *rec = &act->dta_rec; ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; ASSERT(rec->dtrd_size != 0); ASSERT(agg->dtag_first != NULL); ASSERT(act->dta_prev->dta_intuple); ASSERT(aggbase != UINT32_MAX); ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, curneeded); aggbase = UINT32_MAX; curneeded = UINT32_MAX; } else if (act->dta_intuple) { if (curneeded == UINT32_MAX) { /* * This is the first record in a tuple. Align * curneeded to be at offset 4 in an 8-byte * aligned block. */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ASSERT3U(aggbase, ==, UINT32_MAX); curneeded = P2PHASEUP(ecb->dte_size, sizeof (uint64_t), sizeof (dtrace_aggid_t)); aggbase = curneeded - sizeof (dtrace_aggid_t); ASSERT(IS_P2ALIGNED(aggbase, sizeof (uint64_t))); } curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; } else { /* tuples must be followed by an aggregation */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); rec->dtrd_offset = ecb->dte_size; if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) return (EINVAL); ecb->dte_size += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } } if ((act = ecb->dte_action) != NULL && !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && ecb->dte_size == sizeof (dtrace_rechdr_t)) { /* * If the size is still sizeof (dtrace_rechdr_t), then all * actions store no data; set the size to 0. 
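Record layout above is a running total: each record's offset is the current size rounded up to that record's alignment, and the record's size is then added, with an overflow check at every step. A worked example with made-up record sizes and alignments; the 12-byte starting value is a hypothetical header, not necessarily sizeof (dtrace_rechdr_t).

#include <stdint.h>
#include <stdio.h>

#define	TOY_P2ROUNDUP(x, a) \
	(((x) + ((uint32_t)(a) - 1)) & ~((uint32_t)(a) - 1))

int
main(void)
{
	struct { uint32_t size, align; } rec[] = {
		{ 4, 4 }, { 8, 8 }, { 2, 2 },	/* hypothetical records */
	};
	uint32_t size = 12;			/* hypothetical header size */
	uint32_t i, off;

	for (i = 0; i < 3; i++) {
		off = TOY_P2ROUNDUP(size, rec[i].align);
		(void) printf("record %u at offset %u\n", i, off);
		size = off + rec[i].size;
	}
	/* Prints offsets 12, 16, 24 and a final size of 26. */
	(void) printf("final size %u\n", size);
	return (0);
}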
*/ ecb->dte_size = 0; } ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); return (0); } static dtrace_action_t * dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_aggregation_t *agg; size_t size = sizeof (uint64_t); int ntuple = desc->dtad_ntuple; dtrace_action_t *act; dtrace_recdesc_t *frec; dtrace_aggid_t aggid; dtrace_state_t *state = ecb->dte_state; agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); agg->dtag_ecb = ecb; ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); switch (desc->dtad_kind) { case DTRACEAGG_MIN: agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; case DTRACEAGG_COUNT: agg->dtag_aggregate = dtrace_aggregate_count; break; case DTRACEAGG_QUANTIZE: agg->dtag_aggregate = dtrace_aggregate_quantize; size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * sizeof (uint64_t); break; case DTRACEAGG_LQUANTIZE: { uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_lquantize; if (step == 0 || levels == 0) goto err; size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); break; } case DTRACEAGG_LLQUANTIZE: { uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); int64_t v; agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_llquantize; if (factor < 2 || low >= high || nsteps < factor) goto err; /* * Now check that the number of steps evenly divides a power * of the factor. (This assures both integer bucket size and * linearity within each magnitude.) */ for (v = factor; v < nsteps; v *= factor) continue; if ((v % nsteps) || (nsteps % factor)) goto err; size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); break; } case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; break; case DTRACEAGG_STDDEV: agg->dtag_aggregate = dtrace_aggregate_stddev; size = sizeof (uint64_t) * 4; break; case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; default: goto err; } agg->dtag_action.dta_rec.dtrd_size = size; if (ntuple == 0) goto err; /* * We must make sure that we have enough actions for the n-tuple. */ for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { if (DTRACEACT_ISAGG(act->dta_kind)) break; if (--ntuple == 0) { /* * This is the action with which our n-tuple begins. */ agg->dtag_first = act; goto success; } } /* * This n-tuple is short by ntuple elements. Return failure. */ ASSERT(ntuple != 0); err: kmem_free(agg, sizeof (dtrace_aggregation_t)); return (NULL); success: /* * If the last action in the tuple has a size of zero, it's actually * an expression argument for the aggregating action. */ ASSERT(ecb->dte_action_last != NULL); act = ecb->dte_action_last; if (act->dta_kind == DTRACEACT_DIFEXPR) { ASSERT(act->dta_difo != NULL); if (act->dta_difo->dtdo_rtype.dtdt_size == 0) agg->dtag_hasarg = 1; } /* * We need to allocate an id for this aggregation. 
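The llquantize() validation above requires factor >= 2, low < high, nsteps >= factor, and that the number of steps per order of magnitude evenly divide a power of the factor, which keeps bucket widths integral. A stand-alone restatement of that test; toy_llquantize_ok() and the sample parameters are for illustration only.

#include <stdint.h>
#include <stdio.h>

static int
toy_llquantize_ok(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps)
{
	int64_t v;

	if (factor < 2 || low >= high || nsteps < factor)
		return (0);

	/* Find the smallest power of the factor that reaches nsteps. */
	for (v = factor; v < nsteps; v *= factor)
		continue;

	return ((v % nsteps) == 0 && (nsteps % factor) == 0);
}

int
main(void)
{
	/* 20 steps per decade divides 100 evenly; 30 does not. */
	(void) printf("factor 10, steps 20: %d\n",
	    toy_llquantize_ok(10, 0, 6, 20));
	(void) printf("factor 10, steps 30: %d\n",
	    toy_llquantize_ok(10, 0, 6, 30));
	return (0);
}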
*/ #ifdef illumos aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); #else aggid = alloc_unr(state->dts_aggid_arena); #endif if (aggid - 1 >= state->dts_naggregations) { dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; ASSERT(aggid == state->dts_naggregations + 1); if (naggs == 0) { ASSERT(oaggs == NULL); naggs = 1; } aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); if (oaggs != NULL) { bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); kmem_free(oaggs, onaggs * sizeof (*aggs)); } state->dts_aggregations = aggs; state->dts_naggregations = naggs; } ASSERT(state->dts_aggregations[aggid - 1] == NULL); state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; frec = &agg->dtag_first->dta_rec; if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) frec->dtrd_alignment = sizeof (dtrace_aggid_t); for (act = agg->dtag_first; act != NULL; act = act->dta_next) { ASSERT(!act->dta_intuple); act->dta_intuple = 1; } return (&agg->dtag_action); } static void dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; dtrace_state_t *state = ecb->dte_state; dtrace_aggid_t aggid = agg->dtag_id; ASSERT(DTRACEACT_ISAGG(act->dta_kind)); #ifdef illumos vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); #else free_unr(state->dts_aggid_arena, aggid); #endif ASSERT(state->dts_aggregations[aggid - 1] == agg); state->dts_aggregations[aggid - 1] = NULL; kmem_free(agg, sizeof (dtrace_aggregation_t)); } static int dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_action_t *action, *last; dtrace_difo_t *dp = desc->dtad_difo; uint32_t size = 0, align = sizeof (uint8_t), mask; uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize; uint64_t arg = desc->dtad_arg; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { /* * If this is an aggregating action, there must be neither * a speculate nor a commit on the action chain. */ dtrace_action_t *act; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); if (act->dta_kind == DTRACEACT_SPECULATE) return (EINVAL); } action = dtrace_ecb_aggregation_create(ecb, desc); if (action == NULL) return (EINVAL); } else { if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || (desc->dtad_kind == DTRACEACT_DIFEXPR && dp != NULL && dp->dtdo_destructive)) { state->dts_destructive = 1; } switch (desc->dtad_kind) { case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
*/ if (arg == 0) { ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != 0); #ifdef illumos ASSERT(arg > KERNELBASE); #endif format = dtrace_format_add(state, (char *)(uintptr_t)arg); } /*FALLTHROUGH*/ case DTRACEACT_LIBACT: case DTRACEACT_TRACEMEM: case DTRACEACT_TRACEMEM_DYNSIZE: if (dp == NULL) return (EINVAL); if ((size = dp->dtdo_rtype.dtdt_size) != 0) break; if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); size = opt[DTRACEOPT_STRSIZE]; } break; case DTRACEACT_STACK: if ((nframes = arg) == 0) { nframes = opt[DTRACEOPT_STACKFRAMES]; ASSERT(nframes > 0); arg = nframes; } size = nframes * sizeof (pc_t); break; case DTRACEACT_JSTACK: if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) nframes = opt[DTRACEOPT_JSTACKFRAMES]; arg = DTRACE_USTACK_ARG(nframes, strsize); /*FALLTHROUGH*/ case DTRACEACT_USTACK: if (desc->dtad_kind != DTRACEACT_JSTACK && (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { strsize = DTRACE_USTACK_STRSIZE(arg); nframes = opt[DTRACEOPT_USTACKFRAMES]; ASSERT(nframes > 0); arg = DTRACE_USTACK_ARG(nframes, strsize); } /* * Save a slot for the pid. */ size = (nframes + 1) * sizeof (uint64_t); size += DTRACE_USTACK_STRSIZE(arg); size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: if (dp == NULL || (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); /* * We have a slot for the pid, plus a slot for the * argument. To keep things simple (aligned with * bitness-neutral sizing), we store each as a 64-bit * quantity. */ size = 2 * sizeof (uint64_t); break; case DTRACEACT_STOP: case DTRACEACT_BREAKPOINT: case DTRACEACT_PANIC: break; case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: if (dp == NULL) return (EINVAL); break; case DTRACEACT_EXIT: if (dp == NULL || (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_SPECULATE: if (ecb->dte_size > sizeof (dtrace_rechdr_t)) return (EINVAL); if (dp == NULL) return (EINVAL); state->dts_speculates = 1; break; case DTRACEACT_PRINTM: size = dp->dtdo_rtype.dtdt_size; break; case DTRACEACT_COMMIT: { dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } if (dp == NULL) return (EINVAL); break; } default: return (EINVAL); } if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { /* * If this is a data-storing action or a speculate, * we must be sure that there isn't a commit on the * action chain. 
*/ dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } } action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); action->dta_rec.dtrd_size = size; } action->dta_refcnt = 1; rec = &action->dta_rec; size = rec->dtrd_size; for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { if (!(size & mask)) { align = mask + 1; break; } } action->dta_kind = desc->dtad_kind; if ((action->dta_difo = dp) != NULL) dtrace_difo_hold(dp); rec->dtrd_action = action->dta_kind; rec->dtrd_arg = arg; rec->dtrd_uarg = desc->dtad_uarg; rec->dtrd_alignment = (uint16_t)align; rec->dtrd_format = format; if ((last = ecb->dte_action_last) != NULL) { ASSERT(ecb->dte_action != NULL); action->dta_prev = last; last->dta_next = action; } else { ASSERT(ecb->dte_action == NULL); ecb->dte_action = action; } ecb->dte_action_last = action; return (0); } static void dtrace_ecb_action_remove(dtrace_ecb_t *ecb) { dtrace_action_t *act = ecb->dte_action, *next; dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; dtrace_difo_t *dp; uint16_t format; if (act != NULL && act->dta_refcnt > 1) { ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); act->dta_refcnt--; } else { for (; act != NULL; act = next) { next = act->dta_next; ASSERT(next != NULL || act == ecb->dte_action_last); ASSERT(act->dta_refcnt == 1); if ((format = act->dta_rec.dtrd_format) != 0) dtrace_format_remove(ecb->dte_state, format); if ((dp = act->dta_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_ecb_aggregation_destroy(ecb, act); } else { kmem_free(act, sizeof (dtrace_action_t)); } } } ecb->dte_action = NULL; ecb->dte_action_last = NULL; ecb->dte_size = 0; } static void dtrace_ecb_disable(dtrace_ecb_t *ecb) { /* * We disable the ECB by removing it from its probe. */ dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&dtrace_lock)); if (probe == NULL) { /* * This is the NULL probe; there is nothing to disable. */ return; } for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { if (pecb == ecb) break; prev = pecb; } ASSERT(pecb != NULL); if (prev == NULL) { probe->dtpr_ecb = ecb->dte_next; } else { prev->dte_next = ecb->dte_next; } if (ecb == probe->dtpr_ecb_last) { ASSERT(ecb->dte_next == NULL); probe->dtpr_ecb_last = prev; } /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. */ dtrace_sync(); if (probe->dtpr_ecb == NULL) { /* * That was the last ECB on the probe; clear the predicate * cache ID for the probe, disable it and sync one more time * to assure that we'll never hit it again. */ dtrace_provider_t *prov = probe->dtpr_provider; ASSERT(ecb->dte_next == NULL); ASSERT(probe->dtpr_ecb_last == NULL); probe->dtpr_predcache = DTRACE_CACHEIDNONE; prov->dtpv_pops.dtps_disable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); dtrace_sync(); } else { /* * There is at least one ECB remaining on the probe. If there * is _exactly_ one, set the probe's predicate cache ID to be * the predicate cache ID of the remaining ECB. 
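The alignment chosen for a record above is derived from the low bits of its size: the loop picks the largest power of two, up to 8, that evenly divides the size, falling back to byte alignment. The same loop, restated stand-alone with an invented name.

#include <stdint.h>
#include <stdio.h>

/* Largest power-of-two divisor of size, capped at sizeof (uint64_t). */
static uint32_t
toy_align_for_size(uint32_t size)
{
	uint32_t align = sizeof (uint8_t), mask;

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}
	return (align);
}

int
main(void)
{
	uint32_t sizes[] = { 16, 12, 6, 5, 0 };
	int i;

	/* Prints alignments 8, 4, 2, 1, 1 for these sizes. */
	for (i = 0; i < 5; i++)
		(void) printf("size %2u -> align %u\n",
		    sizes[i], toy_align_for_size(sizes[i]));
	return (0);
}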
*/ ASSERT(probe->dtpr_ecb_last != NULL); ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); if (probe->dtpr_ecb == probe->dtpr_ecb_last) { dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; ASSERT(probe->dtpr_ecb->dte_next == NULL); if (p != NULL) probe->dtpr_predcache = p->dtp_cacheid; } ecb->dte_next = NULL; } } static void dtrace_ecb_destroy(dtrace_ecb_t *ecb) { dtrace_state_t *state = ecb->dte_state; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); if ((pred = ecb->dte_predicate) != NULL) dtrace_predicate_release(pred, vstate); dtrace_ecb_action_remove(ecb); ASSERT(state->dts_ecbs[epid - 1] == ecb); state->dts_ecbs[epid - 1] = NULL; kmem_free(ecb, sizeof (dtrace_ecb_t)); } static dtrace_ecb_t * dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_enabling_t *enab) { dtrace_ecb_t *ecb; dtrace_predicate_t *pred; dtrace_actdesc_t *act; dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); ecb->dte_uarg = desc->dted_uarg; if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { dtrace_predicate_hold(pred); ecb->dte_predicate = pred; } if (probe != NULL) { /* * If the provider shows more leg than the consumer is old * enough to see, we need to enable the appropriate implicit * predicate bits to prevent the ecb from activating at * revealing times. * * Providers specifying DTRACE_PRIV_USER at register time * are stating that they need the /proc-style privilege * model to be enforced, and this is what DTRACE_COND_OWNER * and DTRACE_COND_ZONEOWNER will then do at probe time. */ prov = probe->dtpr_provider; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_OWNER; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_ZONEOWNER; /* * If the provider shows us kernel innards and the user * is lacking sufficient privilege, enable the * DTRACE_COND_USERMODE implicit predicate. */ if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) ecb->dte_cond |= DTRACE_COND_USERMODE; } if (dtrace_ecb_create_cache != NULL) { /* * If we have a cached ecb, we'll use its action list instead * of creating our own (saving both time and space). 
*/ dtrace_ecb_t *cached = dtrace_ecb_create_cache; dtrace_action_t *act = cached->dte_action; if (act != NULL) { ASSERT(act->dta_refcnt > 0); act->dta_refcnt++; ecb->dte_action = act; ecb->dte_action_last = cached->dte_action_last; ecb->dte_needed = cached->dte_needed; ecb->dte_size = cached->dte_size; ecb->dte_alignment = cached->dte_alignment; } return (ecb); } for (act = desc->dted_action; act != NULL; act = act->dtad_next) { if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } } if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } return (dtrace_ecb_create_cache = ecb); } static int dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) { dtrace_ecb_t *ecb; dtrace_enabling_t *enab = arg; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to * enable it again, so just kick out. */ return (DTRACE_MATCH_NEXT); } if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); dtrace_ecb_enable(ecb); return (DTRACE_MATCH_NEXT); } static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); return (state->dts_ecbs[id - 1]); } static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_naggregations) return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || agg->dtag_id == id); return (state->dts_aggregations[id - 1]); } /* * DTrace Buffer Functions * * The following functions manipulate DTrace buffers. Most of these functions * are called in the context of establishing or processing consumer state; * exceptions are explicitly noted. */ /* * Note: called from cross call context. This function switches the two * buffers on a given CPU. The atomicity of this operation is assured by * disabling interrupts while the actual switch takes place; the disabling of * interrupts serializes the execution with any execution of dtrace_probe() on * the same CPU. */ static void dtrace_buffer_switch(dtrace_buffer_t *buf) { caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; hrtime_t now; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); cookie = dtrace_interrupt_disable(); now = dtrace_gethrtime(); buf->dtb_tomax = xamot; buf->dtb_xamot = tomax; buf->dtb_xamot_drops = buf->dtb_drops; buf->dtb_xamot_offset = buf->dtb_offset; buf->dtb_xamot_errors = buf->dtb_errors; buf->dtb_xamot_flags = buf->dtb_flags; buf->dtb_offset = 0; buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); buf->dtb_interval = now - buf->dtb_switched; buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } /* * Note: called from cross call context. This function activates a buffer * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation * is guaranteed by the disabling of interrupts. 
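On a buffer switch above, the active (tomax) and inactive (xamot) buffers trade places, and the offset, drop and error counters move to the xamot side so the consumer reads consistent statistics for the buffer it is about to process; the real routine does all of this with interrupts disabled so it cannot interleave with dtrace_probe() on the same CPU. A plain user-space sketch of just the field swap, with invented struct and function names.

#include <stdint.h>
#include <stdio.h>

struct toy_buf {
	char		*tomax;		/* active buffer */
	char		*xamot;		/* inactive buffer */
	uint64_t	offset, drops;
	uint64_t	xamot_offset, xamot_drops;
};

static void
toy_buffer_switch(struct toy_buf *b)
{
	char *tomax = b->tomax;

	b->tomax = b->xamot;
	b->xamot = tomax;
	b->xamot_offset = b->offset;
	b->xamot_drops = b->drops;
	b->offset = 0;
	b->drops = 0;
}

int
main(void)
{
	static char a[64], c[64];
	struct toy_buf b = { a, c, 17, 2, 0, 0 };

	toy_buffer_switch(&b);
	(void) printf("consume %llu bytes (%llu drops) from the switched side\n",
	    (unsigned long long)b.xamot_offset,
	    (unsigned long long)b.xamot_drops);
	return (0);
}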
*/ static void dtrace_buffer_activate(dtrace_state_t *state) { dtrace_buffer_t *buf; dtrace_icookie_t cookie = dtrace_interrupt_disable(); buf = &state->dts_buffer[curcpu]; if (buf->dtb_tomax != NULL) { /* * We might like to assert that the buffer is marked inactive, * but this isn't necessarily true: the buffer for the CPU * that processes the BEGIN probe has its buffer activated * manually. In this case, we take the (harmless) action * re-clearing the bit INACTIVE bit. */ buf->dtb_flags &= ~DTRACEBUF_INACTIVE; } dtrace_interrupt_enable(cookie); } #ifdef __FreeBSD__ /* * Activate the specified per-CPU buffer. This is used instead of * dtrace_buffer_activate() when APs have not yet started, i.e. when * activating anonymous state. */ static void dtrace_buffer_activate_cpu(dtrace_state_t *state, int cpu) { if (state->dts_buffer[cpu].dtb_tomax != NULL) state->dts_buffer[cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; } #endif static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, processorid_t cpu, int *factor) { #ifdef illumos cpu_t *cp; #endif dtrace_buffer_t *buf; int allocated = 0, desired = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); *factor = 1; if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, */ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); return (0); err: cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); #else int i; *factor = 1; #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__mips__) || defined(__powerpc__) || defined(__riscv__) /* * FreeBSD isn't good at limiting the amount of memory we * ask to malloc, so let's place a limit here before trying * to do something that might well end in tears at bedtime. */ if (size > physmem * PAGE_SIZE / (128 * (mp_maxid + 1))) return (ENOMEM); #endif ASSERT(MUTEX_HELD(&dtrace_lock)); CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, * the buffer size must match our specified size. 
*/ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } return (0); err: /* * Error allocating memory, so free the buffers that were * allocated before the failed allocation. */ CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } #endif *factor = desired / (allocated > 0 ? allocated : 1); return (ENOMEM); } /* * Note: called from probe context. This function just increments the drop * count on a buffer. It has been made a function to allow for the * possibility of understanding the source of mysterious drop counts. (A * problem for which one may be particularly disappointed that DTrace cannot * be used to understand DTrace.) */ static void dtrace_buffer_drop(dtrace_buffer_t *buf) { buf->dtb_drops++; } /* * Note: called from probe context. This function is called to reserve space * in a buffer. If mstate is non-NULL, sets the scratch base and size in the * mstate. Returns the new offset in the buffer, or a negative value if an * error has occurred. */ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, dtrace_state_t *state, dtrace_mstate_t *mstate) { intptr_t offs = buf->dtb_offset, soffs; intptr_t woffs; caddr_t tomax; size_t total; if (buf->dtb_flags & DTRACEBUF_INACTIVE) return (-1); if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return (-1); } if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. */ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if ((soffs = offs + needed) > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } if (mstate == NULL) return (offs); mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; mstate->dtms_scratch_size = buf->dtb_size - soffs; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && (buf->dtb_flags & DTRACEBUF_FULL)) return (-1); goto out; } total = needed + (offs & (align - 1)); /* * For a ring buffer, life is quite a bit more complicated. Before * we can store any padding, we need to adjust our wrapping offset. * (If we've never before wrapped or we're not about to, no adjustment * is required.) */ if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || offs + total > buf->dtb_size) { woffs = buf->dtb_xamot_offset; if (offs + total > buf->dtb_size) { /* * We can't fit in the end of the buffer. First, a * sanity check that we can fit in the buffer at all. */ if (total > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } /* * We're going to be storing at the top of the buffer, * so now we need to deal with the wrapped offset. 
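Padding written into the buffer above is not arbitrary filler: each padding word is a 32-bit "no EPID" value, so a consumer walking the buffer can skip it unambiguously, and the asserted invariant is that any misalignment is itself a multiple of four. A user-space sketch of that padding loop; TOY_EPIDNONE and toy_pad_to() are invented stand-ins for DTRACE_EPIDNONE and the DTRACE_STORE()-based loop.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	TOY_EPIDNONE	0

/* Pad with 32-bit TOY_EPIDNONE words until offs meets the alignment. */
static size_t
toy_pad_to(char *buf, size_t offs, size_t align)
{
	while (offs & (align - 1)) {
		uint32_t none = TOY_EPIDNONE;

		memcpy(buf + offs, &none, sizeof (none));
		offs += sizeof (uint32_t);
	}
	return (offs);
}

int
main(void)
{
	static char buf[64];

	/* A record needing 8-byte alignment, reserved at offset 20: pads to 24. */
	(void) printf("padded offset: %zu\n", toy_pad_to(buf, 20, 8));
	return (0);
}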
We * only reset our wrapped offset to 0 if it is * currently greater than the current offset. If it * is less than the current offset, it is because a * previous allocation induced a wrap -- but the * allocation didn't subsequently take the space due * to an error or false predicate evaluation. In this * case, we'll just leave the wrapped offset alone: if * the wrapped offset hasn't been advanced far enough * for this allocation, it will be adjusted in the * lower loop. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { if (woffs >= offs) woffs = 0; } else { woffs = 0; } /* * Now we know that we're going to be storing to the * top of the buffer and that there is room for us * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ while (offs < buf->dtb_size) tomax[offs++] = 0; /* * We need to set our offset to zero. And because we * are wrapping, we need to set the bit indicating as * much. We can also adjust our needed space back * down to the space required by the ECB -- we know * that the top of the buffer is aligned. */ offs = 0; total = needed; buf->dtb_flags |= DTRACEBUF_WRAPPED; } else { /* * There is room for us in the buffer, so we simply * need to check the wrapped offset. */ if (woffs < offs) { /* * The wrapped offset is less than the offset. * This can happen if we allocated buffer space * that induced a wrap, but then we didn't * subsequently take the space due to an error * or false predicate evaluation. This is * okay; we know that _this_ allocation isn't * going to induce a wrap. We still can't * reset the wrapped offset to be zero, * however: the space may have been trashed in * the previous failed probe attempt. But at * least the wrapped offset doesn't need to * be adjusted at all... */ goto out; } } while (offs + total > woffs) { dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { ASSERT3U(epid, <=, state->dts_necbs); ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; } ASSERT(woffs + size <= buf->dtb_size); ASSERT(size != 0); if (woffs + size == buf->dtb_size) { /* * We've reached the end of the buffer; we want * to set the wrapped offset to 0 and break * out. However, if the offs is 0, then we're * in a strange edge-condition: the amount of * space that we want to reserve plus the size * of the record that we're overwriting is * greater than the size of the buffer. This * is problematic because if we reserve the * space but subsequently don't consume it (due * to a failed predicate or error) the wrapped * offset will be 0 -- yet the EPID at offset 0 * will not be committed. This situation is * relatively easy to deal with: if we're in * this case, the buffer is indistinguishable * from one that hasn't wrapped; we need only * finish the job by clearing the wrapped bit, * explicitly setting the offset to be 0, and * zero'ing out the old data in the buffer. */ if (offs == 0) { buf->dtb_flags &= ~DTRACEBUF_WRAPPED; buf->dtb_offset = 0; woffs = total; while (woffs < buf->dtb_size) tomax[woffs++] = 0; } woffs = 0; break; } woffs += size; } /* * We have a wrapped offset. It may be that the wrapped offset * has become zero -- that's okay. */ buf->dtb_xamot_offset = woffs; } out: /* * Now we can plow the buffer with any necessary padding. */ while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. 
*/ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (offs + needed > buf->dtb_size - state->dts_reserve) { buf->dtb_flags |= DTRACEBUF_FULL; return (-1); } } if (mstate == NULL) return (offs); /* * For ring buffers and fill buffers, the scratch space is always * the inactive buffer. */ mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; mstate->dtms_scratch_size = buf->dtb_size; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); ASSERT(MUTEX_HELD(&dtrace_lock)); if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; /* * We need to polish the ring buffer. There are three cases: * * - The first (and presumably most common) is that there is no gap * between the buffer offset and the wrapped offset. In this case, * there is nothing in the buffer that isn't valid data; we can * mark the buffer as polished and return. * * - The second (less common than the first but still more common * than the third) is that there is a gap between the buffer offset * and the wrapped offset, and the wrapped offset is larger than the * buffer offset. This can happen because of an alignment issue, or * can happen because of a call to dtrace_buffer_reserve() that * didn't subsequently consume the buffer space. In this case, * we need to zero the data from the buffer offset to the wrapped * offset. * * - The third (and least common) is that there is a gap between the * buffer offset and the wrapped offset, but the wrapped offset is * _less_ than the buffer offset. This can only happen because a * call to dtrace_buffer_reserve() induced a wrap, but the space * was not subsequently consumed. In this case, we need to zero the * space from the offset to the end of the buffer _and_ from the * top of the buffer to the wrapped offset. */ if (buf->dtb_offset < buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_xamot_offset - buf->dtb_offset); } if (buf->dtb_offset > buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_size - buf->dtb_offset); bzero(buf->dtb_tomax, buf->dtb_xamot_offset); } } /* * This routine determines if data generated at the specified time has likely * been entirely consumed at user-level. This routine is called to determine * if an ECB on a defunct probe (but for an active enabling) can be safely * disabled and destroyed. 
*/ static int dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_size == 0) continue; if (buf->dtb_flags & DTRACEBUF_RING) return (0); if (!buf->dtb_switched && buf->dtb_offset != 0) return (0); if (buf->dtb_switched - buf->dtb_interval < when) return (0); } return (1); } static void dtrace_buffer_free(dtrace_buffer_t *bufs) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); ASSERT(buf->dtb_size == 0); continue; } if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); } kmem_free(buf->dtb_tomax, buf->dtb_size); buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; } } /* * DTrace Enabling Functions */ static dtrace_enabling_t * dtrace_enabling_create(dtrace_vstate_t *vstate) { dtrace_enabling_t *enab; enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); enab->dten_vstate = vstate; return (enab); } static void dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) { dtrace_ecbdesc_t **ndesc; size_t osize, nsize; /* * We can't add to enablings after we've enabled them, or after we've * retained them. */ ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; return; } osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); if (enab->dten_maxdesc == 0) { enab->dten_maxdesc = 1; } else { enab->dten_maxdesc <<= 1; } ASSERT(enab->dten_ndesc < enab->dten_maxdesc); nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); ndesc = kmem_zalloc(nsize, KM_SLEEP); bcopy(enab->dten_desc, ndesc, osize); if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, osize); enab->dten_desc = ndesc; enab->dten_desc[enab->dten_ndesc++] = ecb; } static void dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, dtrace_probedesc_t *pd) { dtrace_ecbdesc_t *new; dtrace_predicate_t *pred; dtrace_actdesc_t *act; /* * We're going to create a new ECB description that matches the * specified ECB in every way, but has the specified probe description. 
*/ new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_hold(pred); for (act = ecb->dted_action; act != NULL; act = act->dtad_next) dtrace_actdesc_hold(act); new->dted_action = ecb->dted_action; new->dted_pred = ecb->dted_pred; new->dted_probe = *pd; new->dted_uarg = ecb->dted_uarg; dtrace_enabling_add(enab, new); } static void dtrace_enabling_dump(dtrace_enabling_t *enab) { int i; for (i = 0; i < enab->dten_ndesc; i++) { dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; #ifdef __FreeBSD__ printf("dtrace: enabling probe %d (%s:%s:%s:%s)\n", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #else cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #endif } } static void dtrace_enabling_destroy(dtrace_enabling_t *enab) { int i; dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; dtrace_predicate_t *pred; ep = enab->dten_desc[i]; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_release(pred, vstate); for (act = ep->dted_action; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } kmem_free(ep, sizeof (dtrace_ecbdesc_t)); } if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); /* * If this was a retained enabling, decrement the dts_nretained count * and take it off of the dtrace_retained list. */ if (enab->dten_prev != NULL || enab->dten_next != NULL || dtrace_retained == enab) { ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; dtrace_retained_gen++; } if (enab->dten_prev == NULL) { if (dtrace_retained == enab) { dtrace_retained = enab->dten_next; if (dtrace_retained != NULL) dtrace_retained->dten_prev = NULL; } } else { ASSERT(enab != dtrace_retained); ASSERT(dtrace_retained != NULL); enab->dten_prev->dten_next = enab->dten_next; } if (enab->dten_next != NULL) { ASSERT(dtrace_retained != NULL); enab->dten_next->dten_prev = enab->dten_prev; } kmem_free(enab, sizeof (dtrace_enabling_t)); } static int dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); /* * We only allow each state to retain dtrace_retain_max enablings. */ if (state->dts_nretained >= dtrace_retain_max) return (ENOSPC); state->dts_nretained++; dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; return (0); } enab->dten_next = dtrace_retained; dtrace_retained->dten_prev = enab; dtrace_retained = enab; return (0); } static int dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_probedesc_t *create) { dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); new = dtrace_enabling_create(&state->dts_vstate); /* * Iterate over all retained enablings, looking for enablings that * match the specified state. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { int i; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * Now iterate over each probe description; we're looking for * an exact match to the specified probe description. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; if (strcmp(pd->dtpd_provider, match->dtpd_provider)) continue; if (strcmp(pd->dtpd_mod, match->dtpd_mod)) continue; if (strcmp(pd->dtpd_func, match->dtpd_func)) continue; if (strcmp(pd->dtpd_name, match->dtpd_name)) continue; /* * We have a winning probe! Add it to our growing * enabling. */ found = 1; dtrace_enabling_addlike(new, ep, create); } } if (!found || (err = dtrace_enabling_retain(new)) != 0) { dtrace_enabling_destroy(new); return (err); } return (0); } static void dtrace_enabling_retract(dtrace_state_t *state) { dtrace_enabling_t *enab, *next; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * Iterate over all retained enablings, destroy the enablings retained * for the specified state. */ for (enab = dtrace_retained; enab != NULL; enab = next) { next = enab->dten_next; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state == state) { ASSERT(state->dts_nretained > 0); dtrace_enabling_destroy(enab); } } ASSERT(state->dts_nretained == 0); } static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; int matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; enab->dten_current = ep; enab->dten_error = 0; matched += dtrace_probe_enable(&ep->dted_probe, enab); if (enab->dten_error != 0) { /* * If we get an error half-way through enabling the * probes, we kick out -- perhaps with some number of * them enabled. Leaving enabled probes enabled may * be slightly confusing for user-level, but we expect * that no one will attempt to actually drive on in * the face of such errors. If this is an anonymous * enabling (indicated with a NULL nmatched pointer), * we cmn_err() a message. We aren't expecting to * get such an error -- such as it can exist at all, * it would be a result of corrupted DOF in the driver * properties. */ if (nmatched == NULL) { cmn_err(CE_WARN, "dtrace_enabling_match() " "error on %p: %d", (void *)ep, enab->dten_error); } return (enab->dten_error); } } enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) *nmatched = matched; return (0); } static void dtrace_enabling_matchall(void) { dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); /* * Iterate over all retained enablings to see if any probes match * against them. We only perform this operation on enablings for which * we have sufficient permissions by virtue of being in the global zone * or in the same zone as the DTrace client. Because we can be called * after dtrace_detach() has been called, we cannot assert that there * are retained enablings. We can safely load from dtrace_retained, * however: the taskq_destroy() at the end of dtrace_detach() will * block pending our completion. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { #ifdef illumos cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; if (INGLOBALZONE(curproc) || cr != NULL && getzoneid() == crgetzoneid(cr)) #endif (void) dtrace_enabling_match(enab, NULL); } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * If an enabling is to be enabled without having matched probes (that is, if * dtrace_state_go() is to be called on the underlying dtrace_state_t), the * enabling must be _primed_ by creating an ECB for every ECB description. * This must be done to assure that we know the number of speculations, the * number of aggregations, the minimum buffer size needed, etc. before we * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually * enabling any probes, we create ECBs for every ECB decription, but with a * NULL probe -- which is exactly what this function does. */ static void dtrace_enabling_prime(dtrace_state_t *state) { dtrace_enabling_t *enab; int i; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * We don't want to prime an enabling more than once, lest * we allow a malicious user to induce resource exhaustion. * (The ECBs that result from priming an enabling aren't * leaked -- but they also aren't deallocated until the * consumer state is destroyed.) */ if (enab->dten_primed) continue; for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; (void) dtrace_probe_enable(NULL, enab); } enab->dten_primed = 1; } } /* * Called to indicate that probes should be provided due to retained * enablings. This is implemented in terms of dtrace_probe_provide(), but it * must take an initial lap through the enabling calling the dtps_provide() * entry point explicitly to allow for autocreated probes. */ static void dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; retry: gen = dtrace_retained_gen; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); /* * Process the retained enablings again if * they have changed while we weren't holding * dtrace_lock. */ if (gen != dtrace_retained_gen) goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); mutex_exit(&dtrace_lock); dtrace_probe_provide(NULL, all ? NULL : prv); mutex_enter(&dtrace_lock); } /* * Called to reap ECBs that are attached to probes from defunct providers. */ static void dtrace_enabling_reap(void) { dtrace_provider_t *prov; dtrace_probe_t *probe; dtrace_ecb_t *ecb; hrtime_t when; int i; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) continue; prov = probe->dtpr_provider; if ((when = prov->dtpv_defunct) == 0) continue; /* * We have ECBs on a defunct provider: we want to reap these * ECBs to allow the provider to unregister. 
The destruction * of these ECBs must be done carefully: if we destroy the ECB * and the consumer later wishes to consume an EPID that * corresponds to the destroyed ECB (and if the EPID metadata * has not been previously consumed), the consumer will abort * processing on the unknown EPID. To reduce (but not, sadly, * eliminate) the possibility of this, we will only destroy an * ECB for a defunct provider if, for the state that * corresponds to the ECB: * * (a) There is no speculative tracing (which can effectively * cache an EPID for an arbitrary amount of time). * * (b) The principal buffers have been switched twice since the * provider became defunct. * * (c) The aggregation buffers are of zero size or have been * switched twice since the provider became defunct. * * We use dts_speculates to determine (a) and call a function * (dtrace_buffer_consumed()) to determine (b) and (c). Note * that as soon as we've been unable to destroy one of the ECBs * associated with the probe, we quit trying -- reaping is only * fruitful in as much as we can destroy all ECBs associated * with the defunct provider's probes. */ while ((ecb = probe->dtpr_ecb) != NULL) { dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = state->dts_buffer; dtrace_buffer_t *aggbuf = state->dts_aggbuffer; if (state->dts_speculates) break; if (!dtrace_buffer_consumed(buf, when)) break; if (!dtrace_buffer_consumed(aggbuf, when)) break; dtrace_ecb_disable(ecb); ASSERT(probe->dtpr_ecb != ecb); dtrace_ecb_destroy(ecb); } } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * DTrace DOF Functions */ /*ARGSUSED*/ static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); #ifdef DTRACE_ERRDEBUG dtrace_errdebug(str); #endif } /* * Create DOF out of a currently enabled state. Right now, we only create * DOF containing the run-time options -- but this could be expanded to create * complete DOF representing the enabled state. */ static dof_hdr_t * dtrace_dof_create(dtrace_state_t *state) { dof_hdr_t *dof; dof_sec_t *sec; dof_optdesc_t *opt; int i, len = sizeof (dof_hdr_t) + roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; ASSERT(MUTEX_HELD(&dtrace_lock)); dof = kmem_zalloc(len, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; dof->dofh_flags = 0; dof->dofh_hdrsize = sizeof (dof_hdr_t); dof->dofh_secsize = sizeof (dof_sec_t); dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ dof->dofh_secoff = sizeof (dof_hdr_t); dof->dofh_loadsz = len; dof->dofh_filesz = len; dof->dofh_pad = 0; /* * Fill in the option section header... 
*/ sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); sec->dofs_type = DOF_SECT_OPTDESC; sec->dofs_align = sizeof (uint64_t); sec->dofs_flags = DOF_SECF_LOAD; sec->dofs_entsize = sizeof (dof_optdesc_t); opt = (dof_optdesc_t *)((uintptr_t)sec + roundup(sizeof (dof_sec_t), sizeof (uint64_t))); sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; for (i = 0; i < DTRACEOPT_MAX; i++) { opt[i].dofo_option = i; opt[i].dofo_strtab = DOF_SECIDX_NONE; opt[i].dofo_value = state->dts_options[i]; } return (dof); } static dof_hdr_t * dtrace_dof_copyin(uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; ASSERT(!MUTEX_HELD(&dtrace_lock)); /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } if (hdr.dofh_loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } #ifdef __FreeBSD__ static dof_hdr_t * dtrace_dof_copyin_proc(struct proc *p, uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; struct thread *td; size_t loadsz; ASSERT(!MUTEX_HELD(&dtrace_lock)); td = curthread; /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (proc_readmem(td, p, uarg, &hdr, sizeof(hdr)) != sizeof(hdr)) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } loadsz = (size_t)hdr.dofh_loadsz; if (loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); if (proc_readmem(td, p, uarg, dof, loadsz) != loadsz || dof->dofh_loadsz != loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } static __inline uchar_t dtrace_dof_char(char c) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return (c - '0'); case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': return (c - 'A' + 10); case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return (c - 'a' + 10); } /* Should not reach here. */ return (UCHAR_MAX); } #endif /* __FreeBSD__ */ static dof_hdr_t * dtrace_dof_property(const char *name) { #ifdef __FreeBSD__ uint8_t *dofbuf; u_char *data, *eol; caddr_t doffile; size_t bytes, len, i; dof_hdr_t *dof; u_char c1, c2; dof = NULL; doffile = preload_search_by_type("dtrace_dof"); if (doffile == NULL) return (NULL); data = preload_fetch_addr(doffile); len = preload_fetch_size(doffile); for (;;) { /* Look for the end of the line. All lines end in a newline. 
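 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The "example_" helpers are hypothetical.]
 */

/*
 * Once the matching "name=value" line is found below, the value is a run of
 * hex digit pairs, one pair per byte of DOF.  A user-level sketch of that
 * decode, including the even-length check this revision adds:
 */
static int
example_hex_digit(char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	return (-1);
}

static int
example_decode_hex(const char *val, size_t len, unsigned char *out,
    size_t outsize)
{
	size_t i;

	if (len % 2 != 0)
		return (-1);		/* odd length cannot encode bytes */
	if (len / 2 > outsize)
		return (-1);		/* caller's buffer is too small */
	for (i = 0; i < len / 2; i++) {
		int hi = example_hex_digit(val[i * 2]);
		int lo = example_hex_digit(val[i * 2 + 1]);

		if (hi < 0 || lo < 0)
			return (-1);	/* not a hex digit */
		out[i] = (unsigned char)(hi * 16 + lo);
	}
	return (0);
}

/*
 * (End of editor's sketch; the line scan resumes below.)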
*/ eol = memchr(data, '\n', len); if (eol == NULL) return (NULL); if (strncmp(name, data, strlen(name)) == 0) break; eol++; /* skip past the newline */ len -= eol - data; data = eol; } /* We've found the data corresponding to the specified key. */ data += strlen(name) + 1; /* skip past the '=' */ len = eol - data; + if (len % 2 != 0) { + dtrace_dof_error(NULL, "invalid DOF encoding length"); + goto doferr; + } bytes = len / 2; - if (bytes < sizeof(dof_hdr_t)) { dtrace_dof_error(NULL, "truncated header"); goto doferr; } /* * Each byte is represented by the two ASCII characters in its hex * representation. */ dofbuf = malloc(bytes, M_SOLARIS, M_WAITOK); for (i = 0; i < bytes; i++) { c1 = dtrace_dof_char(data[i * 2]); c2 = dtrace_dof_char(data[i * 2 + 1]); if (c1 == UCHAR_MAX || c2 == UCHAR_MAX) { dtrace_dof_error(NULL, "invalid hex char in DOF"); goto doferr; } dofbuf[i] = c1 * 16 + c2; } dof = (dof_hdr_t *)dofbuf; if (bytes < dof->dofh_loadsz) { dtrace_dof_error(NULL, "truncated DOF"); goto doferr; } if (dof->dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(NULL, "oversized DOF"); goto doferr; } return (dof); doferr: free(dof, M_SOLARIS); return (NULL); #else /* __FreeBSD__ */ uchar_t *buf; uint64_t loadsz; unsigned int len, i; dof_hdr_t *dof; /* * Unfortunately, array of values in .conf files are always (and * only) interpreted to be integer arrays. We must read our DOF * as an integer array, and then squeeze it into a byte array. */ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); for (i = 0; i < len; i++) buf[i] = (uchar_t)(((int *)buf)[i]); if (len < sizeof (dof_hdr_t)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated header"); return (NULL); } if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } if (loadsz >= dtrace_dof_maxsize) { ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); bcopy(buf, dof, loadsz); ddi_prop_free(buf); return (dof); #endif /* !__FreeBSD__ */ } static void dtrace_dof_destroy(dof_hdr_t *dof) { kmem_free(dof, dof->dofh_loadsz); } /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. If * a type other than DOF_SECT_NONE is specified, the header is checked against * this type and NULL is returned if the types do not match. 
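 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The "example_" types are simplified stand-ins for
 * dof_hdr_t and dof_sec_t.]
 */

/*
 * The lookup described above amounts to indexing an array of fixed-size
 * section headers and validating the result; a stripped-down version
 * (omitting the loadability check and error reporting):
 */
struct example_sec {
	uint32_t type;
	uint64_t offset;
	uint64_t size;
};

struct example_hdr {
	uint64_t secoff;	/* offset of the section header array */
	uint32_t secsize;	/* size of one section header */
	uint32_t secnum;	/* number of section headers */
};

static const struct example_sec *
example_sect(const void *dof, const struct example_hdr *h, uint32_t type,
    uint32_t i)
{
	const struct example_sec *sec;

	if (i >= h->secnum)
		return (NULL);		/* index out of range */
	sec = (const struct example_sec *)((const char *)dof +
	    h->secoff + (uint64_t)i * h->secsize);
	if (type != 0 && type != sec->type)
		return (NULL);		/* caller wanted a different type */
	return (sec);
}

/*
 * (End of editor's sketch; the original routine follows.)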
*/ static dof_sec_t * dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t) ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); if (i >= dof->dofh_secnum) { dtrace_dof_error(dof, "referenced section index is invalid"); return (NULL); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "referenced section is not loadable"); return (NULL); } if (type != DOF_SECT_NONE && type != sec->dofs_type) { dtrace_dof_error(dof, "referenced section is the wrong type"); return (NULL); } return (sec); } static dtrace_probedesc_t * dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) { dof_probedesc_t *probe; dof_sec_t *strtab; uintptr_t daddr = (uintptr_t)dof; uintptr_t str; size_t size; if (sec->dofs_type != DOF_SECT_PROBEDESC) { dtrace_dof_error(dof, "invalid probe section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad alignment in probe description"); return (NULL); } if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated probe description"); return (NULL); } probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); if (strtab == NULL) return (NULL); str = daddr + strtab->dofs_offset; size = strtab->dofs_size; if (probe->dofp_provider >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe provider"); return (NULL); } (void) strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); return (NULL); } (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); return (NULL); } (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); return (NULL); } (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); return (desc); } static dtrace_difo_t * dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; size_t ttl = 0; dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max = dtrace_difo_maxsize; int i, l, n; static const struct { int section; int bufoffs; int lenoffs; int entsize; int align; const char *msg; } difo[] = { { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), sizeof (dif_instr_t), "multiple DIF sections" }, { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), sizeof (uint64_t), "multiple integer tables" }, { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), offsetof(dtrace_difo_t, dtdo_strlen), 0, sizeof (char), "multiple string tables" }, { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, { DOF_SECT_NONE, 0, 0, 0, 0, NULL } }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { dtrace_dof_error(dof, "invalid DIFO header section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad 
alignment in DIFO header"); return (NULL); } if (sec->dofs_size < sizeof (dof_difohdr_t) || sec->dofs_size % sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad size in DIFO header"); return (NULL); } dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); dp->dtdo_rtype = dofd->dofd_rtype; for (l = 0; l < n; l++) { dof_sec_t *subsec; void **bufp; uint32_t *lenp; if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, dofd->dofd_links[l])) == NULL) goto err; /* invalid section link */ if (ttl + subsec->dofs_size > max) { dtrace_dof_error(dof, "exceeds maximum size"); goto err; } ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { if (subsec->dofs_type != difo[i].section) continue; if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "section not loaded"); goto err; } if (subsec->dofs_align != difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); if (*bufp != NULL) { dtrace_dof_error(dof, difo[i].msg); goto err; } if (difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } if (subsec->dofs_entsize != 0 && (subsec->dofs_size % subsec->dofs_entsize) != 0) { dtrace_dof_error(dof, "corrupt entry size"); goto err; } *lenp = subsec->dofs_size; *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), *bufp, subsec->dofs_size); if (subsec->dofs_entsize != 0) *lenp /= subsec->dofs_entsize; break; } /* * If we encounter a loadable DIFO sub-section that is not * known to us, assume this is a broken program and fail. */ if (difo[i].section == DOF_SECT_NONE && (subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "unrecognized DIFO subsection"); goto err; } } if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. */ dtrace_dof_error(dof, "missing DIF text"); goto err; } /* * Before we validate the DIF object, run through the variable table * looking for the strings -- if any of their size are under, we'll set * their size to be the system-wide default string size. Note that * this should _not_ happen if the "strsize" option has been set -- * in this case, the compiler should have set the size to reflect the * setting of the option. 
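 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  All "example_" names are hypothetical.]
 */

/*
 * The sub-section loader a few lines above is table-driven: each row names
 * a section type plus the offsetof() of the buffer and length fields it
 * populates, so one loop can fill several differently-typed dtrace_difo_t
 * members.  The same pattern in miniature:
 */
struct example_difo {
	void *buf;
	uint32_t buflen;
	void *inttab;
	uint32_t intlen;
};

static const struct {
	int type;		/* section type handled by this row */
	size_t bufoffs;		/* offset of the pointer field */
	size_t lenoffs;		/* offset of the length field */
} example_rows[] = {
	{ 1, offsetof(struct example_difo, buf),
	    offsetof(struct example_difo, buflen) },
	{ 2, offsetof(struct example_difo, inttab),
	    offsetof(struct example_difo, intlen) },
	{ 0, 0, 0 }
};

static void
example_fill(struct example_difo *dp, int type, void *data, uint32_t len)
{
	int i;

	for (i = 0; example_rows[i].type != 0; i++) {
		void **bufp;
		uint32_t *lenp;

		if (example_rows[i].type != type)
			continue;
		bufp = (void **)((char *)dp + example_rows[i].bufoffs);
		lenp = (uint32_t *)((char *)dp + example_rows[i].lenoffs);
		*bufp = data;	/* store into whichever field the row names */
		*lenp = len;
		break;
	}
}

/*
 * (End of editor's sketch; the original code resumes below.)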
*/ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_diftype_t *t = &v->dtdv_type; if (v->dtdv_id < DIF_VAR_OTHER_UBASE) continue; if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) t->dtdt_size = dtrace_strsize_default; } if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) goto err; dtrace_difo_init(dp, vstate); return (dp); err: kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); return (NULL); } static dtrace_predicate_t * dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) return (NULL); return (dtrace_predicate_create(dp)); } static dtrace_actdesc_t * dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; dof_actdesc_t *desc; dof_sec_t *difosec; size_t offs; uintptr_t daddr = (uintptr_t)dof; uint64_t arg; dtrace_actkind_t kind; if (sec->dofs_type != DOF_SECT_ACTDESC) { dtrace_dof_error(dof, "invalid action section"); return (NULL); } if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated action description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in action description"); return (NULL); } if (sec->dofs_size < sec->dofs_entsize) { dtrace_dof_error(dof, "section entry size exceeds total size"); return (NULL); } if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { dtrace_dof_error(dof, "bad entry size in action description"); return (NULL); } if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); return (NULL); } for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { desc = (dof_actdesc_t *)(daddr + (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; if ((DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* * The argument to these actions is an index into the * DOF string table. For printf()-like actions, this * is the format string. For print(), this is the * CTF type of the expression result. 
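 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The helper below is hypothetical.]
 */

/*
 * Before that string-table index can be trusted it must refer to a
 * non-empty string that is NUL-terminated inside the table; a user-level
 * sketch of the bounds check, returning the string length or -1:
 */
static long
example_strtab_strlen(const char *strtab, size_t size, size_t idx)
{
	size_t i;

	for (i = idx; i < size; i++) {
		if (strtab[i] == '\0')
			break;
	}
	if (i >= size)
		return (-1);		/* runs off the end of the table */
	if (i == idx)
		return (-1);		/* empty format string */
	return ((long)(i - idx));
}

/*
 * (End of editor's sketch; the original checks follow.)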
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) goto err; str = (char *)((uintptr_t)dof + (uintptr_t)strtab->dofs_offset); for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { if (str[i] == '\0') break; } if (i >= strtab->dofs_size) { dtrace_dof_error(dof, "bogus format string"); goto err; } if (i == desc->dofa_arg) { dtrace_dof_error(dof, "empty format string"); goto err; } i -= desc->dofa_arg; fmt = kmem_alloc(i + 1, KM_SLEEP); bcopy(&str[desc->dofa_arg], fmt, i + 1); arg = (uint64_t)(uintptr_t)fmt; } else { if (kind == DTRACEACT_PRINTA) { ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); arg = 0; } else { arg = desc->dofa_arg; } } act = dtrace_actdesc_create(kind, desc->dofa_ntuple, desc->dofa_uarg, arg); if (last != NULL) { last->dtad_next = act; } else { first = act; } last = act; if (desc->dofa_difo == DOF_SECIDX_NONE) continue; if ((difosec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) goto err; act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); if (act->dtad_difo == NULL) goto err; } ASSERT(first != NULL); return (first); err: for (act = first; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } return (NULL); } static dtrace_ecbdesc_t * dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_ecbdesc_t *ep; dof_ecbdesc_t *ecb; dtrace_probedesc_t *desc; dtrace_predicate_t *pred = NULL; if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { dtrace_dof_error(dof, "truncated ECB description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in ECB description"); return (NULL); } ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); if (sec == NULL) return (NULL); ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); ep->dted_uarg = ecb->dofe_uarg; desc = &ep->dted_probe; if (dtrace_dof_probedesc(dof, sec, desc) == NULL) goto err; if (ecb->dofe_pred != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) goto err; if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) goto err; ep->dted_pred.dtpdd_predicate = pred; } if (ecb->dofe_actions != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) goto err; ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); if (ep->dted_action == NULL) goto err; } return (ep); err: if (pred != NULL) dtrace_predicate_release(pred, vstate); kmem_free(ep, sizeof (dtrace_ecbdesc_t)); return (NULL); } /* * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the - * specified DOF. At present, this amounts to simply adding 'ubase' to the - * site of any user SETX relocations to account for load object base address. - * In the future, if we need other relocations, this function can be extended. + * specified DOF. SETX relocations are computed using 'ubase', the base load + * address of the object containing the DOF, and DOFREL relocations are relative + * to the relocation offset within the DOF. 
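 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The helper and the EXAMPLE_RELO_* names are
 * hypothetical; 'udaddr' is taken here to be the address at which the DOF
 * is mapped in the consumer process, matching the new parameter below.]
 */

/*
 * The two flavours described above reduce to two different additions to a
 * 64-bit slot that lives 'roffs' bytes into a target section starting at
 * 'tsecoffs' bytes into the DOF:
 */
enum example_relo { EXAMPLE_RELO_SETX, EXAMPLE_RELO_DOFREL };

static void
example_relocate(void *dof, uint64_t tsecoffs, uint64_t roffs,
    enum example_relo type, uint64_t ubase, uint64_t udaddr)
{
	uint64_t *slot = (uint64_t *)((char *)dof + tsecoffs + roffs);

	if (type == EXAMPLE_RELO_SETX) {
		/* Shift by the base load address of the containing object. */
		*slot += ubase;
	} else {
		/*
		 * Add the user address of the slot itself, so a value that
		 * was stored relative to its own location in the DOF becomes
		 * an absolute address.
		 */
		*slot += udaddr + tsecoffs + roffs;
	}
}

/*
 * (End of editor's sketch; the original function follows.)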
*/ static int -dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) +dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase, + uint64_t udaddr) { uintptr_t daddr = (uintptr_t)dof; dof_relohdr_t *dofr = (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); dof_sec_t *ss, *rs, *ts; dof_relodesc_t *r; uint_t i, n; if (sec->dofs_size < sizeof (dof_relohdr_t) || sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "invalid relocation header"); return (-1); } ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); if (ss == NULL || rs == NULL || ts == NULL) return (-1); /* dtrace_dof_error() has been called already */ if (rs->dofs_entsize < sizeof (dof_relodesc_t) || rs->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "invalid relocation section"); return (-1); } r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); n = rs->dofs_size / rs->dofs_entsize; for (i = 0; i < n; i++) { uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; switch (r->dofr_type) { case DOF_RELO_NONE: break; case DOF_RELO_SETX: + case DOF_RELO_DOFREL: if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + sizeof (uint64_t) > ts->dofs_size) { dtrace_dof_error(dof, "bad relocation offset"); return (-1); } if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned setx relo"); return (-1); } - *(uint64_t *)taddr += ubase; + if (r->dofr_type == DOF_RELO_SETX) + *(uint64_t *)taddr += ubase; + else + *(uint64_t *)taddr += + udaddr + ts->dofs_offset + r->dofr_offset; break; default: dtrace_dof_error(dof, "invalid relocation type"); return (-1); } r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); } return (0); } /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated * header: it should be at the front of a memory region that is at least * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in * size. It need not be validated in any other way. */ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, - dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) + dtrace_enabling_t **enabp, uint64_t ubase, uint64_t udaddr, int noprobes) { uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; dtrace_enabling_t *enab; uint_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* * Check the DOF header identification bytes. In addition to checking * valid settings, we also verify that unused bits/bytes are zeroed so * we can use them later without fear of regressing existing binaries. 
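 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The magic bytes and pad range shown are illustrative;
 * the real code also validates the data model, encoding, versions and
 * register counts.]
 */

/*
 * The identification check described above boils down to comparing the
 * leading magic bytes and insisting that every reserved ident byte is
 * still zero:
 */
static int
example_check_ident(const unsigned char *ident, size_t firstpad, size_t size)
{
	size_t i;

	if (ident[0] != 0177 || ident[1] != 'D' || ident[2] != 'O' ||
	    ident[3] != 'F')
		return (-1);		/* magic string mismatch */
	for (i = firstpad; i < size; i++) {
		if (ident[i] != 0)
			return (-1);	/* reserved byte is not zero */
	}
	return (0);
}

/*
 * (End of editor's sketch; the original checks follow.)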
*/ if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { dtrace_dof_error(dof, "DOF magic string mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { dtrace_dof_error(dof, "DOF has invalid data model"); return (-1); } if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { dtrace_dof_error(dof, "DOF encoding mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { dtrace_dof_error(dof, "DOF version mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { dtrace_dof_error(dof, "DOF uses unsupported instruction set"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { dtrace_dof_error(dof, "DOF uses too many integer registers"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { dtrace_dof_error(dof, "DOF uses too many tuple registers"); return (-1); } for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { if (dof->dofh_ident[i] != 0) { dtrace_dof_error(dof, "DOF has invalid ident byte set"); return (-1); } } if (dof->dofh_flags & ~DOF_FL_VALID) { dtrace_dof_error(dof, "DOF has invalid flag bits set"); return (-1); } if (dof->dofh_secsize == 0) { dtrace_dof_error(dof, "zero section header size"); return (-1); } /* * Check that the section headers don't exceed the amount of DOF * data. Note that we cast the section size and number of sections * to uint64_t's to prevent possible overflow in the multiplication. */ seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; if (dof->dofh_secoff > len || seclen > len || dof->dofh_secoff + seclen > len) { dtrace_dof_error(dof, "truncated section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section size"); return (-1); } /* * Take an initial pass through the section headers to be sure that * the headers don't have stray offsets. If the 'noprobes' flag is * set, do not permit sections relating to providers, probes, or args. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (noprobes) { switch (sec->dofs_type) { case DOF_SECT_PROVIDER: case DOF_SECT_PROBES: case DOF_SECT_PRARGS: case DOF_SECT_PROFFS: dtrace_dof_error(dof, "illegal sections " "for enabling"); return (-1); } } if (DOF_SEC_ISLOADABLE(sec->dofs_type) && !(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "loadable section with load " "flag unset"); return (-1); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ if (!ISP2(sec->dofs_align)) { dtrace_dof_error(dof, "bad section alignment"); return (-1); } if (sec->dofs_offset & (sec->dofs_align - 1)) { dtrace_dof_error(dof, "misaligned section"); return (-1); } if (sec->dofs_offset > len || sec->dofs_size > len || sec->dofs_offset + sec->dofs_size > len) { dtrace_dof_error(dof, "corrupt section header"); return (-1); } if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + sec->dofs_offset + sec->dofs_size - 1) != '\0') { dtrace_dof_error(dof, "non-terminating string table"); return (-1); } } /* * Take a second pass through the sections and locate and perform any * relocations that are present. 
We do this after the first pass to * be sure that all sections have had their headers validated. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* skip sections that are not loadable */ switch (sec->dofs_type) { case DOF_SECT_URELHDR: - if (dtrace_dof_relocate(dof, sec, ubase) != 0) + if (dtrace_dof_relocate(dof, sec, ubase, udaddr) != 0) return (-1); break; } } if ((enab = *enabp) == NULL) enab = *enabp = dtrace_enabling_create(vstate); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_ECBDESC) continue; if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } dtrace_enabling_add(enab, ep); } return (0); } /* * Process DOF for any options. This routine assumes that the DOF has been * at least processed by dtrace_dof_slurp(). */ static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { int i, rval; uint32_t entsize; size_t offs; dof_optdesc_t *desc; for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_OPTDESC) continue; if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in " "option description"); return (EINVAL); } if ((entsize = sec->dofs_entsize) == 0) { dtrace_dof_error(dof, "zeroed option entry size"); return (EINVAL); } if (entsize < sizeof (dof_optdesc_t)) { dtrace_dof_error(dof, "bad option entry size"); return (EINVAL); } for (offs = 0; offs < sec->dofs_size; offs += entsize) { desc = (dof_optdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset + offs); if (desc->dofo_strtab != DOF_SECIDX_NONE) { dtrace_dof_error(dof, "non-zero option string"); return (EINVAL); } if (desc->dofo_value == DTRACEOPT_UNSET) { dtrace_dof_error(dof, "unset option"); return (EINVAL); } if ((rval = dtrace_state_option(state, desc->dofo_option, desc->dofo_value)) != 0) { dtrace_dof_error(dof, "rejected option"); return (rval); } } } return (0); } /* * DTrace Consumer State Functions */ static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; VERIFY(dstate->dtds_chunksize < LONG_MAX); if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) return (ENOMEM); dstate->dtds_size = size; dstate->dtds_base = base; dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); if (hashsize != 1 && (hashsize & 1)) hashsize--; dstate->dtds_hashsize = hashsize; dstate->dtds_hash = dstate->dtds_base; /* * Set all of our hash buckets to point to the single sink, and (if * it hasn't already been set), set the sink's hash value to be the * sink sentinel value. 
The sink is needed for dynamic variable * lookups to know that they have iterated over an entire, valid hash * chain. */ for (i = 0; i < hashsize; i++) dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; /* * Determine number of active CPUs. Divide free list evenly among * active CPUs. */ start = (dtrace_dynvar_t *) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; VERIFY((uintptr_t)start < limit); VERIFY((uintptr_t)start >= (uintptr_t)base); maxper = (limit - (uintptr_t)start) / NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; #ifndef illumos CPU_FOREACH(i) { #else for (i = 0; i < NCPU; i++) { #endif dstate->dtds_percpu[i].dtdsc_free = dvar = start; /* * If we don't even have enough chunks to make it once through * NCPUs, we're just going to allocate everything to the first * CPU. And if we're on the last CPU, we're going to allocate * whatever is left over. In either case, we set the limit to * be the limit of the dynamic variable space. */ if (maxper == 0 || i == NCPU - 1) { limit = (uintptr_t)base + size; start = NULL; } else { limit = (uintptr_t)start + maxper; start = (dtrace_dynvar_t *)limit; } VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + dstate->dtds_chunksize); if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; VERIFY((uintptr_t)dvar >= (uintptr_t)base && (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } if (maxper == 0) break; } return (0); } static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { ASSERT(MUTEX_HELD(&cpu_lock)); if (dstate->dtds_base == NULL) return; kmem_free(dstate->dtds_base, dstate->dtds_size); kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); } static void dtrace_vstate_fini(dtrace_vstate_t *vstate) { /* * Logical XOR, where are you? */ ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); if (vstate->dtvs_nglobals > 0) { kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals * sizeof (dtrace_statvar_t *)); } if (vstate->dtvs_ntlocals > 0) { kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals * sizeof (dtrace_difv_t)); } ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); if (vstate->dtvs_nlocals > 0) { kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals * sizeof (dtrace_statvar_t *)); } } #ifdef illumos static void dtrace_state_clean(dtrace_state_t *state) { if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); } static void dtrace_state_deadman(dtrace_state_t *state) { hrtime_t now; dtrace_sync(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. 
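 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  It simply restates the two-store pattern in isolation.]
 */

/*
 * The update ordering described above, on its own: a racing reader may see
 * INT64_MAX (harmlessly "too recent") while the update is in flight, but
 * never a value smaller than the one being replaced.
 */
static void
example_advance_alive(volatile int64_t *alive, int64_t now)
{
	*alive = INT64_MAX;		/* park at "infinitely recent" */
	dtrace_membar_producer();	/* order the two stores */
	*alive = now;			/* then publish the real timestamp */
}

/*
 * (End of editor's sketch; the original stores follow.)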
*/ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; } #else /* !illumos */ static void dtrace_state_clean(void *arg) { dtrace_state_t *state = arg; dtrace_optval_t *opt = state->dts_options; if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); } static void dtrace_state_deadman(void *arg) { dtrace_state_t *state = arg; hrtime_t now; dtrace_sync(); dtrace_debug_output(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. */ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); } #endif /* illumos */ static dtrace_state_t * #ifdef illumos dtrace_state_create(dev_t *devp, cred_t *cr) #else dtrace_state_create(struct cdev *dev, struct ucred *cred __unused) #endif { #ifdef illumos minor_t minor; major_t major; #else cred_t *cr = NULL; int m = 0; #endif char c[30]; dtrace_state_t *state; dtrace_optval_t *opt; int bufsize = NCPU * sizeof (dtrace_buffer_t), i; int cpu_it; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); #ifdef illumos minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); return (NULL); } state = ddi_get_soft_state(dtrace_softstate, minor); #else if (dev != NULL) { cr = dev->si_cred; m = dev2unit(dev); } /* Allocate memory for the state. */ state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP); #endif state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m); #ifdef illumos state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); if (devp != NULL) { major = getemajor(*devp); } else { major = ddi_driver_major(dtrace_devi); } state->dts_dev = makedevice(major, minor); if (devp != NULL) *devp = state->dts_dev; #else state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx); state->dts_dev = dev; #endif /* * We allocate NCPU buffers. On the one hand, this can be quite * a bit of memory per instance (nearly 36K on a Starcat). On the * other hand, it saves an additional memory reference in the probe * path. */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); /* * Allocate and initialise the per-process per-CPU random state. * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is * assumed to be seeded at this point (if from Fortuna seed file). */ (void) read_random(&state->dts_rstate[0], 2 * sizeof(uint64_t)); for (cpu_it = 1; cpu_it < NCPU; cpu_it++) { /* * Each CPU is assigned a 2^64 period, non-overlapping * subsequence. 
*/ dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1], state->dts_rstate[cpu_it]); } #ifdef illumos state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; #else callout_init(&state->dts_cleaner, 1); callout_init(&state->dts_deadman, 1); #endif state->dts_vstate.dtvs_state = state; for (i = 0; i < DTRACEOPT_MAX; i++) state->dts_options[i] = DTRACEOPT_UNSET; /* * Set the default options. */ opt = state->dts_options; opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; state->dts_activity = DTRACE_ACTIVITY_INACTIVE; /* * Depending on the user credentials, we set flag bits which alter probe * visibility or the amount of destructiveness allowed. In the case of * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. */ if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; } else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on * us; this in turn prevents the zone_t referenced by this * credential from disappearing. This means that we can * examine the credential and the zone from probe context. */ crhold(cr); state->dts_cred.dcr_cred = cr; /* * CRA_PROC means "we have *some* privilege for dtrace" and * unlocks the use of variables like pid, zonename, etc. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) || PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC; } /* * dtrace_user allows use of syscall and profile providers. * If the user also has proc_owner and/or proc_zone, we * extend the scope to include additional visibility and * destructive power. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLPROC; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; } if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ #ifdef illumos if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_kernel privilege also implies that * the user has the dtrace_user privilege from a visibility * perspective. But without further privileges, some * destructive actions are not available. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) { /* * Make all probes in all zones visible. 
However, * this doesn't mean that all actions become available * to all zones. */ state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | DTRACE_CRA_PROC; /* * Holding proc_owner means that destructive actions * for *this* zone are allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; /* * Holding proc_zone means that destructive actions * for this user/group ID in all zones is allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; #ifdef illumos /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_proc privilege gives control over fasttrap * and pid providers. We need to grant wider destructive * privileges in the event that the user has proc_owner and/or * proc_zone. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } return (state); } static int dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0;; int flags = 0, rval, factor, divisor = 1; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) return (0); if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) cpu = opt[DTRACEOPT_CPU]; if (which == DTRACEOPT_SPECSIZE) flags |= DTRACEBUF_NOSWITCH; if (which == DTRACEOPT_BUFSIZE) { if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) flags |= DTRACEBUF_RING; if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) flags |= DTRACEBUF_FILL; if (state != dtrace_anon.dta_state || state->dts_activity != DTRACE_ACTIVITY_ACTIVE) flags |= DTRACEBUF_INACTIVE; } for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. */ if (size & (sizeof (uint64_t) - 1)) size -= size & (sizeof (uint64_t) - 1); if (size < state->dts_reserve) { /* * Buffers always must be large enough to accommodate * their prereserved space. We return E2BIG instead * of ENOMEM in this case to allow for user-level * software to differentiate the cases. 
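 *
 * [Editor's note: illustrative sketch added for exposition; not part of the
 * original change.  The helper is hypothetical and simply shrinks by
 * halving, whereas the real loop shrinks by a factor reported by
 * dtrace_buffer_alloc().]
 */

/*
 * The sizing loop described above, in outline: round the candidate size
 * down to an 8-byte multiple, fail with E2BIG once it no longer covers the
 * prereserved space (so user-level can tell this apart from a plain
 * allocation failure), and otherwise retry smaller while the allocator
 * keeps returning ENOMEM.
 */
static int
example_size_buffer(size_t request, size_t reserve,
    int (*try_alloc)(size_t), size_t *chosen)
{
	size_t size;

	for (size = request; size >= sizeof (uint64_t); size /= 2) {
		size -= size & (sizeof (uint64_t) - 1);	/* align down */
		if (size < reserve)
			return (E2BIG);
		if (try_alloc(size) == 0) {
			*chosen = size;
			return (0);
		}
	}
	return (ENOMEM);
}

/*
 * (End of editor's sketch; the original code resumes below.)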
*/ return (E2BIG); } rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); if (rval != ENOMEM) { opt[which] = size; return (rval); } if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); for (divisor = 2; divisor < factor; divisor <<= 1) continue; } return (ENOMEM); } static int dtrace_state_buffers(dtrace_state_t *state) { dtrace_speculation_t *spec = state->dts_speculations; int rval, i; if ((rval = dtrace_state_buffer(state, state->dts_buffer, DTRACEOPT_BUFSIZE)) != 0) return (rval); if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, DTRACEOPT_AGGSIZE)) != 0) return (rval); for (i = 0; i < state->dts_nspeculations; i++) { if ((rval = dtrace_state_buffer(state, spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) return (rval); } return (0); } static void dtrace_state_prereserve(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_probe_t *probe; state->dts_reserve = 0; if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) return; /* * If our buffer policy is a "fill" buffer policy, we need to set the * prereserved space to be the space required by the END probes. */ probe = dtrace_probes[dtrace_probeid_end - 1]; ASSERT(probe != NULL); for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { if (ecb->dte_state != state) continue; state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; } } static int dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) { dtrace_optval_t *opt = state->dts_options, sz, nspec; dtrace_speculation_t *spec; dtrace_buffer_t *buf; #ifdef illumos cyc_handler_t hdlr; cyc_time_t when; #endif int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_icookie_t cookie; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { rval = EBUSY; goto out; } /* * Before we can perform any checks, we must prime all of the * retained enablings that correspond to this state. */ dtrace_enabling_prime(state); if (state->dts_destructive && !state->dts_cred.dcr_destructive) { rval = EACCES; goto out; } dtrace_state_prereserve(state); /* * Now we want to do is try to allocate our speculations. * We do not automatically resize the number of speculations; if * this fails, we will fail the operation. */ nspec = opt[DTRACEOPT_NSPEC]; ASSERT(nspec != DTRACEOPT_UNSET); if (nspec > INT_MAX) { rval = ENOMEM; goto out; } spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP | KM_NORMALPRI); if (spec == NULL) { rval = ENOMEM; goto out; } state->dts_speculations = spec; state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP | KM_NORMALPRI)) == NULL) { rval = ENOMEM; goto err; } spec[i].dtsp_buffer = buf; } if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { if (dtrace_anon.dta_state == NULL) { rval = ENOENT; goto out; } if (state->dts_necbs != 0) { rval = EALREADY; goto out; } state->dts_anon = dtrace_anon_grab(); ASSERT(state->dts_anon != NULL); state = state->dts_anon; /* * We want "grabanon" to be set in the grabbed state, so we'll * copy that option value from the grabbing state into the * grabbed state. */ state->dts_options[DTRACEOPT_GRABANON] = opt[DTRACEOPT_GRABANON]; *cpu = dtrace_anon.dta_beganon; /* * If the anonymous state is active (as it almost certainly * is if the anonymous enabling ultimately matched anything), * we don't allow any further option processing -- but we * don't return failure. 
*/ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) goto out; } if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_AGGSIZE] != 0) { if (state->dts_aggregations == NULL) { /* * We're not going to create an aggregation buffer * because we don't have any ECBs that contain * aggregations -- set this option to 0. */ opt[DTRACEOPT_AGGSIZE] = 0; } else { /* * If we have an aggregation buffer, we must also have * a buffer to use as scratch. */ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } } } if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_SPECSIZE] != 0) { if (!state->dts_speculates) { /* * We're not going to create speculation buffers * because we don't have any ECBs that actually * speculate -- set the speculation size to 0. */ opt[DTRACEOPT_SPECSIZE] = 0; } } /* * The bare minimum size for any buffer that we're actually going to * do anything to is sizeof (uint64_t). */ sz = sizeof (uint64_t); if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { /* * A buffer size has been explicitly set to 0 (or to a size * that will be adjusted to 0) and we need the space -- we * need to return failure. We return ENOSPC to differentiate * it from failing to allocate a buffer due to failure to meet * the reserve (for which we return E2BIG). */ rval = ENOSPC; goto out; } if ((rval = dtrace_state_buffers(state)) != 0) goto err; if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) sz = dtrace_dstate_defsize; do { rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); if (rval == 0) break; if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) goto err; } while (sz >>= 1); opt[DTRACEOPT_DYNVARSIZE] = sz; if (rval != 0) goto err; if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; if (opt[DTRACEOPT_CLEANRATE] == 0) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); #ifdef illumos hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; state->dts_cleaner = cyclic_add(&hdlr, &when); hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = dtrace_deadman_interval; state->dts_deadman = cyclic_add(&hdlr, &when); #else callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); #endif state->dts_activity = DTRACE_ACTIVITY_WARMUP; #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to bump our zone's count, and (if * this is the first enabling to have an unprivileged call * to getf()) we need to hook into closef(). 
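/*
 * A compact sketch of the option reconciliation above, using plain struct
 * members in place of the opt[] array (the struct and names are
 * illustrative): facilities that are not used get their buffer sizes
 * forced to zero, an aggregation buffer implies a principal buffer large
 * enough for scratch, and a buffer that is needed but would be smaller
 * than a single uint64_t record is an ENOSPC-style hard error rather than
 * a candidate for further shrinking.
 */
#include <errno.h>
#include <stdint.h>

struct opts {
	int64_t	bufsize;	/* principal buffer */
	int64_t	aggsize;	/* aggregation buffer */
	int64_t	specsize;	/* speculation buffers */
};

static int
reconcile_options(struct opts *o, int has_aggs, int speculates,
    int64_t needed)
{
	const int64_t minsz = sizeof (uint64_t);

	if (!has_aggs)
		o->aggsize = 0;		/* no aggregations: no agg buffer */
	else if (o->bufsize < needed)
		o->bufsize = needed;	/* aggregations need scratch space */

	if (!speculates)
		o->specsize = 0;	/* nothing speculates: no spec buffers */

	if ((needed != 0 && o->bufsize < minsz) ||
	    (speculates && o->specsize < minsz) ||
	    (has_aggs && o->aggsize < minsz))
		return (ENOSPC);	/* a needed buffer was forced to zero */

	return (0);
}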
*/ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++; if (dtrace_getf++ == 0) { ASSERT(dtrace_closef == NULL); dtrace_closef = dtrace_getf_barrier; } } #endif /* * Now it's time to actually fire the BEGIN probe. We need to disable * interrupts here both to record the CPU on which we fired the BEGIN * probe (the data from this CPU will be processed first at user * level) and to manually activate the buffer for this CPU. */ cookie = dtrace_interrupt_disable(); *cpu = curcpu; ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; dtrace_probe(dtrace_probeid_begin, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); /* * We may have had an exit action from a BEGIN probe; only change our * state to ACTIVE if we're still in WARMUP. */ ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || state->dts_activity == DTRACE_ACTIVITY_DRAINING); if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) state->dts_activity = DTRACE_ACTIVITY_ACTIVE; #ifdef __FreeBSD__ /* * We enable anonymous tracing before APs are started, so we must * activate buffers using the current CPU. */ if (state == dtrace_anon.dta_state) for (int i = 0; i < NCPU; i++) dtrace_buffer_activate_cpu(state, i); else dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #else /* * Regardless of whether or not now we're in ACTIVE or DRAINING, we * want each CPU to transition its principal buffer out of the * INACTIVE state. Doing this assures that no CPU will suddenly begin * processing an ECB halfway down a probe's ECB chain; all CPUs will * atomically transition from processing none of a state's ECBs to * processing all of them. */ dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #endif goto out; err: dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); if ((nspec = state->dts_nspeculations) == 0) { ASSERT(state->dts_speculations == NULL); goto out; } spec = state->dts_speculations; ASSERT(spec != NULL); for (i = 0; i < state->dts_nspeculations; i++) { if ((buf = spec[i].dtsp_buffer) == NULL) break; dtrace_buffer_free(buf); kmem_free(buf, bufsize); } kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); state->dts_nspeculations = 0; state->dts_speculations = NULL; out: mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (rval); } static int dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) return (EINVAL); /* * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync * to be sure that every CPU has seen it. See below for the details * on why this is done. */ state->dts_activity = DTRACE_ACTIVITY_DRAINING; dtrace_sync(); /* * By this point, it is impossible for any CPU to be still processing * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN * iff we're in the END probe. */ state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; dtrace_sync(); ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); /* * Finally, we can release the reserve and call the END probe. 
We * disable interrupts across calling the END probe to allow us to * return the CPU on which we actually called the END probe. This * allows user-land to be sure that this CPU's principal buffer is * processed last. */ state->dts_reserve = 0; cookie = dtrace_interrupt_disable(); *cpu = curcpu; dtrace_probe(dtrace_probeid_end, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); state->dts_activity = DTRACE_ACTIVITY_STOPPED; dtrace_sync(); #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to lower our zone's count, and (if * this is the last enabling to have an unprivileged call * to getf()) we need to clear the closef() hook. */ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0); ASSERT(dtrace_closef == dtrace_getf_barrier); ASSERT(dtrace_getf > 0); state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--; if (--dtrace_getf == 0) dtrace_closef = NULL; } #endif return (0); } static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); if (option >= DTRACEOPT_MAX) return (EINVAL); if (option != DTRACEOPT_CPU && val < 0) return (EINVAL); switch (option) { case DTRACEOPT_DESTRUCTIVE: if (dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; break; case DTRACEOPT_BUFSIZE: case DTRACEOPT_DYNVARSIZE: case DTRACEOPT_AGGSIZE: case DTRACEOPT_SPECSIZE: case DTRACEOPT_STRSIZE: if (val < 0) return (EINVAL); if (val >= LONG_MAX) { /* * If this is an otherwise negative value, set it to * the highest multiple of 128m less than LONG_MAX. * Technically, we're adjusting the size without * regard to the buffer resizing policy, but in fact, * this has no effect -- if we set the buffer size to * ~LONG_MAX and the buffer policy is ultimately set to * be "manual", the buffer allocation is guaranteed to * fail, if only because the allocation requires two * buffers. (We set the the size to the highest * multiple of 128m because it ensures that the size * will remain a multiple of a megabyte when * repeatedly halved -- all the way down to 15m.) */ val = LONG_MAX - (1 << 27) + 1; } } state->dts_options[option] = val; return (0); } static void dtrace_state_destroy(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos minor_t minor = getminor(state->dts_dev); #endif int i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_speculation_t *spec = state->dts_speculations; int nspec = state->dts_nspeculations; uint32_t match; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); /* * First, retract any retained enablings for this state. */ dtrace_enabling_retract(state); ASSERT(state->dts_nretained == 0); if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || state->dts_activity == DTRACE_ACTIVITY_DRAINING) { /* * We have managed to come into dtrace_state_destroy() on a * hot enabling -- almost certainly because of a disorderly * shutdown of a consumer. (That is, a consumer that is * exiting without having called dtrace_stop().) In this case, * we're going to set our activity to be KILLED, and then * issue a sync to be sure that everyone is out of probe * context before we start blowing away ECBs. */ state->dts_activity = DTRACE_ACTIVITY_KILLED; dtrace_sync(); } /* * Release the credential hold we took in dtrace_state_create(). 
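/*
 * A stand-alone check of the clamping arithmetic described above, assuming
 * a 64-bit long: LONG_MAX - (1 << 27) + 1 is the largest multiple of 128MB
 * below LONG_MAX, and it remains megabyte-aligned through several rounds
 * of halving.
 */
#include <limits.h>
#include <stdio.h>

int
main(void)
{
	long val = LONG_MAX - (1 << 27) + 1;
	long meg = 1L << 20;		/* 1MB */
	long chunk = 1L << 27;		/* 128MB */
	int halvings = 0;

	printf("clamped value:       %ld\n", val);
	printf("multiple of 128MB:   %s\n", (val % chunk) == 0 ? "yes" : "no");
	printf("next multiple > max: %s\n",
	    (val > LONG_MAX - chunk) ? "yes" : "no");

	/* Halve for as long as the result remains a multiple of 1MB. */
	while (((val / 2) % meg) == 0) {
		val /= 2;
		halvings++;
	}
	printf("megabyte-aligned halvings: %d\n", halvings);

	return (0);
}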
*/ if (state->dts_cred.dcr_cred != NULL) crfree(state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. Because * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress * (especially if they're all enabled), we take two passes through the * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and * in the second we disable whatever is left over. */ for (match = DTRACE_PRIV_KERNEL; ; match = 0) { for (i = 0; i < state->dts_necbs; i++) { if ((ecb = state->dts_ecbs[i]) == NULL) continue; if (match && ecb->dte_probe != NULL) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; if (!(prov->dtpv_priv.dtpp_flags & match)) continue; } dtrace_ecb_disable(ecb); dtrace_ecb_destroy(ecb); } if (!match) break; } /* * Before we free the buffers, perform one more sync to assure that * every CPU is out of probe context. */ dtrace_sync(); dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); for (i = 0; i < nspec; i++) dtrace_buffer_free(spec[i].dtsp_buffer); #ifdef illumos if (state->dts_cleaner != CYCLIC_NONE) cyclic_remove(state->dts_cleaner); if (state->dts_deadman != CYCLIC_NONE) cyclic_remove(state->dts_deadman); #else callout_stop(&state->dts_cleaner); callout_drain(&state->dts_cleaner); callout_stop(&state->dts_deadman); callout_drain(&state->dts_deadman); #endif dtrace_dstate_fini(&vstate->dtvs_dynvars); dtrace_vstate_fini(vstate); if (state->dts_ecbs != NULL) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { #ifdef DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif ASSERT(state->dts_naggregations > 0); kmem_free(state->dts_aggregations, state->dts_naggregations * sizeof (dtrace_aggregation_t *)); } kmem_free(state->dts_buffer, bufsize); kmem_free(state->dts_aggbuffer, bufsize); for (i = 0; i < nspec; i++) kmem_free(spec[i].dtsp_buffer, bufsize); if (spec != NULL) kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); dtrace_format_destroy(state); if (state->dts_aggid_arena != NULL) { #ifdef illumos vmem_destroy(state->dts_aggid_arena); #else delete_unrhdr(state->dts_aggid_arena); #endif state->dts_aggid_arena = NULL; } #ifdef illumos ddi_soft_state_free(dtrace_softstate, minor); vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); #endif } /* * DTrace Anonymous Enabling Functions */ static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); return (NULL); } ASSERT(dtrace_anon.dta_enabling != NULL); ASSERT(dtrace_retained != NULL); dtrace_enabling_destroy(dtrace_anon.dta_enabling); dtrace_anon.dta_enabling = NULL; dtrace_anon.dta_state = NULL; return (state); } static void dtrace_anon_property(void) { int i, rv; dtrace_state_t *state; dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); dtrace_err_verbose = 1; if ((dof = dtrace_dof_property(c)) == NULL) { dtrace_err_verbose = 0; break; } #ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If * this fails (e.g. because the debugger has modified text in * some way), we won't continue with the processing. 
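/*
 * A skeleton of the two-pass teardown above: the first pass takes only
 * entries whose provider carries a "kernel" privilege flag (the ones most
 * likely to slow everything down while still enabled), and the second
 * pass, with match == 0, takes whatever is left.  The entry type and
 * destroy_entry() are illustrative stand-ins for ECBs and their teardown.
 */
#include <stddef.h>

#define	FLAG_KERNEL	0x1

struct entry {
	unsigned	flags;		/* provider privilege flags */
	int		live;		/* still enabled? */
};

static void
destroy_entry(struct entry *e)
{
	e->live = 0;
}

static void
teardown_all(struct entry *entries, size_t n)
{
	unsigned match;
	size_t i;

	for (match = FLAG_KERNEL; ; match = 0) {
		for (i = 0; i < n; i++) {
			if (!entries[i].live)
				continue;
			if (match && !(entries[i].flags & match))
				continue;	/* not this pass */
			destroy_entry(&entries[i]);
		}
		if (!match)
			break;			/* both passes done */
	}
}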
*/ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { cmn_err(CE_NOTE, "kernel debugger active; anonymous " "enabling ignored."); dtrace_dof_destroy(dof); break; } #endif /* * If we haven't allocated an anonymous state, we'll do so now. */ if ((state = dtrace_anon.dta_state) == NULL) { state = dtrace_state_create(NULL, NULL); dtrace_anon.dta_state = state; if (state == NULL) { /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a * failure of ddi_soft_state_zalloc() that * itself should never happen. Still, the * interface allows for a failure mode, and * we want to fail as gracefully as possible: * we'll emit an error message and cease * processing anonymous state in this case. */ cmn_err(CE_WARN, "failed to create " "anonymous state"); dtrace_dof_destroy(dof); break; } } rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), - &dtrace_anon.dta_enabling, 0, B_TRUE); + &dtrace_anon.dta_enabling, 0, 0, B_TRUE); if (rv == 0) rv = dtrace_dof_options(dof, state); dtrace_err_verbose = 0; dtrace_dof_destroy(dof); if (rv != 0) { /* * This is malformed DOF; chuck any anonymous state * that we created. */ ASSERT(dtrace_anon.dta_enabling == NULL); dtrace_state_destroy(state); dtrace_anon.dta_state = NULL; break; } ASSERT(dtrace_anon.dta_enabling != NULL); } if (dtrace_anon.dta_enabling != NULL) { int rval; /* * dtrace_enabling_retain() can only fail because we are * trying to retain more enablings than are allowed -- but * we only have one anonymous enabling, and we are guaranteed * to be allowed at least one retained enabling; we assert * that dtrace_enabling_retain() returns success. */ rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); ASSERT(rval == 0); dtrace_enabling_dump(dtrace_anon.dta_enabling); } } /* * DTrace Helper Functions */ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { uint32_t size, next, nnext, i; dtrace_helptrace_t *ent, *buffer; uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags; if ((buffer = dtrace_helptrace_buffer) == NULL) return; ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); /* * What would a tracing framework be without its own tracing * framework? (Well, a hell of a lot simpler, for starters...) */ size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * sizeof (uint64_t) - sizeof (uint64_t); /* * Iterate until we can allocate a slot in the trace buffer. */ do { next = dtrace_helptrace_next; if (next + size < dtrace_helptrace_bufsize) { nnext = next + size; } else { nnext = size; } } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); /* * We have our slot; fill it in. */ if (nnext == size) { dtrace_helptrace_wrapped++; next = 0; } ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next); ent->dtht_helper = helper; ent->dtht_where = where; ent->dtht_nlocals = vstate->dtvs_nlocals; ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 
mstate->dtms_fltoffs : -1; ent->dtht_fault = DTRACE_FLAGS2FLT(flags); ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval; for (i = 0; i < vstate->dtvs_nlocals; i++) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[i]) == NULL) continue; ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); ent->dtht_locals[i] = ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu]; } } static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) { uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; uint64_t sarg0 = mstate->dtms_arg[0]; uint64_t sarg1 = mstate->dtms_arg[1]; uint64_t rval = 0; dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; dtrace_helper_action_t *helper; dtrace_vstate_t *vstate; dtrace_difo_t *pred; int i, trace = dtrace_helptrace_buffer != NULL; ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); if (helpers == NULL) return (0); if ((helper = helpers->dthps_actions[which]) == NULL) return (0); vstate = &helpers->dthps_vstate; mstate->dtms_arg[0] = arg0; mstate->dtms_arg[1] = arg1; /* * Now iterate over each helper. If its predicate evaluates to 'true', * we'll call the corresponding actions. Note that the below calls * to dtrace_dif_emulate() may set faults in machine state. This is * okay: our caller (the outer dtrace_dif_emulate()) will simply plow * the stored DIF offset with its own (which is the desired behavior). * Also, note the calls to dtrace_dif_emulate() may allocate scratch * from machine state; this is okay, too. */ for (; helper != NULL; helper = helper->dtha_next) { if ((pred = helper->dtha_predicate) != NULL) { if (trace) dtrace_helper_trace(helper, mstate, vstate, 0); if (!dtrace_dif_emulate(pred, mstate, vstate, state)) goto next; if (*flags & CPU_DTRACE_FAULT) goto err; } for (i = 0; i < helper->dtha_nactions; i++) { if (trace) dtrace_helper_trace(helper, mstate, vstate, i + 1); rval = dtrace_dif_emulate(helper->dtha_actions[i], mstate, vstate, state); if (*flags & CPU_DTRACE_FAULT) goto err; } next: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_NEXT); } if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_DONE); /* * Restore the arg0 that we saved upon entry. */ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (rval); err: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_ERR); /* * Restore the arg0 that we saved upon entry. 
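/*
 * A user-space sketch of the wrap-around slot reservation used by
 * dtrace_helper_trace() above, with C11 atomics standing in for
 * dtrace_cas32(): writers race to advance a shared cursor, the winner of
 * the compare-and-swap owns the bytes between the old and new cursor
 * values, and a reservation that would run past the end of the buffer
 * wraps back to offset zero.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define	TRACE_BUFSIZE	4096u

static _Atomic uint32_t trace_next;

static uint32_t
trace_reserve(uint32_t size)
{
	uint32_t next, nnext;

	do {
		next = atomic_load(&trace_next);
		if (next + size < TRACE_BUFSIZE)
			nnext = next + size;	/* fits: append */
		else
			nnext = size;		/* doesn't fit: wrap to 0 */
	} while (!atomic_compare_exchange_weak(&trace_next, &next, nnext));

	/*
	 * nnext == size means this slot starts at offset zero (either a
	 * wrap, or the very first reservation in an empty buffer).
	 */
	return (nnext == size ? 0 : next);
}

int
main(void)
{
	printf("slot at %u\n", trace_reserve(64));
	printf("slot at %u\n", trace_reserve(64));
	return (0);
}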
*/ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (0); } static void dtrace_helper_action_destroy(dtrace_helper_action_t *helper, dtrace_vstate_t *vstate) { int i; if (helper->dtha_predicate != NULL) dtrace_difo_release(helper->dtha_predicate, vstate); for (i = 0; i < helper->dtha_nactions; i++) { ASSERT(helper->dtha_actions[i] != NULL); dtrace_difo_release(helper->dtha_actions[i], vstate); } kmem_free(helper->dtha_actions, helper->dtha_nactions * sizeof (dtrace_difo_t *)); kmem_free(helper, sizeof (dtrace_helper_action_t)); } static int dtrace_helper_destroygen(dtrace_helpers_t *help, int gen) { proc_t *p = curproc; dtrace_vstate_t *vstate; int i; if (help == NULL) help = p->p_dtrace_helpers; ASSERT(MUTEX_HELD(&dtrace_lock)); if (help == NULL || gen > help->dthps_generation) return (EINVAL); vstate = &help->dthps_vstate; for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *last = NULL, *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; if (h->dtha_generation == gen) { if (last != NULL) { last->dtha_next = next; } else { help->dthps_actions[i] = next; } dtrace_helper_action_destroy(h, vstate); } else { last = h; } } } /* * Interate until we've cleared out all helper providers with the * given generation number. */ for (;;) { dtrace_helper_provider_t *prov; /* * Look for a helper provider with the right generation. We * have to start back at the beginning of the list each time * because we drop dtrace_lock. It's unlikely that we'll make * more than two passes. */ for (i = 0; i < help->dthps_nprovs; i++) { prov = help->dthps_provs[i]; if (prov->dthp_generation == gen) break; } /* * If there were no matches, we're done. */ if (i == help->dthps_nprovs) break; /* * Move the last helper provider into this slot. */ help->dthps_nprovs--; help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; help->dthps_provs[help->dthps_nprovs] = NULL; mutex_exit(&dtrace_lock); /* * If we have a meta provider, remove this helper provider. */ mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, p->p_pid); } mutex_exit(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); mutex_enter(&dtrace_lock); } return (0); } static int dtrace_helper_validate(dtrace_helper_action_t *helper) { int err = 0, i; dtrace_difo_t *dp; if ((dp = helper->dtha_predicate) != NULL) err += dtrace_difo_validate_helper(dp); for (i = 0; i < helper->dtha_nactions; i++) err += dtrace_difo_validate_helper(helper->dtha_actions[i]); return (err == 0); } static int dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep, dtrace_helpers_t *help) { dtrace_helper_action_t *helper, *last; dtrace_actdesc_t *act; dtrace_vstate_t *vstate; dtrace_predicate_t *pred; int count = 0, nactions = 0, i; if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); last = help->dthps_actions[which]; vstate = &help->dthps_vstate; for (count = 0; last != NULL; last = last->dtha_next) { count++; if (last->dtha_next == NULL) break; } /* * If we already have dtrace_helper_actions_max helper actions for this * helper action type, we'll refuse to add a new one. 
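/*
 * A minimal sketch of the generation-based cleanup performed by
 * dtrace_helper_destroygen() above: walk a singly linked list and splice
 * out every node whose generation matches, keeping 'last' pointed at the
 * most recent survivor so that either the list head or the previous node
 * can be repaired.  The node type here is illustrative.
 */
#include <stdlib.h>

struct action {
	int		gen;
	struct action	*next;
};

static void
destroy_generation(struct action **headp, int gen)
{
	struct action *last = NULL, *a, *next;

	for (a = *headp; a != NULL; a = next) {
		next = a->next;

		if (a->gen == gen) {
			if (last != NULL)
				last->next = next;	/* unlink mid-list */
			else
				*headp = next;		/* unlink the head */
			free(a);
		} else {
			last = a;
		}
	}
}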
*/ if (count >= dtrace_helper_actions_max) return (ENOSPC); helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); helper->dtha_generation = help->dthps_generation; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) { ASSERT(pred->dtp_difo != NULL); dtrace_difo_hold(pred->dtp_difo); helper->dtha_predicate = pred->dtp_difo; } for (act = ep->dted_action; act != NULL; act = act->dtad_next) { if (act->dtad_kind != DTRACEACT_DIFEXPR) goto err; if (act->dtad_difo == NULL) goto err; nactions++; } helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) * (helper->dtha_nactions = nactions), KM_SLEEP); for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { dtrace_difo_hold(act->dtad_difo); helper->dtha_actions[i++] = act->dtad_difo; } if (!dtrace_helper_validate(helper)) goto err; if (last == NULL) { help->dthps_actions[which] = helper; } else { last->dtha_next = helper; } if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } return (0); err: dtrace_helper_action_destroy(helper, vstate); return (EINVAL); } static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { ASSERT(MUTEX_NOT_HELD(&dtrace_lock)); mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { /* * If the dtrace module is loaded but not attached, or if * there aren't isn't a meta provider registered to deal with * these provider descriptions, we need to postpone creating * the actual providers until later. */ if (help->dthps_next == NULL && help->dthps_prev == NULL && dtrace_deferred_pid != help) { help->dthps_deferred = 1; help->dthps_pid = p->p_pid; help->dthps_next = dtrace_deferred_pid; help->dthps_prev = NULL; if (dtrace_deferred_pid != NULL) dtrace_deferred_pid->dthps_prev = help; dtrace_deferred_pid = help; } mutex_exit(&dtrace_lock); } else if (dofhp != NULL) { /* * If the dtrace module is loaded and we have a particular * helper provider description, pass that off to the * meta provider. */ mutex_exit(&dtrace_lock); dtrace_helper_provide(dofhp, p->p_pid); } else { /* * Otherwise, just pass all the helper provider descriptions * off to the meta provider. */ int i; mutex_exit(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, p->p_pid); } } mutex_exit(&dtrace_meta_lock); } static int dtrace_helper_provider_add(dof_helper_t *dofhp, dtrace_helpers_t *help, int gen) { dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(help != NULL); /* * If we already have dtrace_helper_providers_max helper providers, * we're refuse to add a new one. */ if (help->dthps_nprovs >= dtrace_helper_providers_max) return (ENOSPC); /* * Check to make sure this isn't a duplicate. */ for (i = 0; i < help->dthps_nprovs; i++) { if (dofhp->dofhp_addr == help->dthps_provs[i]->dthp_prov.dofhp_addr) return (EALREADY); } hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); hprov->dthp_prov = *dofhp; hprov->dthp_ref = 1; hprov->dthp_generation = gen; /* * Allocate a bigger table for helper providers if it's already full. 
*/ if (help->dthps_maxprovs == help->dthps_nprovs) { tmp_maxprovs = help->dthps_maxprovs; tmp_provs = help->dthps_provs; if (help->dthps_maxprovs == 0) help->dthps_maxprovs = 2; else help->dthps_maxprovs *= 2; if (help->dthps_maxprovs > dtrace_helper_providers_max) help->dthps_maxprovs = dtrace_helper_providers_max; ASSERT(tmp_maxprovs < help->dthps_maxprovs); help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); if (tmp_provs != NULL) { bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); kmem_free(tmp_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); } } help->dthps_provs[help->dthps_nprovs] = hprov; help->dthps_nprovs++; return (0); } static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) { mutex_enter(&dtrace_lock); if (--hprov->dthp_ref == 0) { dof_hdr_t *dof; mutex_exit(&dtrace_lock); dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; dtrace_dof_destroy(dof); kmem_free(hprov, sizeof (dtrace_helper_provider_t)); } else { mutex_exit(&dtrace_lock); } } static int dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) { uintptr_t daddr = (uintptr_t)dof; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint8_t *arg; char *strtab, *typestr; dof_stridx_t typeidx; size_t typesz; uint_t nprobes, j, k; ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); if (sec->dofs_offset & (sizeof (uint_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } /* * The section needs to be large enough to contain the DOF provider * structure appropriate for the given version. */ if (sec->dofs_size < ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? offsetof(dof_provider_t, dofpv_prenoffs) : sizeof (dof_provider_t))) { dtrace_dof_error(dof, "provider section too small"); return (-1); } provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); if (str_sec == NULL || prb_sec == NULL || arg_sec == NULL || off_sec == NULL) return (-1); enoff_sec = NULL; if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE && (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, provider->dofpv_prenoffs)) == NULL) return (-1); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); if (provider->dofpv_name >= str_sec->dofs_size || strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { dtrace_dof_error(dof, "invalid provider name"); return (-1); } if (prb_sec->dofs_entsize == 0 || prb_sec->dofs_entsize > prb_sec->dofs_size) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { dtrace_dof_error(dof, "misaligned entry size"); return (-1); } if (off_sec->dofs_entsize != sizeof (uint32_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } if (arg_sec->dofs_entsize != sizeof (uint8_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Take a pass through the 
probes to check for errors. */ for (j = 0; j < nprobes; j++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + j * prb_sec->dofs_entsize); if (probe->dofpr_func >= str_sec->dofs_size) { dtrace_dof_error(dof, "invalid function name"); return (-1); } if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { dtrace_dof_error(dof, "function name too long"); /* * Keep going if the function name is too long. * Unlike provider and probe names, we cannot reasonably * impose restrictions on function names, since they're * a property of the code being instrumented. We will * skip this probe in dtrace_helper_provide_one(). */ } if (probe->dofpr_name >= str_sec->dofs_size || strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { dtrace_dof_error(dof, "invalid probe name"); return (-1); } /* * The offset count must not wrap the index, and the offsets * must also not overflow the section's data. */ if (probe->dofpr_offidx + probe->dofpr_noffs < probe->dofpr_offidx || (probe->dofpr_offidx + probe->dofpr_noffs) * off_sec->dofs_entsize > off_sec->dofs_size) { dtrace_dof_error(dof, "invalid probe offset"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { /* * If there's no is-enabled offset section, make sure * there aren't any is-enabled offsets. Otherwise * perform the same checks as for probe offsets * (immediately above). */ if (enoff_sec == NULL) { if (probe->dofpr_enoffidx != 0 || probe->dofpr_nenoffs != 0) { dtrace_dof_error(dof, "is-enabled " "offsets with null section"); return (-1); } } else if (probe->dofpr_enoffidx + probe->dofpr_nenoffs < probe->dofpr_enoffidx || (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * enoff_sec->dofs_entsize > enoff_sec->dofs_size) { dtrace_dof_error(dof, "invalid is-enabled " "offset"); return (-1); } if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { dtrace_dof_error(dof, "zero probe and " "is-enabled offsets"); return (-1); } } else if (probe->dofpr_noffs == 0) { dtrace_dof_error(dof, "zero probe offsets"); return (-1); } if (probe->dofpr_argidx + probe->dofpr_xargc < probe->dofpr_argidx || (probe->dofpr_argidx + probe->dofpr_xargc) * arg_sec->dofs_entsize > arg_sec->dofs_size) { dtrace_dof_error(dof, "invalid args"); return (-1); } typeidx = probe->dofpr_nargv; typestr = strtab + probe->dofpr_nargv; for (k = 0; k < probe->dofpr_nargc; k++) { if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "native argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "native " "argument type too long"); return (-1); } typeidx += typesz; typestr += typesz; } typeidx = probe->dofpr_xargv; typestr = strtab + probe->dofpr_xargv; for (k = 0; k < probe->dofpr_xargc; k++) { if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { dtrace_dof_error(dof, "bad " "native argument index"); return (-1); } if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "translated argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "translated argument " "type too long"); return (-1); } typeidx += typesz; typestr += typesz; } } return (0); } static int dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp, struct proc *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; dtrace_enabling_t *enab = NULL; int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); 
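/*
 * A condensed sketch of the bounds checks applied to each DOF probe above:
 * an (index, count) pair into a table is rejected if the 32-bit sum wraps
 * or if the last entry would fall outside the section.  The helper below
 * widens to 64 bits before multiplying, so the second comparison cannot
 * itself overflow.
 */
#include <stdint.h>
#include <stdio.h>

static int
range_ok(uint32_t idx, uint32_t count, uint32_t entsize, uint64_t secsize)
{
	/* The addition must not wrap the 32-bit index space. */
	if (idx + count < idx)
		return (0);

	/* The last entry must end within the section's data. */
	if ((uint64_t)(idx + count) * entsize > secsize)
		return (0);

	return (1);
}

int
main(void)
{
	printf("%d\n", range_ok(10, 4, 4, 1024));		/* 1: fits */
	printf("%d\n", range_ok(UINT32_MAX, 2, 4, 1024));	/* 0: wraps */
	printf("%d\n", range_ok(200, 100, 4, 1024));		/* 0: too big */
	return (0);
}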
vstate = &help->dthps_vstate; if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, dhp->dofhp_addr, - B_FALSE)) != 0) { + dhp->dofhp_dof, B_FALSE)) != 0) { dtrace_dof_destroy(dof); return (rv); } /* * Look for helper providers and validate their descriptions. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; if (dtrace_helper_provider_validate(dof, sec) != 0) { dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nprovs++; } /* * Now we need to walk through the ECB descriptions in the enabling. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; if (strcmp(desc->dtpd_provider, "dtrace") != 0) continue; if (strcmp(desc->dtpd_mod, "helper") != 0) continue; if (strcmp(desc->dtpd_func, "ustack") != 0) continue; if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep, help)) != 0) { /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. */ (void) dtrace_helper_destroygen(help, help->dthps_generation); dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nhelpers++; } if (nhelpers < enab->dten_ndesc) dtrace_dof_error(dof, "unmatched helpers"); gen = help->dthps_generation++; dtrace_enabling_destroy(enab); if (nprovs > 0) { /* * Now that this is in-kernel, we change the sense of the * members: dofhp_dof denotes the in-kernel copy of the DOF * and dofhp_addr denotes the address at user-level. */ dhp->dofhp_addr = dhp->dofhp_dof; dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; if (dtrace_helper_provider_add(dhp, help, gen) == 0) { mutex_exit(&dtrace_lock); dtrace_helper_provider_register(p, help, dhp); mutex_enter(&dtrace_lock); destroy = 0; } } if (destroy) dtrace_dof_destroy(dof); return (gen); } static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS, KM_SLEEP); p->p_dtrace_helpers = help; dtrace_helpers++; return (help); } #ifdef illumos static #endif void dtrace_helpers_destroy(proc_t *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; #ifdef illumos proc_t *p = curproc; #endif int i; mutex_enter(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = p->p_dtrace_helpers; vstate = &help->dthps_vstate; /* * We're now going to lose the help from this process. */ p->p_dtrace_helpers = NULL; dtrace_sync(); /* * Destory the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; dtrace_helper_action_destroy(h, vstate); h = next; } } mutex_exit(&dtrace_lock); /* * Destroy the helper providers. */ if (help->dthps_maxprovs > 0) { mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( &help->dthps_provs[i]->dthp_prov, p->p_pid); } } else { mutex_enter(&dtrace_lock); ASSERT(help->dthps_deferred == 0 || help->dthps_next != NULL || help->dthps_prev != NULL || help == dtrace_deferred_pid); /* * Remove the helper from the deferred list. 
*/ if (help->dthps_next != NULL) help->dthps_next->dthps_prev = help->dthps_prev; if (help->dthps_prev != NULL) help->dthps_prev->dthps_next = help->dthps_next; if (dtrace_deferred_pid == help) { dtrace_deferred_pid = help->dthps_next; ASSERT(help->dthps_prev == NULL); } mutex_exit(&dtrace_lock); } mutex_exit(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); } kmem_free(help->dthps_provs, help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *)); } mutex_enter(&dtrace_lock); dtrace_vstate_fini(&help->dthps_vstate); kmem_free(help->dthps_actions, sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); kmem_free(help, sizeof (dtrace_helpers_t)); --dtrace_helpers; mutex_exit(&dtrace_lock); } #ifdef illumos static #endif void dtrace_helpers_duplicate(proc_t *from, proc_t *to) { dtrace_helpers_t *help, *newhelp; dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; int i, j, sz, hasprovs = 0; mutex_enter(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = from->p_dtrace_helpers; newhelp = dtrace_helpers_create(to); ASSERT(to->p_dtrace_helpers != NULL); newhelp->dthps_generation = help->dthps_generation; vstate = &newhelp->dthps_vstate; /* * Duplicate the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { if ((helper = help->dthps_actions[i]) == NULL) continue; for (last = NULL; helper != NULL; helper = helper->dtha_next) { new = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); new->dtha_generation = helper->dtha_generation; if ((dp = helper->dtha_predicate) != NULL) { dp = dtrace_difo_duplicate(dp, vstate); new->dtha_predicate = dp; } new->dtha_nactions = helper->dtha_nactions; sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; new->dtha_actions = kmem_alloc(sz, KM_SLEEP); for (j = 0; j < new->dtha_nactions; j++) { dtrace_difo_t *dp = helper->dtha_actions[j]; ASSERT(dp != NULL); dp = dtrace_difo_duplicate(dp, vstate); new->dtha_actions[j] = dp; } if (last != NULL) { last->dtha_next = new; } else { newhelp->dthps_actions[i] = new; } last = new; } } /* * Duplicate the helper providers and register them with the * DTrace framework. */ if (help->dthps_nprovs > 0) { newhelp->dthps_nprovs = help->dthps_nprovs; newhelp->dthps_maxprovs = help->dthps_nprovs; newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); for (i = 0; i < newhelp->dthps_nprovs; i++) { newhelp->dthps_provs[i] = help->dthps_provs[i]; newhelp->dthps_provs[i]->dthp_ref++; } hasprovs = 1; } mutex_exit(&dtrace_lock); if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); } /* * DTrace Hook Functions */ static void dtrace_module_loaded(modctl_t *ctl) { dtrace_provider_t *prv; mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif #ifdef illumos ASSERT(ctl->mod_busy); #endif /* * We're going to call each providers per-module provide operation * specifying only this module. */ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); /* * If we have any retained enablings, we need to match against them. * Enabling probes requires that cpu_lock be held, and we cannot hold * cpu_lock here -- it is legal for cpu_lock to be held when loading a * module. (In particular, this happens when loading scheduling * classes.) 
So if we have any retained enablings, we need to dispatch * our task queue to do the match for us. */ mutex_enter(&dtrace_lock); if (dtrace_retained == NULL) { mutex_exit(&dtrace_lock); return; } (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); mutex_exit(&dtrace_lock); /* * And now, for a little heuristic sleaze: in general, we want to * match modules as soon as they load. However, we cannot guarantee * this, because it would lead us to the lock ordering violation * outlined above. The common case, of course, is that cpu_lock is * _not_ held -- so we delay here for a clock tick, hoping that that's * long enough for the task queue to do its work. If it's not, it's * not a serious problem -- it just means that the module that we * just loaded may not be immediately instrumentable. */ delay(1); } static void #ifdef illumos dtrace_module_unloaded(modctl_t *ctl) #else dtrace_module_unloaded(modctl_t *ctl, int *error) #endif { dtrace_probe_t template, *probe, *first, *next; dtrace_provider_t *prov; #ifndef illumos char modname[DTRACE_MODNAMELEN]; size_t len; #endif #ifdef illumos template.dtpr_mod = ctl->mod_modname; #else /* Handle the fact that ctl->filename may end in ".ko". */ strlcpy(modname, ctl->filename, sizeof(modname)); len = strlen(ctl->filename); if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) modname[len - 3] = '\0'; template.dtpr_mod = modname; #endif mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); #ifndef illumos if (ctl->nenabled > 0) { /* Don't allow unloads if a probe is enabled. */ mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); *error = -1; printf( "kldunload: attempt to unload module that has DTrace probes enabled\n"); return; } #endif if (dtrace_bymod == NULL) { /* * The DTrace module is loaded (obviously) but not attached; * we don't have any work to do. */ mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); return; } for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); probe != NULL; probe = probe->dtpr_nextmod) { if (probe->dtpr_ecb != NULL) { mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); /* * This shouldn't _actually_ be possible -- we're * unloading a module that has an enabled probe in it. * (It's normally up to the provider to make sure that * this can't happen.) However, because dtps_enable() * doesn't have a failure mode, there can be an * enable/unload race. Upshot: we don't want to * assert, but we're not going to disable the * probe, either. */ if (dtrace_err_verbose) { #ifdef illumos cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", ctl->mod_modname); #else cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", modname); #endif } return; } } probe = first; for (first = NULL; probe != NULL; probe = next) { ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * We've removed all of the module's probes from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
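/*
 * A user-space sketch of the module-name normalization above: copy the
 * kld file name into a fixed-size buffer and strip a trailing ".ko" so
 * that the name matches what providers registered their probes under.
 * The buffer size and snprintf() (in place of strlcpy()) are illustrative.
 */
#include <stdio.h>
#include <string.h>

#define	MODNAMELEN	64

static void
modname_from_filename(const char *filename, char modname[MODNAMELEN])
{
	size_t len;

	(void) snprintf(modname, MODNAMELEN, "%s", filename);
	len = strlen(modname);
	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
		modname[len - 3] = '\0';
}

int
main(void)
{
	char name[MODNAMELEN];

	modname_from_filename("dtrace_test.ko", name);
	printf("%s\n", name);		/* prints "dtrace_test" */
	return (0);
}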
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } #ifndef illumos static void dtrace_kld_load(void *arg __unused, linker_file_t lf) { dtrace_module_loaded(lf); } static void dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error) { if (*error != 0) /* We already have an error, so don't do anything. */ return; dtrace_module_unloaded(lf, error); } #endif #ifdef illumos static void dtrace_suspend(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); } static void dtrace_resume(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); } #endif static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { ASSERT(MUTEX_HELD(&cpu_lock)); mutex_enter(&dtrace_lock); switch (what) { case CPU_CONFIG: { dtrace_state_t *state; dtrace_optval_t *opt, rs, c; /* * For now, we only allocate a new buffer for anonymous state. */ if ((state = dtrace_anon.dta_state) == NULL) break; if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) break; opt = state->dts_options; c = opt[DTRACEOPT_CPU]; if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) break; /* * Regardless of what the actual policy is, we're going to * temporarily set our resize policy to be manual. We're * also going to temporarily set our CPU option to denote * the newly configured CPU. */ rs = opt[DTRACEOPT_BUFRESIZE]; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; (void) dtrace_state_buffers(state); opt[DTRACEOPT_BUFRESIZE] = rs; opt[DTRACEOPT_CPU] = c; break; } case CPU_UNCONFIG: /* * We don't free the buffer in the CPU_UNCONFIG case. (The * buffer will be freed when the consumer exits.) 
*/ break; default: break; } mutex_exit(&dtrace_lock); return (0); } #ifdef illumos static void dtrace_cpu_setup_initial(processorid_t cpu) { (void) dtrace_cpu_setup(CPU_CONFIG, cpu); } #endif static void dtrace_toxrange_add(uintptr_t base, uintptr_t limit) { if (dtrace_toxranges >= dtrace_toxranges_max) { int osize, nsize; dtrace_toxrange_t *range; osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); if (osize == 0) { ASSERT(dtrace_toxrange == NULL); ASSERT(dtrace_toxranges_max == 0); dtrace_toxranges_max = 1; } else { dtrace_toxranges_max <<= 1; } nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); range = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_toxrange != NULL) { ASSERT(osize != 0); bcopy(dtrace_toxrange, range, osize); kmem_free(dtrace_toxrange, osize); } dtrace_toxrange = range; } ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; dtrace_toxranges++; } static void dtrace_getf_barrier() { #ifdef illumos /* * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings * that contain calls to getf(), this routine will be called on every * closef() before either the underlying vnode is released or the * file_t itself is freed. By the time we are here, it is essential * that the file_t can no longer be accessed from a call to getf() * in probe context -- that assures that a dtrace_sync() can be used * to clear out any enablings referring to the old structures. */ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 || kcred->cr_zone->zone_dtrace_getf != 0) dtrace_sync(); #endif } /* * DTrace Driver Cookbook Functions */ #ifdef illumos /*ARGSUSED*/ static int dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if (ddi_soft_state_init(&dtrace_softstate, sizeof (dtrace_state_t), 0) != 0) { cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); ddi_remove_minor_node(devi, NULL); ddi_soft_state_fini(&dtrace_softstate); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } ddi_report_dev(devi); dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; dtrace_modunload = dtrace_module_unloaded; dtrace_cpu_init = dtrace_cpu_setup_initial; dtrace_helpers_cleanup = dtrace_helpers_destroy; dtrace_helpers_fork = dtrace_helpers_duplicate; dtrace_cpustart_init = dtrace_suspend; dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_taskq = taskq_create("dtrace_taskq", 1, 
maxclsyspri, 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); if (dtrace_retain_max < 1) { cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " "setting to 1", dtrace_retain_max); dtrace_retain_max = 1; } /* * Now discover our toxic ranges. */ dtrace_toxic_ranges(dtrace_toxrange_add); /* * Before we register ourselves as a provider to our own framework, * we would like to assert that dtrace_provider is NULL -- but that's * not true if we were loaded as a dependency of a DTrace provider. * Once we've registered, we can assert that dtrace_provider is our * pseudo provider. */ (void) dtrace_register("dtrace", &dtrace_provider_attr, DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); dtrace_anon_property(); mutex_exit(&cpu_lock); /* * If there are already providers, we must ask them to provide their * probes, and then match any anonymous enabling against them. Note * that there should be no other retained enablings at this time: * the only retained enablings at this time should be the anonymous * enabling. */ if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; /* * We couldn't hold cpu_lock across the above call to * dtrace_enabling_provide(), but we must hold it to actually * enable the probes. We have to drop all of our locks, pick * up cpu_lock, and regain our locks before matching the * retained anonymous enabling. */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) (void) dtrace_enabling_match(enab, NULL); mutex_exit(&cpu_lock); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); if (state != NULL) { /* * If we created any anonymous state, set it going now. */ (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); } return (DDI_SUCCESS); } #endif /* illumos */ #ifndef illumos static void dtrace_dtr(void *); #endif /*ARGSUSED*/ static int #ifdef illumos dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) #else dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td) #endif { dtrace_state_t *state; uint32_t priv; uid_t uid; zoneid_t zoneid; #ifdef illumos if (getminor(*devp) == DTRACEMNRN_HELPER) return (0); /* * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. 
*/ if (getminor(*devp) == DTRACEMNRN_DTRACE) return (ENXIO); #else cred_t *cred_p = NULL; cred_p = dev->si_cred; /* * If no DTRACE_PRIV_* bits are set in the credential, then the * caller lacks sufficient permission to do anything with DTrace. */ dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); if (priv == DTRACE_PRIV_NONE) { #endif return (EACCES); } /* * Ask all providers to provide all their probes. */ mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(NULL, NULL); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); dtrace_opens++; dtrace_membar_producer(); #ifdef illumos /* * If the kernel debugger is active (that is, if the kernel debugger * modified text in some way), we won't allow the open. */ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { dtrace_opens--; mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); return (EBUSY); } if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) { /* * If DTrace helper tracing is enabled, we need to allocate the * trace buffer and initialize the values. */ dtrace_helptrace_buffer = kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); dtrace_helptrace_next = 0; dtrace_helptrace_wrapped = 0; dtrace_helptrace_enable = 0; } state = dtrace_state_create(devp, cred_p); #else state = dtrace_state_create(dev, NULL); devfs_set_cdevpriv(state, dtrace_dtr); #endif mutex_exit(&cpu_lock); if (state == NULL) { #ifdef illumos if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif mutex_exit(&dtrace_lock); return (EAGAIN); } mutex_exit(&dtrace_lock); return (0); } /*ARGSUSED*/ #ifdef illumos static int dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) #else static void dtrace_dtr(void *data) #endif { #ifdef illumos minor_t minor = getminor(dev); dtrace_state_t *state; #endif dtrace_helptrace_t *buf = NULL; #ifdef illumos if (minor == DTRACEMNRN_HELPER) return (0); state = ddi_get_soft_state(dtrace_softstate, minor); #else dtrace_state_t *state = data; #endif mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); #ifdef illumos if (state->dts_anon) #else if (state != NULL && state->dts_anon) #endif { /* * There is anonymous state. Destroy that first. */ ASSERT(dtrace_anon.dta_state == NULL); dtrace_state_destroy(state->dts_anon); } if (dtrace_helptrace_disable) { /* * If we have been told to disable helper tracing, set the * buffer to NULL before calling into dtrace_state_destroy(); * we take advantage of its dtrace_sync() to know that no * CPU is in probe context with enabled helper tracing * after it returns. */ buf = dtrace_helptrace_buffer; dtrace_helptrace_buffer = NULL; } #ifdef illumos dtrace_state_destroy(state); #else if (state != NULL) { dtrace_state_destroy(state); kmem_free(state, 0); } #endif ASSERT(dtrace_opens > 0); #ifdef illumos /* * Only relinquish control of the kernel debugger interface when there * are no consumers and no anonymous enablings. 
*/ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif if (buf != NULL) { kmem_free(buf, dtrace_helptrace_bufsize); dtrace_helptrace_disable = 0; } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); #ifdef illumos return (0); #endif } #ifdef illumos /*ARGSUSED*/ static int dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) { int rval; dof_helper_t help, *dhp = NULL; switch (cmd) { case DTRACEHIOC_ADDDOF: if (copyin((void *)arg, &help, sizeof (help)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF helper"); return (EFAULT); } dhp = &help; arg = (intptr_t)help.dofhp_dof; /*FALLTHROUGH*/ case DTRACEHIOC_ADD: { dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); if (dof == NULL) return (rval); mutex_enter(&dtrace_lock); /* * dtrace_helper_slurp() takes responsibility for the dof -- * it may free it now or it may save it and free it later. */ if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { *rv = rval; rval = 0; } else { rval = EINVAL; } mutex_exit(&dtrace_lock); return (rval); } case DTRACEHIOC_REMOVE: { mutex_enter(&dtrace_lock); rval = dtrace_helper_destroygen(NULL, arg); mutex_exit(&dtrace_lock); return (rval); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { minor_t minor = getminor(dev); dtrace_state_t *state; int rval; if (minor == DTRACEMNRN_HELPER) return (dtrace_ioctl_helper(cmd, arg, rv)); state = ddi_get_soft_state(dtrace_softstate, minor); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); state = state->dts_anon; } switch (cmd) { case DTRACEIOC_PROVIDER: { dtrace_providerdesc_t pvd; dtrace_provider_t *pvp; if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) return (EFAULT); pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; mutex_enter(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) break; } mutex_exit(&dtrace_provider_lock); if (pvp == NULL) return (ESRCH); bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) return (EFAULT); return (0); } case DTRACEIOC_EPROBE: { dtrace_eprobedesc_t epdesc; dtrace_ecb_t *ecb; dtrace_action_t *act; void *buf; size_t size; uintptr_t dest; int nrecs; if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } if (ecb->dte_probe == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; epdesc.dtepd_uarg = ecb->dte_uarg; epdesc.dtepd_size = ecb->dte_size; nrecs = epdesc.dtepd_nrecs; epdesc.dtepd_nrecs = 0; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; epdesc.dtepd_nrecs++; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. 
*/ size = sizeof (dtrace_eprobedesc_t) + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&epdesc, (void *)dest, sizeof (epdesc)); dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; if (nrecs-- == 0) break; bcopy(&act->dta_rec, (void *)dest, sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_AGGDESC: { dtrace_aggdesc_t aggdesc; dtrace_action_t *act; dtrace_aggregation_t *agg; int nrecs; uint32_t offs; dtrace_recdesc_t *lrec; void *buf; size_t size; uintptr_t dest; if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; nrecs = aggdesc.dtagd_nrecs; aggdesc.dtagd_nrecs = 0; offs = agg->dtag_base; lrec = &agg->dtag_action.dta_rec; aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it * denotes an argument to the aggregating action. * Because the presence of this record doesn't (or * shouldn't) affect the way the data is interpreted, * we don't copy it out to save user-level the * confusion of dealing with a zero-length record. */ if (act->dta_rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } aggdesc.dtagd_nrecs++; if (act == &agg->dtag_action) break; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); for (act = agg->dtag_first; ; act = act->dta_next) { dtrace_recdesc_t rec = act->dta_rec; /* * See the comment in the above loop for why we pass * over zero-length records. */ if (rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } if (nrecs-- == 0) break; rec.dtrd_offset -= offs; bcopy(&rec, (void *)dest, sizeof (rec)); dest += sizeof (dtrace_recdesc_t); if (act == &agg->dtag_action) break; } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_ENABLE: { dof_hdr_t *dof; dtrace_enabling_t *enab = NULL; dtrace_vstate_t *vstate; int err = 0; *rv = 0; /* * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
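From the consumer side this gives DTRACEIOC_ENABLE two modes: a NULL argument asks the driver to re-run matching for its retained enablings, while a non-NULL argument points at a DOF image to be slurped, matched, and retained. A hedged userland sketch of both calls, assuming the illumos-style ioctl interface implemented here (the FreeBSD wrappers differ slightly):

/*
 * Illustrative sketch only.  'fd' is assumed to be an open descriptor for
 * the DTrace device; real consumers build the DOF image via libdtrace.
 */
#include <sys/dtrace.h>
#include <sys/ioctl.h>

static int
enable_dof(int fd, dof_hdr_t *dof)
{
	if (dof == NULL) {
		/* Re-evaluate previously retained enablings. */
		return (ioctl(fd, DTRACEIOC_ENABLE, NULL));
	}

	/*
	 * On illumos the driver's *rv -- the number of probes the enabling
	 * matched -- becomes the ioctl(2) return value on success.
	 */
	return (ioctl(fd, DTRACEIOC_ENABLE, dof));
}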
*/ if (arg == NULL) { dtrace_enabling_matchall(); return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) return (rval); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); vstate = &state->dts_vstate; if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EBUSY); } if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EINVAL); } if ((rval = dtrace_dof_options(dof, state)) != 0) { dtrace_enabling_destroy(enab); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (rval); } if ((err = dtrace_enabling_match(enab, rv)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); dtrace_dof_destroy(dof); return (err); } case DTRACEIOC_REPLICATE: { dtrace_repldesc_t desc; dtrace_probedesc_t *match = &desc.dtrpd_match; dtrace_probedesc_t *create = &desc.dtrpd_create; int err; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; mutex_enter(&dtrace_lock); err = dtrace_enabling_replicate(state, match, create); mutex_exit(&dtrace_lock); return (err); } case DTRACEIOC_PROBEMATCH: case DTRACEIOC_PROBES: { dtrace_probe_t *probe = NULL; dtrace_probedesc_t desc; dtrace_probekey_t pkey; dtrace_id_t i; int m = 0; uint32_t priv; uid_t uid; zoneid_t zoneid; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; /* * Before we attempt to match this probe, we want to give * all providers the opportunity to provide it. 
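The userland side of this loop is a plain enumeration: start at DTRACE_IDNONE and, after each successful DTRACEIOC_PROBES call, bump dtpd_id past the probe that was just returned. A rough sketch follows; the device path and the direct ioctl use are illustrative assumptions (consumers normally go through libdtrace), and error handling is minimal.

/*
 * Illustrative sketch: enumerate every probe visible to the caller by
 * repeatedly issuing DTRACEIOC_PROBES and advancing dtpd_id.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/dtrace.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	dtrace_probedesc_t desc;
	int fd = open("/dev/dtrace/dtrace", O_RDWR);	/* assumed path */

	if (fd == -1)
		return (1);

	memset(&desc, 0, sizeof (desc));
	desc.dtpd_id = DTRACE_IDNONE;		/* start at the first probe */

	while (ioctl(fd, DTRACEIOC_PROBES, &desc) == 0) {
		printf("%u %s:%s:%s:%s\n", desc.dtpd_id,
		    desc.dtpd_provider, desc.dtpd_mod,
		    desc.dtpd_func, desc.dtpd_name);
		desc.dtpd_id++;			/* ask for the next probe */
	}
	return (0);
}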
*/ if (desc.dtpd_id == DTRACE_IDNONE) { mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(&desc, NULL); mutex_exit(&dtrace_provider_lock); desc.dtpd_id++; } if (cmd == DTRACEIOC_PROBEMATCH) { dtrace_probekey(&desc, &pkey); pkey.dtpk_id = DTRACE_IDNONE; } dtrace_cred2priv(cr, &priv, &uid, &zoneid); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, priv, uid, zoneid)) != 0) break; } if (m < 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && dtrace_match_priv(probe, priv, uid, zoneid)) break; } } if (probe == NULL) { mutex_exit(&dtrace_lock); return (ESRCH); } dtrace_probe_description(probe, &desc); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_PROBEARG: { dtrace_argdesc_t desc; dtrace_probe_t *probe; dtrace_provider_t *prov; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtargd_id == DTRACE_IDNONE) return (EINVAL); if (desc.dtargd_ndx == DTRACE_ARGNONE) return (EINVAL); mutex_enter(&dtrace_provider_lock); mutex_enter(&mod_lock); mutex_enter(&dtrace_lock); if (desc.dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } mutex_exit(&dtrace_lock); prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { /* * There isn't any typed information for this probe. * Set the argument number to DTRACE_ARGNONE. */ desc.dtargd_ndx = DTRACE_ARGNONE; } else { desc.dtargd_native[0] = '\0'; desc.dtargd_xlate[0] = '\0'; desc.dtargd_mapping = desc.dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg, &desc); } mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_GO: { processorid_t cpuid; rval = dtrace_state_go(state, &cpuid); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STOP: { processorid_t cpuid; mutex_enter(&dtrace_lock); rval = dtrace_state_stop(state, &cpuid); mutex_exit(&dtrace_lock); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_DOFGET: { dof_hdr_t hdr, *dof; uint64_t len; if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); dof = dtrace_dof_create(state); mutex_exit(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); rval = copyout(dof, (void *)arg, len); dtrace_dof_destroy(dof); return (rval == 0 ? 
0 : EFAULT); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t desc; caddr_t cached; dtrace_buffer_t *buf; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) return (EINVAL); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_BUFSNAP) { buf = &state->dts_buffer[desc.dtbd_cpu]; } else { buf = &state->dts_aggbuffer[desc.dtbd_cpu]; } if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { size_t sz = buf->dtb_offset; if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { mutex_exit(&dtrace_lock); return (EBUSY); } /* * If this buffer has already been consumed, we're * going to indicate that there's nothing left here * to consume. */ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { mutex_exit(&dtrace_lock); desc.dtbd_size = 0; desc.dtbd_drops = 0; desc.dtbd_errors = 0; desc.dtbd_oldest = 0; sz = sizeof (desc); if (copyout(&desc, (void *)arg, sz) != 0) return (EFAULT); return (0); } /* * If this is a ring buffer that has wrapped, we want * to copy the whole thing out. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { dtrace_buffer_polish(buf); sz = buf->dtb_size; } if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = sz; desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; desc.dtbd_timestamp = dtrace_gethrtime(); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; return (0); } if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); mutex_exit(&dtrace_lock); return (ENOENT); } cached = buf->dtb_tomax; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* * If the buffers did not actually switch, then the cross call * did not take place -- presumably because the given CPU is * not in the ready set. If this is the case, we'll return * ENOENT. */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); mutex_exit(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); /* * We have our snapshot; now copy it out. */ if (copyout(buf->dtb_xamot, desc.dtbd_data, buf->dtb_xamot_offset) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = buf->dtb_xamot_offset; desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; desc.dtbd_timestamp = buf->dtb_switched; mutex_exit(&dtrace_lock); /* * Finally, copy out the buffer description. */ if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_CONF: { dtrace_conf_t conf; bzero(&conf, sizeof (conf)); conf.dtc_difversion = DIF_VERSION; conf.dtc_difintregs = DIF_DIR_NREGS; conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STATUS: { dtrace_status_t stat; dtrace_dstate_t *dstate; int i, j; uint64_t nerrs; /* * See the comment in dtrace_state_deadman() for the reason * for setting dts_laststatus to INT64_MAX before setting * it to the correct value. 
*/ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); bzero(&stat, sizeof (stat)); mutex_enter(&dtrace_lock); if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); return (ENOENT); } if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) stat.dtst_exiting = 1; nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; for (i = 0; i < NCPU; i++) { dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat.dtst_dyndrops += dcpu->dtdsc_drops; stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) stat.dtst_filled++; nerrs += state->dts_buffer[i].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; buf = &spec->dtsp_buffer[i]; stat.dtst_specdrops += buf->dtb_xamot_drops; } } stat.dtst_specdrops_busy = state->dts_speculations_busy; stat.dtst_specdrops_unavail = state->dts_speculations_unavail; stat.dtst_stkstroverflows = state->dts_stkstroverflows; stat.dtst_dblerrors = state->dts_dblerrors; stat.dtst_killed = (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat.dtst_errors = nerrs; mutex_exit(&dtrace_lock); if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) return (EFAULT); return (0); } case DTRACEIOC_FORMAT: { dtrace_fmtdesc_t fmt; char *str; int len; if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if (fmt.dtfd_format == 0 || fmt.dtfd_format > state->dts_nformats) { mutex_exit(&dtrace_lock); return (EINVAL); } /* * Format strings are allocated contiguously and they are * never freed; if a format index is less than the number * of formats, we can assert that the format map is non-NULL * and that the format for the specified index is non-NULL. */ ASSERT(state->dts_formats != NULL); str = state->dts_formats[fmt.dtfd_format - 1]; ASSERT(str != NULL); len = strlen(str) + 1; if (len > fmt.dtfd_length) { fmt.dtfd_length = len; if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { if (copyout(str, fmt.dtfd_string, len) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } mutex_exit(&dtrace_lock); return (0); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { dtrace_state_t *state; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } dtrace_provider = NULL; if ((state = dtrace_anon_grab()) != NULL) { /* * If there were ECBs on this state, the provider should * have not been allowed to detach; assert that there is * none. */ ASSERT(state->dts_necbs == 0); dtrace_state_destroy(state); /* * If we're being detached with anonymous state, we need to * indicate to the kernel debugger that DTrace is now inactive. 
*/ (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); } bzero(&dtrace_anon, sizeof (dtrace_anon_t)); unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); dtrace_cpu_init = NULL; dtrace_helpers_cleanup = NULL; dtrace_helpers_fork = NULL; dtrace_cpustart_init = NULL; dtrace_cpustart_fini = NULL; dtrace_debugger_init = NULL; dtrace_debugger_fini = NULL; dtrace_modload = NULL; dtrace_modunload = NULL; ASSERT(dtrace_getf == 0); ASSERT(dtrace_closef == NULL); mutex_exit(&cpu_lock); kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); dtrace_probes = NULL; dtrace_nprobes = 0; dtrace_hash_destroy(dtrace_bymod); dtrace_hash_destroy(dtrace_byfunc); dtrace_hash_destroy(dtrace_byname); dtrace_bymod = NULL; dtrace_byfunc = NULL; dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { kmem_free(dtrace_toxrange, dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); dtrace_toxrange = NULL; dtrace_toxranges = 0; dtrace_toxranges_max = 0; } ddi_remove_minor_node(dtrace_devi, NULL); dtrace_devi = NULL; ddi_soft_state_fini(&dtrace_softstate); ASSERT(dtrace_vtime_references == 0); ASSERT(dtrace_opens == 0); ASSERT(dtrace_retained == NULL); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); /* * We don't destroy the task queue until after we have dropped our * locks (taskq_destroy() may block on running tasks). To prevent * attempting to do work after we have effectively detached but before * the task queue has been destroyed, all tasks dispatched via the * task queue must check that DTrace is still attached before * performing any operation. */ taskq_destroy(dtrace_taskq); dtrace_taskq = NULL; return (DDI_SUCCESS); } #endif #ifdef illumos /*ARGSUSED*/ static int dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error; switch (infocmd) { case DDI_INFO_DEVT2DEVINFO: *result = (void *)dtrace_devi; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; default: error = DDI_FAILURE; } return (error); } #endif #ifdef illumos static struct cb_ops dtrace_cb_ops = { dtrace_open, /* open */ dtrace_close, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ dtrace_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP /* Driver compatibility flag */ }; static struct dev_ops dtrace_ops = { DEVO_REV, /* devo_rev */ 0, /* refcnt */ dtrace_info, /* get_dev_info */ nulldev, /* identify */ nulldev, /* probe */ dtrace_attach, /* attach */ dtrace_detach, /* detach */ nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ nodev /* dev power */ }; static struct modldrv modldrv = { &mod_driverops, /* module type (this is a pseudo driver) */ "Dynamic Tracing", /* name of module */ &dtrace_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); } #else static d_ioctl_t dtrace_ioctl; static d_ioctl_t dtrace_ioctl_helper; static void dtrace_load(void *); static int dtrace_unload(void); static struct cdev *dtrace_dev; static struct cdev *helper_dev; void dtrace_invop_init(void); void 
dtrace_invop_uninit(void); static struct cdevsw dtrace_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl, .d_open = dtrace_open, .d_name = "dtrace", }; static struct cdevsw helper_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl_helper, .d_name = "helper", }; #include #include #include #include #include #include #include #include #include SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL); SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL); SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL); DEV_MODULE(dtrace, dtrace_modevent, NULL); MODULE_VERSION(dtrace, 1); MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1); #endif Index: projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 313267) @@ -1,2509 +1,2510 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H #ifdef __cplusplus extern "C" { #endif /* * DTrace Dynamic Tracing Software: Kernel Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice. Applications and drivers using these interfaces will fail * to run on future releases. These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. 
*/ #ifndef _ASM #include #include #include #ifdef illumos #include #else #include #include #include #include #include typedef int model_t; #endif #include #ifdef illumos #include #include #else #include #endif /* * DTrace Universal Constants and Typedefs */ #define DTRACE_CPUALL -1 /* all CPUs */ #define DTRACE_IDNONE 0 /* invalid probe identifier */ #define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ #define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ #define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ #define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ #define DTRACE_PROVNONE 0 /* invalid provider identifier */ #define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ #define DTRACE_ARGNONE -1 /* invalid argument index */ #define DTRACE_PROVNAMELEN 64 #define DTRACE_MODNAMELEN 64 #define DTRACE_FUNCNAMELEN 192 #define DTRACE_NAMELEN 64 #define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) #define DTRACE_ARGTYPELEN 128 typedef uint32_t dtrace_id_t; /* probe identifier */ typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ typedef uint16_t dtrace_actkind_t; /* action kind */ typedef int64_t dtrace_optval_t; /* option value */ typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ typedef enum dtrace_probespec { DTRACE_PROBESPEC_NONE = -1, DTRACE_PROBESPEC_PROVIDER = 0, DTRACE_PROBESPEC_MOD, DTRACE_PROBESPEC_FUNC, DTRACE_PROBESPEC_NAME } dtrace_probespec_t; /* * DTrace Intermediate Format (DIF) * * The following definitions describe the DTrace Intermediate Format (DIF), a * a RISC-like instruction set and program encoding used to represent * predicates and actions that can be bound to DTrace probes. The constants * below defining the number of available registers are suggested minimums; the * compiler should use DTRACEIOC_CONF to dynamically obtain the number of * registers provided by the current DTrace implementation. 
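Because the register counts below are only suggested minimums, a consumer can query the driver's actual limits with DTRACEIOC_CONF, which fills the dtrace_conf_t handled in the ioctl code earlier. A minimal sketch, with the device path and the direct ioctl call as illustrative assumptions:

/*
 * Illustrative sketch: ask the driver for the DIF configuration it
 * actually provides.  libdtrace performs this query on the consumer's
 * behalf; the device path is an assumption.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/dtrace.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	dtrace_conf_t conf;
	int fd = open("/dev/dtrace/dtrace", O_RDWR);

	if (fd == -1 || ioctl(fd, DTRACEIOC_CONF, &conf) != 0)
		return (1);

	printf("DIF version %u, %u integer regs, %u tuple regs\n",
	    conf.dtc_difversion, conf.dtc_difintregs, conf.dtc_diftupregs);
	return (0);
}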
*/ #define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ #define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ #define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ #define DIF_DIR_NREGS 8 /* number of DIF integer registers */ #define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ #define DIF_OP_OR 1 /* or r1, r2, rd */ #define DIF_OP_XOR 2 /* xor r1, r2, rd */ #define DIF_OP_AND 3 /* and r1, r2, rd */ #define DIF_OP_SLL 4 /* sll r1, r2, rd */ #define DIF_OP_SRL 5 /* srl r1, r2, rd */ #define DIF_OP_SUB 6 /* sub r1, r2, rd */ #define DIF_OP_ADD 7 /* add r1, r2, rd */ #define DIF_OP_MUL 8 /* mul r1, r2, rd */ #define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ #define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ #define DIF_OP_SREM 11 /* srem r1, r2, rd */ #define DIF_OP_UREM 12 /* urem r1, r2, rd */ #define DIF_OP_NOT 13 /* not r1, rd */ #define DIF_OP_MOV 14 /* mov r1, rd */ #define DIF_OP_CMP 15 /* cmp r1, r2 */ #define DIF_OP_TST 16 /* tst r1 */ #define DIF_OP_BA 17 /* ba label */ #define DIF_OP_BE 18 /* be label */ #define DIF_OP_BNE 19 /* bne label */ #define DIF_OP_BG 20 /* bg label */ #define DIF_OP_BGU 21 /* bgu label */ #define DIF_OP_BGE 22 /* bge label */ #define DIF_OP_BGEU 23 /* bgeu label */ #define DIF_OP_BL 24 /* bl label */ #define DIF_OP_BLU 25 /* blu label */ #define DIF_OP_BLE 26 /* ble label */ #define DIF_OP_BLEU 27 /* bleu label */ #define DIF_OP_LDSB 28 /* ldsb [r1], rd */ #define DIF_OP_LDSH 29 /* ldsh [r1], rd */ #define DIF_OP_LDSW 30 /* ldsw [r1], rd */ #define DIF_OP_LDUB 31 /* ldub [r1], rd */ #define DIF_OP_LDUH 32 /* lduh [r1], rd */ #define DIF_OP_LDUW 33 /* lduw [r1], rd */ #define DIF_OP_LDX 34 /* ldx [r1], rd */ #define DIF_OP_RET 35 /* ret rd */ #define DIF_OP_NOP 36 /* nop */ #define DIF_OP_SETX 37 /* setx intindex, rd */ #define DIF_OP_SETS 38 /* sets strindex, rd */ #define DIF_OP_SCMP 39 /* scmp r1, r2 */ #define DIF_OP_LDGA 40 /* ldga var, ri, rd */ #define DIF_OP_LDGS 41 /* ldgs var, rd */ #define DIF_OP_STGS 42 /* stgs var, rs */ #define DIF_OP_LDTA 43 /* ldta var, ri, rd */ #define DIF_OP_LDTS 44 /* ldts var, rd */ #define DIF_OP_STTS 45 /* stts var, rs */ #define DIF_OP_SRA 46 /* sra r1, r2, rd */ #define DIF_OP_CALL 47 /* call subr, rd */ #define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ #define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ #define DIF_OP_POPTS 50 /* popts */ #define DIF_OP_FLUSHTS 51 /* flushts */ #define DIF_OP_LDGAA 52 /* ldgaa var, rd */ #define DIF_OP_LDTAA 53 /* ldtaa var, rd */ #define DIF_OP_STGAA 54 /* stgaa var, rs */ #define DIF_OP_STTAA 55 /* sttaa var, rs */ #define DIF_OP_LDLS 56 /* ldls var, rd */ #define DIF_OP_STLS 57 /* stls var, rs */ #define DIF_OP_ALLOCS 58 /* allocs r1, rd */ #define DIF_OP_COPYS 59 /* copys r1, r2, rd */ #define DIF_OP_STB 60 /* stb r1, [rd] */ #define DIF_OP_STH 61 /* sth r1, [rd] */ #define DIF_OP_STW 62 /* stw r1, [rd] */ #define DIF_OP_STX 63 /* stx r1, [rd] */ #define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ #define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ #define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ #define DIF_OP_ULDUB 67 /* uldub [r1], rd */ #define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ #define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ #define DIF_OP_ULDX 70 /* uldx [r1], rd */ #define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ #define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ #define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ #define DIF_OP_RLDUB 74 /* rldub [r1], rd */ #define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ #define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ #define DIF_OP_RLDX 77 /* rldx 
[r1], rd */ #define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ #define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ #define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ #define DIF_STROFF_MAX 0xffff /* highest string table offset */ #define DIF_REGISTER_MAX 0xff /* highest register number */ #define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ #define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ #define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ #define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ #define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ #define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ #define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ #define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ #define DIF_VAR_IPL 0x0103 /* interrupt priority level */ #define DIF_VAR_EPID 0x0104 /* enabled probe ID */ #define DIF_VAR_ID 0x0105 /* probe ID */ #define DIF_VAR_ARG0 0x0106 /* first argument */ #define DIF_VAR_ARG1 0x0107 /* second argument */ #define DIF_VAR_ARG2 0x0108 /* third argument */ #define DIF_VAR_ARG3 0x0109 /* fourth argument */ #define DIF_VAR_ARG4 0x010a /* fifth argument */ #define DIF_VAR_ARG5 0x010b /* sixth argument */ #define DIF_VAR_ARG6 0x010c /* seventh argument */ #define DIF_VAR_ARG7 0x010d /* eighth argument */ #define DIF_VAR_ARG8 0x010e /* ninth argument */ #define DIF_VAR_ARG9 0x010f /* tenth argument */ #define DIF_VAR_STACKDEPTH 0x0110 /* stack depth */ #define DIF_VAR_CALLER 0x0111 /* caller */ #define DIF_VAR_PROBEPROV 0x0112 /* probe provider */ #define DIF_VAR_PROBEMOD 0x0113 /* probe module */ #define DIF_VAR_PROBEFUNC 0x0114 /* probe function */ #define DIF_VAR_PROBENAME 0x0115 /* probe name */ #define DIF_VAR_PID 0x0116 /* process ID */ #define DIF_VAR_TID 0x0117 /* (per-process) thread ID */ #define DIF_VAR_EXECNAME 0x0118 /* name of executable */ #define DIF_VAR_ZONENAME 0x0119 /* zone name associated with process */ #define DIF_VAR_WALLTIMESTAMP 0x011a /* wall-clock timestamp */ #define DIF_VAR_USTACKDEPTH 0x011b /* user-land stack depth */ #define DIF_VAR_UCALLER 0x011c /* user-level caller */ #define DIF_VAR_PPID 0x011d /* parent process ID */ #define DIF_VAR_UID 0x011e /* process user ID */ #define DIF_VAR_GID 0x011f /* process group ID */ #define DIF_VAR_ERRNO 0x0120 /* thread errno */ #define DIF_VAR_EXECARGS 0x0121 /* process arguments */ #ifndef illumos #define DIF_VAR_CPU 0x0200 #endif #define DIF_SUBR_RAND 0 #define DIF_SUBR_MUTEX_OWNED 1 #define DIF_SUBR_MUTEX_OWNER 2 #define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 #define DIF_SUBR_MUTEX_TYPE_SPIN 4 #define DIF_SUBR_RW_READ_HELD 5 #define DIF_SUBR_RW_WRITE_HELD 6 #define DIF_SUBR_RW_ISWRITER 7 #define DIF_SUBR_COPYIN 8 #define DIF_SUBR_COPYINSTR 9 #define DIF_SUBR_SPECULATION 10 #define DIF_SUBR_PROGENYOF 11 #define DIF_SUBR_STRLEN 12 #define DIF_SUBR_COPYOUT 13 #define DIF_SUBR_COPYOUTSTR 14 #define DIF_SUBR_ALLOCA 15 #define DIF_SUBR_BCOPY 16 #define DIF_SUBR_COPYINTO 17 #define DIF_SUBR_MSGDSIZE 18 #define DIF_SUBR_MSGSIZE 19 #define DIF_SUBR_GETMAJOR 20 #define DIF_SUBR_GETMINOR 21 #define DIF_SUBR_DDI_PATHNAME 22 #define DIF_SUBR_STRJOIN 23 
#define DIF_SUBR_LLTOSTR 24 #define DIF_SUBR_BASENAME 25 #define DIF_SUBR_DIRNAME 26 #define DIF_SUBR_CLEANPATH 27 #define DIF_SUBR_STRCHR 28 #define DIF_SUBR_STRRCHR 29 #define DIF_SUBR_STRSTR 30 #define DIF_SUBR_STRTOK 31 #define DIF_SUBR_SUBSTR 32 #define DIF_SUBR_INDEX 33 #define DIF_SUBR_RINDEX 34 #define DIF_SUBR_HTONS 35 #define DIF_SUBR_HTONL 36 #define DIF_SUBR_HTONLL 37 #define DIF_SUBR_NTOHS 38 #define DIF_SUBR_NTOHL 39 #define DIF_SUBR_NTOHLL 40 #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 #define DIF_SUBR_MEMREF 46 #define DIF_SUBR_SX_SHARED_HELD 47 #define DIF_SUBR_SX_EXCLUSIVE_HELD 48 #define DIF_SUBR_SX_ISEXCLUSIVE 49 #define DIF_SUBR_MEMSTR 50 #define DIF_SUBR_GETF 51 #define DIF_SUBR_JSON 52 #define DIF_SUBR_STRTOLL 53 #define DIF_SUBR_MAX 53 /* max subroutine value */ typedef uint32_t dif_instr_t; #define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) #define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) #define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) #define DIF_INSTR_RD(i) ((i) & 0xff) #define DIF_INSTR_RS(i) ((i) & 0xff) #define DIF_INSTR_LABEL(i) ((i) & 0xffffff) #define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) #define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_FMT(op, r1, r2, d) \ (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) #define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) #define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) #define DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) #define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) #define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) #define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) #define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) #define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) #define DIF_INSTR_NOP (DIF_OP_NOP << 24) #define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) #define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) #define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) #define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) #define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) #define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) #define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) #define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) #define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) #define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) #define DIF_REG_R0 0 /* %r0 is always set to zero */ /* * A DTrace Intermediate Format Type (DIF Type) is used to represent the types * of variables, function and associative array arguments, and the return type * for each DIF object (shown below). It contains a description of the type, * its size in bytes, and a module identifier. 
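The DIF_INSTR_* macros above pack each instruction into 32 bits: an 8-bit opcode in the top byte, then (for the common three-register format) 8-bit r1, r2, and rd fields, with wider immediate, variable, and label fields for the specialized formats. A small round-trip check, using only macros and opcodes defined above:

/*
 * Round-trip a DIF "add r1, r2, rd" through the encoding macros.
 * DIF_INSTR_FMT(DIF_OP_ADD, 1, 2, 3) yields 0x07010203.
 */
#include <sys/dtrace.h>
#include <assert.h>
#include <stdio.h>

int
main(void)
{
	dif_instr_t instr = DIF_INSTR_FMT(DIF_OP_ADD, 1, 2, 3);

	assert(DIF_INSTR_OP(instr) == DIF_OP_ADD);
	assert(DIF_INSTR_R1(instr) == 1);
	assert(DIF_INSTR_R2(instr) == 2);
	assert(DIF_INSTR_RD(instr) == 3);

	/* setx uses the wider immediate field: integer-table entry 5 -> rd 4. */
	instr = DIF_INSTR_SETX(5, 4);
	assert(DIF_INSTR_OP(instr) == DIF_OP_SETX);
	assert(DIF_INSTR_INTEGER(instr) == 5);
	assert(DIF_INSTR_RD(instr) == 4);

	printf("0x%08x\n", DIF_INSTR_FMT(DIF_OP_ADD, 1, 2, 3));
	return (0);
}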
*/ typedef struct dtrace_diftype { uint8_t dtdt_kind; /* type kind (see below) */ uint8_t dtdt_ckind; /* type kind in CTF */ uint8_t dtdt_flags; /* type flags (see below) */ uint8_t dtdt_pad; /* reserved for future use */ uint32_t dtdt_size; /* type size in bytes (unless string) */ } dtrace_diftype_t; #define DIF_TYPE_CTF 0 /* type is a CTF type */ #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ #define DIF_TF_BYUREF 0x2 /* user type is passed by reference */ /* * A DTrace Intermediate Format variable record is used to describe each of the * variables referenced by a given DIF object. It contains an integer variable * identifier along with variable scope and properties, as shown below. The * size of this structure must be sizeof (int) aligned. */ typedef struct dtrace_difv { uint32_t dtdv_name; /* variable name index in dtdo_strtab */ uint32_t dtdv_id; /* variable reference identifier */ uint8_t dtdv_kind; /* variable kind (see below) */ uint8_t dtdv_scope; /* variable scope (see below) */ uint16_t dtdv_flags; /* variable flags (see below) */ dtrace_diftype_t dtdv_type; /* variable type (see above) */ } dtrace_difv_t; #define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ #define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ #define DIFV_SCOPE_GLOBAL 0 /* variable has global scope */ #define DIFV_SCOPE_THREAD 1 /* variable has thread scope */ #define DIFV_SCOPE_LOCAL 2 /* variable has local scope */ #define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ #define DIFV_F_MOD 0x2 /* variable is written by DIFO */ /* * DTrace Actions * * The upper byte determines the class of the action; the low bytes determines * the specific action within that class. The classes of actions are as * follows: * * [ no class ] <= May record process- or kernel-related data * DTRACEACT_PROC <= Only records process-related data * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes * DTRACEACT_KERNEL <= Only records kernel-related data * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel * DTRACEACT_SPECULATIVE <= Speculation-related action * DTRACEACT_AGGREGATION <= Aggregating action */ #define DTRACEACT_NONE 0 /* no action */ #define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ #define DTRACEACT_EXIT 2 /* exit() action */ #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ #define DTRACEACT_TRACEMEM 6 /* tracemem() action */ #define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #define DTRACEACT_PRINTM 8 /* printm() action (BSD) */ #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) #define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) #define DTRACEACT_USYM (DTRACEACT_PROC + 3) #define DTRACEACT_UMOD (DTRACEACT_PROC + 4) #define DTRACEACT_UADDR (DTRACEACT_PROC + 5) #define DTRACEACT_PROC_DESTRUCTIVE 0x0200 #define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) #define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) #define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) #define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) #define DTRACEACT_PROC_CONTROL 0x0300 #define DTRACEACT_KERNEL 0x0400 #define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) #define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) #define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) #define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 #define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) #define 
DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) #define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) #define DTRACEACT_SPECULATIVE 0x0600 #define DTRACEACT_SPECULATE (DTRACEACT_SPECULATIVE + 1) #define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) #define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) #define DTRACEACT_CLASS(x) ((x) & 0xff00) #define DTRACEACT_ISDESTRUCTIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) #define DTRACEACT_ISSPECULATIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) #define DTRACEACT_ISPRINTFLIKE(x) \ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) /* * DTrace Aggregating Actions * * These are functions f(x) for which the following is true: * * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) * * where x_n is a set of arbitrary data. Aggregating actions are in their own * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow * for easier processing of the aggregation argument and data payload for a few * aggregating actions (notably: quantize(), lquantize(), and ustack()). */ #define DTRACEACT_AGGREGATION 0x0700 #define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) #define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) #define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) #define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) #define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) #define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) #define DTRACE_QUANTIZE_NBUCKETS \ (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) #define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 
0 : \ 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) #define DTRACE_LQUANTIZE_STEPSHIFT 48 #define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LQUANTIZE_LEVELSHIFT 32 #define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LQUANTIZE_BASESHIFT 0 #define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX #define DTRACE_LQUANTIZE_STEP(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ DTRACE_LQUANTIZE_STEPSHIFT) #define DTRACE_LQUANTIZE_LEVELS(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ DTRACE_LQUANTIZE_LEVELSHIFT) #define DTRACE_LQUANTIZE_BASE(x) \ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) #define DTRACE_LLQUANTIZE_FACTORSHIFT 48 #define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LLQUANTIZE_LOWSHIFT 32 #define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LLQUANTIZE_HIGHSHIFT 16 #define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16) #define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 #define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX #define DTRACE_LLQUANTIZE_FACTOR(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ DTRACE_LLQUANTIZE_FACTORSHIFT) #define DTRACE_LLQUANTIZE_LOW(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ DTRACE_LLQUANTIZE_LOWSHIFT) #define DTRACE_LLQUANTIZE_HIGH(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ DTRACE_LLQUANTIZE_HIGHSHIFT) #define DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ DTRACE_LLQUANTIZE_NSTEPSHIFT) #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) #ifndef _LP64 #if BYTE_ORDER == _BIG_ENDIAN #define DTRACE_PTR(type, name) uint32_t name##pad; type *name #else #define DTRACE_PTR(type, name) type *name; uint32_t name##pad #endif #else #define DTRACE_PTR(type, name) type *name #endif /* * DTrace Object Format (DOF) * * DTrace programs can be persistently encoded in the DOF format so that they * may be embedded in other programs (for example, in an ELF file) or in the * dtrace driver configuration file for use in anonymous tracing. The DOF * format is versioned and extensible so that it can be revised and so that * internal data structures can be modified or extended compatibly. All DOF * structures use fixed-size types, so the 32-bit and 64-bit representations * are identical and consumers can use either data model transparently. * * The file layout is structured as follows: * * +---------------+-------------------+----- ... ----+---- ... ------+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | * | (file header) | (section headers) | section data | section data | * +---------------+-------------------+----- ... ----+---- ... ------+ * |<------------ dof_hdr.dofh_loadsz --------------->| | * |<------------ dof_hdr.dofh_filesz ------------------------------->| * * The file header stores meta-data including a magic number, data model for * the instrumentation, data encoding, and properties of the DIF code within. * The header describes its own size and the size of the section headers. By * convention, an array of section headers follows the file header, and then * the data for all loadable sections and unloadable sections. This permits * consumer code to easily download the headers and all loadable data into the * DTrace driver in one contiguous chunk, omitting other extraneous sections. 
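A consumer that has a complete DOF image in memory can check the identification bytes and walk the section header array using nothing but the dof_hdr_t and dof_sec_t fields declared a little further below. A hedged sketch, assuming a native-encoding image of at least dofh_filesz bytes and showing no error handling beyond the magic and version checks:

/*
 * Illustrative sketch: validate a DOF header and print its section headers.
 * 'image' is assumed to point at a complete, native-encoding DOF buffer.
 */
#include <sys/dtrace.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int
dof_walk(const void *image)
{
	const dof_hdr_t *hdr = image;
	const char *base = image;
	uint32_t i;

	if (memcmp(hdr->dofh_ident, DOF_MAG_STRING, DOF_MAG_STRLEN) != 0)
		return (-1);	/* not a DOF image */
	if (hdr->dofh_ident[DOF_ID_VERSION] > DOF_VERSION)
		return (-1);	/* newer format than we understand */

	for (i = 0; i < hdr->dofh_secnum; i++) {
		const dof_sec_t *sec = (const dof_sec_t *)(base +
		    hdr->dofh_secoff + (uint64_t)i * hdr->dofh_secsize);

		printf("section %u: type %u, %ju bytes at offset %ju%s\n",
		    i, sec->dofs_type, (uintmax_t)sec->dofs_size,
		    (uintmax_t)sec->dofs_offset,
		    (sec->dofs_flags & DOF_SECF_LOAD) ? " (loadable)" : "");
	}
	return (0);
}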
* * The section headers describe the size, offset, alignment, and section type * for each section. Sections are described using a set of #defines that tell * the consumer what kind of data is expected. Sections can contain links to * other sections by storing a dof_secidx_t, an index into the section header * array, inside of the section data structures. The section header includes * an entry size so that sections with data arrays can grow their structures. * * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which * are represented themselves as a collection of related DOF sections. This * permits us to change the set of sections associated with a DIFO over time, * and also permits us to encode DIFOs that contain different sets of sections. * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a * section of type DOF_SECT_DIFOHDR. This section's data is then an array of * dof_secidx_t's which in turn denote the sections associated with this DIFO. * * This loose coupling of the file structure (header and sections) to the * structure of the DTrace program itself (ECB descriptions, action * descriptions, and DIFOs) permits activities such as relocation processing * to occur in a single pass without having to understand D program structure. * * Finally, strings are always stored in ELF-style string tables along with a * string table section index and string table offset. Therefore strings in * DOF are always arbitrary-length and not bound to the current implementation. */ #define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ typedef struct dof_hdr { uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ uint32_t dofh_flags; /* file attribute flags (if any) */ uint32_t dofh_hdrsize; /* size of file header in bytes */ uint32_t dofh_secsize; /* size of section header in bytes */ uint32_t dofh_secnum; /* number of section headers */ uint64_t dofh_secoff; /* file offset of section headers */ uint64_t dofh_loadsz; /* file size of loadable portion */ uint64_t dofh_filesz; /* file size of entire DOF file */ uint64_t dofh_pad; /* reserved for future use */ } dof_hdr_t; #define DOF_ID_MAG0 0 /* first byte of magic number */ #define DOF_ID_MAG1 1 /* second byte of magic number */ #define DOF_ID_MAG2 2 /* third byte of magic number */ #define DOF_ID_MAG3 3 /* fourth byte of magic number */ #define DOF_ID_MODEL 4 /* DOF data model (see below) */ #define DOF_ID_ENCODING 5 /* DOF data encoding (see below) */ #define DOF_ID_VERSION 6 /* DOF file format major version (see below) */ #define DOF_ID_DIFVERS 7 /* DIF instruction set version */ #define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ #define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ #define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ #define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ #define DOF_MAG_MAG1 'D' #define DOF_MAG_MAG2 'O' #define DOF_MAG_MAG3 'F' #define DOF_MAG_STRING "\177DOF" #define DOF_MAG_STRLEN 4 #define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ #define DOF_MODEL_ILP32 1 #define DOF_MODEL_LP64 2 #ifdef _LP64 #define DOF_MODEL_NATIVE DOF_MODEL_LP64 #else #define DOF_MODEL_NATIVE DOF_MODEL_ILP32 #endif #define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ #define DOF_ENCODE_LSB 1 #define DOF_ENCODE_MSB 2 #if BYTE_ORDER == _BIG_ENDIAN #define DOF_ENCODE_NATIVE DOF_ENCODE_MSB #else #define DOF_ENCODE_NATIVE DOF_ENCODE_LSB #endif #define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ #define DOF_VERSION_2 2 /* DOF version 2: 
Solaris Express 6/06 */ #define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ typedef uint32_t dof_secidx_t; /* section header table index type */ typedef uint32_t dof_stridx_t; /* string table index type */ #define DOF_SECIDX_NONE (-1U) /* null value for section indices */ #define DOF_STRIDX_NONE (-1U) /* null value for string indices */ typedef struct dof_sec { uint32_t dofs_type; /* section type (see below) */ uint32_t dofs_align; /* section data memory alignment */ uint32_t dofs_flags; /* section flags (if any) */ uint32_t dofs_entsize; /* size of section entry (if table) */ uint64_t dofs_offset; /* offset of section data within file */ uint64_t dofs_size; /* size of section data in bytes */ } dof_sec_t; #define DOF_SECT_NONE 0 /* null section */ #define DOF_SECT_COMMENTS 1 /* compiler comments */ #define DOF_SECT_SOURCE 2 /* D program source code */ #define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ #define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ #define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ #define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ #define DOF_SECT_DIF 7 /* uint32_t array of byte code */ #define DOF_SECT_STRTAB 8 /* string table */ #define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ #define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ #define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ #define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ #define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ #define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ #define DOF_SECT_PROVIDER 15 /* dof_provider_t */ #define DOF_SECT_PROBES 16 /* dof_probe_t array */ #define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ #define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ #define DOF_SECT_INTTAB 19 /* uint64_t array */ #define DOF_SECT_UTSNAME 20 /* struct utsname */ #define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ #define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ #define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ #define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ #define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ #define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ #define DOF_SECF_LOAD 1 /* section should be loaded */ #define DOF_SEC_ISLOADABLE(x) \ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ ((x) == DOF_SECT_XLEXPORT) || ((x) == DOF_SECT_PREXPORT) || \ ((x) == DOF_SECT_PRENOFFS)) typedef struct dof_ecbdesc { dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ uint32_t dofe_pad; /* reserved for future use */ uint64_t dofe_uarg; /* user-supplied library argument */ } dof_ecbdesc_t; typedef struct dof_probedesc { dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ dof_stridx_t dofp_provider; /* provider string */ dof_stridx_t dofp_mod; /* module 
string */ dof_stridx_t dofp_func; /* function string */ dof_stridx_t dofp_name; /* name string */ uint32_t dofp_id; /* probe identifier (or zero) */ } dof_probedesc_t; typedef struct dof_actdesc { dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ uint32_t dofa_ntuple; /* number of subsequent tuple actions */ uint64_t dofa_arg; /* kind-specific argument */ uint64_t dofa_uarg; /* user-supplied argument */ } dof_actdesc_t; typedef struct dof_difohdr { dtrace_diftype_t dofd_rtype; /* return type for this fragment */ dof_secidx_t dofd_links[1]; /* variable length array of indices */ } dof_difohdr_t; typedef struct dof_relohdr { dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ } dof_relohdr_t; typedef struct dof_relodesc { dof_stridx_t dofr_name; /* string name of relocation symbol */ uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ uint64_t dofr_offset; /* byte offset for relocation */ uint64_t dofr_data; /* additional type-specific data */ } dof_relodesc_t; #define DOF_RELO_NONE 0 /* empty relocation entry */ #define DOF_RELO_SETX 1 /* relocate setx value */ +#define DOF_RELO_DOFREL 2 /* relocate DOF-relative value */ typedef struct dof_optdesc { uint32_t dofo_option; /* option identifier */ dof_secidx_t dofo_strtab; /* string table, if string option */ uint64_t dofo_value; /* option value or string index */ } dof_optdesc_t; typedef uint32_t dof_attr_t; /* encoded stability attributes */ #define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) #define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) #define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) #define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) typedef struct dof_provider { dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ dof_stridx_t dofpv_name; /* provider name string */ dof_attr_t dofpv_provattr; /* provider attributes */ dof_attr_t dofpv_modattr; /* module attributes */ dof_attr_t dofpv_funcattr; /* function attributes */ dof_attr_t dofpv_nameattr; /* name attributes */ dof_attr_t dofpv_argsattr; /* args attributes */ dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ } dof_provider_t; typedef struct dof_probe { uint64_t dofpr_addr; /* probe base address or offset */ dof_stridx_t dofpr_func; /* probe function string */ dof_stridx_t dofpr_name; /* probe name string */ dof_stridx_t dofpr_nargv; /* native argument type strings */ dof_stridx_t dofpr_xargv; /* translated argument type strings */ uint32_t dofpr_argidx; /* index of first argument mapping */ uint32_t dofpr_offidx; /* index of first offset entry */ uint8_t dofpr_nargc; /* native argument count */ uint8_t dofpr_xargc; /* translated argument count */ uint16_t dofpr_noffs; /* number of offset entries for probe */ uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ uint16_t dofpr_pad1; /* reserved for future use */ uint32_t dofpr_pad2; /* reserved for future use */ } dof_probe_t; typedef struct dof_xlator { dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ dof_secidx_t dofxl_strtab; /* link to 
DOF_SECT_STRTAB section */ dof_stridx_t dofxl_argv; /* input parameter type strings */ uint32_t dofxl_argc; /* input parameter list length */ dof_stridx_t dofxl_type; /* output type string name */ dof_attr_t dofxl_attr; /* output stability attributes */ } dof_xlator_t; typedef struct dof_xlmember { dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ dof_stridx_t dofxm_name; /* member name */ dtrace_diftype_t dofxm_type; /* member type */ } dof_xlmember_t; typedef struct dof_xlref { dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ uint32_t dofxr_member; /* index of referenced dof_xlmember */ uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ } dof_xlref_t; /* * DTrace Intermediate Format Object (DIFO) * * A DIFO is used to store the compiled DIF for a D expression, its return * type, and its string and variable tables. The string table is a single * buffer of character data into which sets instructions and variable * references can reference strings using a byte offset. The variable table * is an array of dtrace_difv_t structures that describe the name and type of * each variable and the id used in the DIF code. This structure is described * above in the DIF section of this header file. The DIFO is used at both * user-level (in the library) and in the kernel, but the structure is never * passed between the two: the DOF structures form the only interface. As a * result, the definition can change depending on the presence of _KERNEL. */ typedef struct dtrace_difo { dif_instr_t *dtdo_buf; /* instruction buffer */ uint64_t *dtdo_inttab; /* integer table (optional) */ char *dtdo_strtab; /* string table (optional) */ dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ uint_t dtdo_len; /* length of instruction buffer */ uint_t dtdo_intlen; /* length of integer table */ uint_t dtdo_strlen; /* length of string table */ uint_t dtdo_varlen; /* length of variable table */ dtrace_diftype_t dtdo_rtype; /* return type */ uint_t dtdo_refcnt; /* owner reference count */ uint_t dtdo_destructive; /* invokes destructive subroutines */ #ifndef _KERNEL dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ dof_relodesc_t *dtdo_ureltab; /* user relocations */ struct dt_node **dtdo_xlmtab; /* translator references */ uint_t dtdo_krelen; /* length of krelo table */ uint_t dtdo_urelen; /* length of urelo table */ uint_t dtdo_xlmlen; /* length of translator table */ #endif } dtrace_difo_t; /* * DTrace Enabling Description Structures * * When DTrace is tracking the description of a DTrace enabling entity (probe, * predicate, action, ECB, record, etc.), it does so in a description * structure. These structures all end in "desc", and are used at both * user-level and in the kernel -- but (with the exception of * dtrace_probedesc_t) they are never passed between them. Typically, * user-level will use the description structures when assembling an enabling. * It will then distill those description structures into a DOF object (see * above), and send it into the kernel. The kernel will again use the * description structures to create a description of the enabling as it reads * the DOF. When the description is complete, the enabling will be actually * created -- turning it into the structures that represent the enabling * instead of merely describing it. Not surprisingly, the description * structures bear a strong resemblance to the DOF structures that act as their * conduit. 
*/ struct dtrace_predicate; typedef struct dtrace_probedesc { dtrace_id_t dtpd_id; /* probe identifier */ char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ char dtpd_name[DTRACE_NAMELEN]; /* probe name */ } dtrace_probedesc_t; typedef struct dtrace_repldesc { dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ } dtrace_repldesc_t; typedef struct dtrace_preddesc { dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ } dtrace_preddesc_t; typedef struct dtrace_actdesc { dtrace_difo_t *dtad_difo; /* pointer to DIF object */ struct dtrace_actdesc *dtad_next; /* next action */ dtrace_actkind_t dtad_kind; /* kind of action */ uint32_t dtad_ntuple; /* number in tuple */ uint64_t dtad_arg; /* action argument */ uint64_t dtad_uarg; /* user argument */ int dtad_refcnt; /* reference count */ } dtrace_actdesc_t; typedef struct dtrace_ecbdesc { dtrace_actdesc_t *dted_action; /* action description(s) */ dtrace_preddesc_t dted_pred; /* predicate description */ dtrace_probedesc_t dted_probe; /* probe description */ uint64_t dted_uarg; /* library argument */ int dted_refcnt; /* reference count */ } dtrace_ecbdesc_t; /* * DTrace Metadata Description Structures * * DTrace separates the trace data stream from the metadata stream. The only * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + * timestamp) or (in the case of aggregations) aggregation identifiers. To * determine the structure of the data, DTrace consumers pass the token to the * kernel, and receive in return a corresponding description of the enabled * probe (via the dtrace_eprobedesc structure) or the aggregation (via the * dtrace_aggdesc structure). Both of these structures are expressed in terms * of record descriptions (via the dtrace_recdesc structure) that describe the * exact structure of the data. Some record descriptions may also contain a * format identifier; this additional bit of metadata can be retrieved from the * kernel, for which a format description is returned via the dtrace_fmtdesc * structure. Note that all four of these structures must be bitness-neutral * to allow for a 32-bit DTrace consumer on a 64-bit kernel. 
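/*
 * Illustrative sketch (editor's example, not part of the original header):
 * filling in a dtrace_probedesc_t by hand.  Consumers normally let
 * libdtrace do this, but the structure is simply four fixed-size name
 * buffers plus an identifier.  The probe chosen here (syscall:::entry) and
 * the use of memset()/strlcpy() are assumptions of the example.
 */
static void
probedesc_example(dtrace_probedesc_t *pdp)
{
	(void) memset(pdp, 0, sizeof (*pdp));
	pdp->dtpd_id = DTRACE_IDNONE;		/* match by name, not by id */
	(void) strlcpy(pdp->dtpd_provider, "syscall",
	    sizeof (pdp->dtpd_provider));
	/* dtpd_mod and dtpd_func stay empty: wildcard components. */
	(void) strlcpy(pdp->dtpd_name, "entry", sizeof (pdp->dtpd_name));
}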
*/ typedef struct dtrace_recdesc { dtrace_actkind_t dtrd_action; /* kind of action */ uint32_t dtrd_size; /* size of record */ uint32_t dtrd_offset; /* offset in ECB's data */ uint16_t dtrd_alignment; /* required alignment */ uint16_t dtrd_format; /* format, if any */ uint64_t dtrd_arg; /* action argument */ uint64_t dtrd_uarg; /* user argument */ } dtrace_recdesc_t; typedef struct dtrace_eprobedesc { dtrace_epid_t dtepd_epid; /* enabled probe ID */ dtrace_id_t dtepd_probeid; /* probe ID */ uint64_t dtepd_uarg; /* library argument */ uint32_t dtepd_size; /* total size */ int dtepd_nrecs; /* number of records */ dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ } dtrace_eprobedesc_t; typedef struct dtrace_aggdesc { DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ int dtagd_flags; /* not filled in by kernel */ dtrace_aggid_t dtagd_id; /* aggregation ID */ dtrace_epid_t dtagd_epid; /* enabled probe ID */ uint32_t dtagd_size; /* size in bytes */ int dtagd_nrecs; /* number of records */ uint32_t dtagd_pad; /* explicit padding */ dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ } dtrace_aggdesc_t; typedef struct dtrace_fmtdesc { DTRACE_PTR(char, dtfd_string); /* format string */ int dtfd_length; /* length of format string */ uint16_t dtfd_format; /* format identifier */ } dtrace_fmtdesc_t; #define DTRACE_SIZEOF_EPROBEDESC(desc) \ (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) #define DTRACE_SIZEOF_AGGDESC(desc) \ (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) /* * DTrace Option Interface * * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections * in a DOF image. The dof_optdesc structure contains an option identifier and * an option value. The valid option identifiers are found below; the mapping * between option identifiers and option identifying strings is maintained at * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the * following are potentially valid option values: all positive integers, zero * and negative one. Some options (notably "bufpolicy" and "bufresize") take * predefined tokens as their values; these are defined with * DTRACEOPT_{option}_{token}. 
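/*
 * Illustrative sketch (editor's example, not part of the original header):
 * the eprobe and aggregation descriptions end in a one-element record
 * array, so buffers holding them are sized with the DTRACE_SIZEOF_*()
 * macros above.  The helper below is hypothetical and assumes the usual
 * libc memset().
 */
static size_t
eprobedesc_size_example(int nrecs)
{
	dtrace_eprobedesc_t tmp;

	/*
	 * The macro reads dtepd_nrecs from a descriptor, so fill one in
	 * with the record count and let it account for the records beyond
	 * the single dtepd_rec[1] element.
	 */
	(void) memset(&tmp, 0, sizeof (tmp));
	tmp.dtepd_nrecs = nrecs;
	return (DTRACE_SIZEOF_EPROBEDESC(&tmp));
}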
*/ #define DTRACEOPT_BUFSIZE 0 /* buffer size */ #define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ #define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ #define DTRACEOPT_AGGSIZE 3 /* aggregation size */ #define DTRACEOPT_SPECSIZE 4 /* speculation size */ #define DTRACEOPT_NSPEC 5 /* number of speculations */ #define DTRACEOPT_STRSIZE 6 /* string size */ #define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ #define DTRACEOPT_CPU 8 /* CPU to trace */ #define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ #define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ #define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ #define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ #define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ #define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ #define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ #define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ #define DTRACEOPT_STATUSRATE 17 /* status rate */ #define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ #define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ #define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ #define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ #define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ #define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ #define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */ #define DTRACEOPT_AGGHIST 28 /* histogram aggregation output */ #define DTRACEOPT_AGGPACK 29 /* packed aggregation output */ #define DTRACEOPT_AGGZOOM 30 /* zoomed aggregation scaling */ #define DTRACEOPT_ZONE 31 /* zone in which to enable probes */ #define DTRACEOPT_MAX 32 /* number of options */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ #define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ #define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ #define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ #define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ #define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ /* * DTrace Buffer Interface * * In order to get a snapshot of the principal or aggregation buffer, * user-level passes a buffer description to the kernel with the dtrace_bufdesc * structure. This describes which CPU user-level is interested in, and * where user-level wishes the kernel to snapshot the buffer to (the * dtbd_data field). The kernel uses the same structure to pass back some * information regarding the buffer: the size of data actually copied out, the * number of drops, the number of errors, the offset of the oldest record, * and the time of the snapshot. * * If the buffer policy is a "switch" policy, taking a snapshot of the * principal buffer has the additional effect of switching the active and * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has * the additional effect of switching the active and inactive buffers. 
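/*
 * Illustrative sketch (editor's example, not part of the original header):
 * what a single DOF_SECT_OPTDESC entry requesting a 4MB principal buffer
 * looks like.  dof_optdesc_t and DOF_SECIDX_NONE are defined earlier in
 * this header; for a non-string option the string table link is left unset.
 */
static void
optdesc_example(dof_optdesc_t *odp)
{
	odp->dofo_option = DTRACEOPT_BUFSIZE;
	odp->dofo_strtab = DOF_SECIDX_NONE;	/* not a string option */
	odp->dofo_value = 4 * 1024 * 1024;	/* buffer size in bytes */
}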
*/ typedef struct dtrace_bufdesc { uint64_t dtbd_size; /* size of buffer */ uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ uint32_t dtbd_errors; /* number of errors */ uint64_t dtbd_drops; /* number of drops */ DTRACE_PTR(char, dtbd_data); /* data */ uint64_t dtbd_oldest; /* offset of oldest record */ uint64_t dtbd_timestamp; /* hrtime of snapshot */ } dtrace_bufdesc_t; /* * Each record in the buffer (dtbd_data) begins with a header that includes * the epid and a timestamp. The timestamp is split into two 4-byte parts * so that we do not require 8-byte alignment. */ typedef struct dtrace_rechdr { dtrace_epid_t dtrh_epid; /* enabled probe id */ uint32_t dtrh_timestamp_hi; /* high bits of hrtime_t */ uint32_t dtrh_timestamp_lo; /* low bits of hrtime_t */ } dtrace_rechdr_t; #define DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) \ ((dtrh)->dtrh_timestamp_lo + \ ((uint64_t)(dtrh)->dtrh_timestamp_hi << 32)) #define DTRACE_RECORD_STORE_TIMESTAMP(dtrh, hrtime) { \ (dtrh)->dtrh_timestamp_lo = (uint32_t)hrtime; \ (dtrh)->dtrh_timestamp_hi = hrtime >> 32; \ } /* * DTrace Status * * The status of DTrace is relayed via the dtrace_status structure. This * structure contains members to count drops other than the capacity drops * available via the buffer interface (see above). This consists of dynamic * drops (including capacity dynamic drops, rinsing drops and dirty drops), and * speculative drops (including capacity speculative drops, drops due to busy * speculative buffers and drops due to unavailable speculative buffers). * Additionally, the status structure contains a field to indicate the number * of "fill"-policy buffers have been filled and a boolean field to indicate * that exit() has been called. If the dtst_exiting field is non-zero, no * further data will be generated until tracing is stopped (at which time any * enablings of the END action will be processed); if user-level sees that * this field is non-zero, tracing should be stopped as soon as possible. */ typedef struct dtrace_status { uint64_t dtst_dyndrops; /* dynamic drops */ uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ uint64_t dtst_specdrops; /* speculative drops */ uint64_t dtst_specdrops_busy; /* spec drops due to busy */ uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ uint64_t dtst_errors; /* total errors */ uint64_t dtst_filled; /* number of filled bufs */ uint64_t dtst_stkstroverflows; /* stack string tab overflows */ uint64_t dtst_dblerrors; /* errors in ERROR probes */ char dtst_killed; /* non-zero if killed */ char dtst_exiting; /* non-zero if exit() called */ char dtst_pad[6]; /* pad out to 64-bit align */ } dtrace_status_t; /* * DTrace Configuration * * User-level may need to understand some elements of the kernel DTrace * configuration in order to generate correct DIF. This information is * conveyed via the dtrace_conf structure. */ typedef struct dtrace_conf { uint_t dtc_difversion; /* supported DIF version */ uint_t dtc_difintregs; /* # of DIF integer registers */ uint_t dtc_diftupregs; /* # of DIF tuple registers */ uint_t dtc_ctfmodel; /* CTF data model */ uint_t dtc_pad[8]; /* reserved for future use */ } dtrace_conf_t; /* * DTrace Faults * * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; * constants at or above DTRACEFLT_LIBRARY indicate faults in probe * postprocessing at user-level. 
Probe processing faults induce an ERROR * probe and are replicated in unistd.d to allow users' ERROR probes to decode * the error condition using these symbolic labels. */ #define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ #define DTRACEFLT_BADADDR 1 /* Bad address */ #define DTRACEFLT_BADALIGN 2 /* Bad alignment */ #define DTRACEFLT_ILLOP 3 /* Illegal operation */ #define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ #define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ #define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ #define DTRACEFLT_UPRIV 7 /* Illegal user access */ #define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ #define DTRACEFLT_BADSTACK 9 /* Bad stack */ #define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ /* * DTrace Argument Types * * Because it would waste both space and time, argument types do not reside * with the probe. In order to determine argument types for args[X] * variables, the D compiler queries for argument types on a probe-by-probe * basis. (This optimizes for the common case that arguments are either not * used or used in an untyped fashion.) Typed arguments are specified with a * string of the type name in the dtargd_native member of the argument * description structure. Typed arguments may be further translated to types * of greater stability; the provider indicates such a translated argument by * filling in the dtargd_xlate member with the string of the translated type. * Finally, the provider may indicate which argument value a given argument * maps to by setting the dtargd_mapping member -- allowing a single argument * to map to multiple args[X] variables. */ typedef struct dtrace_argdesc { dtrace_id_t dtargd_id; /* probe identifier */ int dtargd_ndx; /* arg number (-1 iff none) */ int dtargd_mapping; /* value mapping */ char dtargd_native[DTRACE_ARGTYPELEN]; /* native type name */ char dtargd_xlate[DTRACE_ARGTYPELEN]; /* translated type name */ } dtrace_argdesc_t; /* * DTrace Stability Attributes * * Each DTrace provider advertises the name and data stability of each of its * probe description components, as well as its architectural dependencies. * The D compiler can query the provider attributes (dtrace_pattr_t below) in * order to compute the properties of an input program and report them.
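/*
 * Illustrative sketch (editor's example, not part of the original header):
 * asking the kernel for the type of args[0] of a probe.  dtargd_id and
 * dtargd_ndx are inputs; the mapping and type strings come back filled in.
 * The open file descriptor, ioctl() usage and error handling are
 * assumptions of the example; DTRACEIOC_PROBEARG is defined later in this
 * header.
 */
static int
probearg_example(int dtrace_fd, dtrace_id_t probe, dtrace_argdesc_t *adp)
{
	(void) memset(adp, 0, sizeof (*adp));
	adp->dtargd_id = probe;			/* which probe */
	adp->dtargd_ndx = 0;			/* which args[X] slot */

	if (ioctl(dtrace_fd, DTRACEIOC_PROBEARG, adp) != 0)
		return (-1);			/* errno is set for us */

	/* dtargd_native (and possibly dtargd_xlate) now name the type. */
	return (adp->dtargd_ndx == DTRACE_ARGNONE ? -1 : 0);
}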
*/ typedef uint8_t dtrace_stability_t; /* stability code (see attributes(5)) */ typedef uint8_t dtrace_class_t; /* architectural dependency class */ #define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ #define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ #define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ #define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ #define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ #define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ #define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ #define DTRACE_STABILITY_STANDARD 7 /* industry standard */ #define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ #define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ #define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ #define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ #define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ #define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ #define DTRACE_CLASS_COMMON 5 /* common to all systems */ #define DTRACE_CLASS_MAX 5 /* maximum valid class */ #define DTRACE_PRIV_NONE 0x0000 #define DTRACE_PRIV_KERNEL 0x0001 #define DTRACE_PRIV_USER 0x0002 #define DTRACE_PRIV_PROC 0x0004 #define DTRACE_PRIV_OWNER 0x0008 #define DTRACE_PRIV_ZONEOWNER 0x0010 #define DTRACE_PRIV_ALL \ (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER) typedef struct dtrace_ppriv { uint32_t dtpp_flags; /* privilege flags */ uid_t dtpp_uid; /* user ID */ zoneid_t dtpp_zoneid; /* zone ID */ } dtrace_ppriv_t; typedef struct dtrace_attribute { dtrace_stability_t dtat_name; /* entity name stability */ dtrace_stability_t dtat_data; /* entity data stability */ dtrace_class_t dtat_class; /* entity data dependency */ } dtrace_attribute_t; typedef struct dtrace_pattr { dtrace_attribute_t dtpa_provider; /* provider attributes */ dtrace_attribute_t dtpa_mod; /* module attributes */ dtrace_attribute_t dtpa_func; /* function attributes */ dtrace_attribute_t dtpa_name; /* name attributes */ dtrace_attribute_t dtpa_args; /* args[] attributes */ } dtrace_pattr_t; typedef struct dtrace_providerdesc { char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ dtrace_pattr_t dtvd_attr; /* stability attributes */ dtrace_ppriv_t dtvd_priv; /* privileges required */ } dtrace_providerdesc_t; /* * DTrace Pseudodevice Interface * * DTrace is controlled through ioctl(2)'s to the in-kernel dtrace:dtrace * pseudodevice driver. These ioctls comprise the user-kernel interface to * DTrace. */ #ifdef illumos #define DTRACEIOC (('d' << 24) | ('t' << 16) | ('r' << 8)) #define DTRACEIOC_PROVIDER (DTRACEIOC | 1) /* provider query */ #define DTRACEIOC_PROBES (DTRACEIOC | 2) /* probe query */ #define DTRACEIOC_BUFSNAP (DTRACEIOC | 4) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH (DTRACEIOC | 5) /* match probes */ #define DTRACEIOC_ENABLE (DTRACEIOC | 6) /* enable probes */ #define DTRACEIOC_AGGSNAP (DTRACEIOC | 7) /* snapshot agg. */ #define DTRACEIOC_EPROBE (DTRACEIOC | 8) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG (DTRACEIOC | 9) /* get probe arg */ #define DTRACEIOC_CONF (DTRACEIOC | 10) /* get config. */ #define DTRACEIOC_STATUS (DTRACEIOC | 11) /* get status */ #define DTRACEIOC_GO (DTRACEIOC | 12) /* start tracing */ #define DTRACEIOC_STOP (DTRACEIOC | 13) /* stop tracing */ #define DTRACEIOC_AGGDESC (DTRACEIOC | 15) /* get agg. desc. 
*/ #define DTRACEIOC_FORMAT (DTRACEIOC | 16) /* get format str */ #define DTRACEIOC_DOFGET (DTRACEIOC | 17) /* get DOF */ #define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ #else #define DTRACEIOC_PROVIDER _IOWR('x',1,dtrace_providerdesc_t) /* provider query */ #define DTRACEIOC_PROBES _IOWR('x',2,dtrace_probedesc_t) /* probe query */ #define DTRACEIOC_BUFSNAP _IOW('x',4,dtrace_bufdesc_t *) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH _IOWR('x',5,dtrace_probedesc_t) /* match probes */ typedef struct { void *dof; /* DOF userland address written to driver. */ int n_matched; /* # matches returned by driver. */ } dtrace_enable_io_t; #define DTRACEIOC_ENABLE _IOWR('x',6,dtrace_enable_io_t) /* enable probes */ #define DTRACEIOC_AGGSNAP _IOW('x',7,dtrace_bufdesc_t *) /* snapshot agg. */ #define DTRACEIOC_EPROBE _IOW('x',8,dtrace_eprobedesc_t) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG _IOWR('x',9,dtrace_argdesc_t) /* get probe arg */ #define DTRACEIOC_CONF _IOR('x',10,dtrace_conf_t) /* get config. */ #define DTRACEIOC_STATUS _IOR('x',11,dtrace_status_t) /* get status */ #define DTRACEIOC_GO _IOR('x',12,processorid_t) /* start tracing */ #define DTRACEIOC_STOP _IOWR('x',13,processorid_t) /* stop tracing */ #define DTRACEIOC_AGGDESC _IOW('x',15,dtrace_aggdesc_t *) /* get agg. desc. */ #define DTRACEIOC_FORMAT _IOWR('x',16,dtrace_fmtdesc_t) /* get format str */ #define DTRACEIOC_DOFGET _IOW('x',17,dof_hdr_t *) /* get DOF */ #define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t) /* replicate enab */ #endif /* * DTrace Helpers * * In general, DTrace establishes probes in processes and takes actions on * processes without knowing their specific user-level structures. Instead of * existing in the framework, process-specific knowledge is contained by the * enabling D program -- which can apply process-specific knowledge by making * appropriate use of DTrace primitives like copyin() and copyinstr() to * operate on user-level data. However, there may exist some specific probes * of particular semantic relevance that the application developer may wish to * explicitly export. For example, an application may wish to export a probe * at the point that it begins and ends certain well-defined transactions. In * addition to providing probes, programs may wish to offer assistance for * certain actions. For example, in highly dynamic environments (e.g., Java), * it may be difficult to obtain a stack trace in terms of meaningful symbol * names (the translation from instruction addresses to corresponding symbol * names may only be possible in situ); these environments may wish to define * a series of actions to be applied in situ to obtain a meaningful stack * trace. * * These two mechanisms -- user-level statically defined tracing and assisting * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified * via DOF, but unlike enabling DOF, helper DOF may contain definitions of * providers, probes and their arguments. If a helper wishes to provide * action assistance, probe descriptions and corresponding DIF actions may be * specified in the helper DOF. For such helper actions, however, the probe * description describes the specific helper: all DTrace helpers have the * provider name "dtrace" and the module name "helper", and the name of the * helper is contained in the function name (for example, the ustack() helper * is named "ustack"). 
Any helper-specific name may be contained in the name * (for example, if a helper were to have a constructor, it might be named * "dtrace:helper::init"). Helper actions are only called when the * action that they are helping is taken. Helper actions may only return DIF * expressions, and may only call the following subroutines: * * alloca() <= Allocates memory out of the consumer's scratch space * bcopy() <= Copies memory to scratch space * copyin() <= Copies memory from user-level into consumer's scratch * copyinto() <= Copies memory into a specific location in scratch * copyinstr() <= Copies a string into a specific location in scratch * * Helper actions may only access the following built-in variables: * * curthread <= Current kthread_t pointer * tid <= Current thread identifier * pid <= Current process identifier * ppid <= Parent process identifier * uid <= Current user ID * gid <= Current group ID * execname <= Current executable name * zonename <= Current zone name * * Helper actions may not manipulate or allocate dynamic variables, but they * may have clause-local and statically-allocated global variables. The * helper action variable state is specific to the helper action -- variables * used by the helper action may not be accessed outside of the helper * action, and the helper action may not access variables that live outside * of it. Helper actions may not load from kernel memory at-large; they are * restricted to loading current user state (via copyin() and variants) and * scratch space. As with probe enablings, helper actions are executed in * program order. The result of the helper action is the result of the last * executing helper expression. * * Helpers -- composed of either providers/probes or probes/actions (or both) * -- are added by opening the "helper" minor node, and issuing an ioctl(2) * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This * encapsulates the name and base address of the user-level library or * executable publishing the helpers and probes as well as the DOF that * contains the definitions of those helpers and probes. * * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy * helpers and should no longer be used. No other ioctls are valid on the * helper minor node. */ #ifdef illumos #define DTRACEHIOC (('d' << 24) | ('t' << 16) | ('h' << 8)) #define DTRACEHIOC_ADD (DTRACEHIOC | 1) /* add helper */ #define DTRACEHIOC_REMOVE (DTRACEHIOC | 2) /* remove helper */ #define DTRACEHIOC_ADDDOF (DTRACEHIOC | 3) /* add helper DOF */ #else #define DTRACEHIOC_REMOVE _IOW('z', 2, int) /* remove helper */ #define DTRACEHIOC_ADDDOF _IOWR('z', 3, dof_helper_t)/* add helper DOF */ #endif typedef struct dof_helper { char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ uint64_t dofhp_addr; /* base address of object */ uint64_t dofhp_dof; /* address of helper DOF */ #ifdef __FreeBSD__ pid_t dofhp_pid; /* target process ID */ int dofhp_gen; #endif } dof_helper_t; #define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ #define DTRACEMNR_HELPER "helper" /* node for helpers */ #define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ #define DTRACEMNRN_HELPER 1 /* minor for helpers */ #define DTRACEMNRN_CLONE 2 /* first clone minor */ #ifdef _KERNEL /* * DTrace Provider API * * The following functions are implemented by the DTrace framework and are * used to implement separate in-kernel DTrace providers. Common functions * are provided in uts/common/os/dtrace.c.
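/*
 * Illustrative sketch (editor's example, not part of the original header):
 * roughly how a process hands its helper DOF to the kernel with
 * DTRACEHIOC_ADDDOF; normally drti.o, linked in by dtrace -G, does this at
 * startup.  The function name, getpid() use and error handling are
 * assumptions of the example; helper_fd refers to the "helper" minor node.
 */
static int
helper_adddof_example(int helper_fd, const char *obj, uintptr_t base,
    void *dof)
{
	dof_helper_t dh;

	(void) memset(&dh, 0, sizeof (dh));
	(void) strlcpy(dh.dofhp_mod, obj, sizeof (dh.dofhp_mod));
	dh.dofhp_addr = (uint64_t)base;			/* object base */
	dh.dofhp_dof = (uint64_t)(uintptr_t)dof;	/* helper DOF */
#ifdef __FreeBSD__
	dh.dofhp_pid = getpid();			/* target process */
#endif
	return (ioctl(helper_fd, DTRACEHIOC_ADDDOF, &dh));
}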
ISA-dependent subroutines are * defined in uts//dtrace/dtrace_asm.s or uts//dtrace/dtrace_isa.c. * * The provider API has two halves: the API that the providers consume from * DTrace, and the API that providers make available to DTrace. * * 1 Framework-to-Provider API * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_pops structure * that the provider passes to the framework when registering itself. This * structure consists of the following members: * * dtps_provide() <-- Provide all probes, all modules * dtps_provide_module() <-- Provide all probes in specified module * dtps_enable() <-- Enable specified probe * dtps_disable() <-- Disable specified probe * dtps_suspend() <-- Suspend specified probe * dtps_resume() <-- Resume specified probe * dtps_getargdesc() <-- Get the argument description for args[X] * dtps_getargval() <-- Get the value for an argX or args[X] variable * dtps_usermode() <-- Find out if the probe was fired in user mode * dtps_destroy() <-- Destroy all state associated with this probe * * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) * * 1.2.1 Overview * * Called to indicate that the provider should provide all probes. If the * specified description is non-NULL, dtps_provide() is being called because * no probe matched a specified probe -- if the provider has the ability to * create custom probes, it may wish to create a probe that matches the * specified description. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a probe description that the provider may * wish to consider when creating custom probes. The provider is expected to * call back into the DTrace framework via dtrace_probe_create() to create * any necessary probes. dtps_provide() may be called even if the provider * has made available all probes; the provider should check the return value * of dtrace_probe_create() to handle this case. Note that the provider need * not implement both dtps_provide() and dtps_provide_module(); see * "Arguments and Notes" for dtrace_register(), below. * * 1.2.3 Return value * * None. * * 1.2.4 Caller's context * * dtps_provide() is typically called from open() or ioctl() context, but may * be called from other contexts as well. The DTrace framework is locked in * such a way that providers may not register or unregister. This means that * the provider may not call any DTrace API that affects its registration with * the framework, including dtrace_register(), dtrace_unregister(), * dtrace_invalidate(), and dtrace_condense(). However, the context is such * that the provider may (and indeed, is expected to) call probe-related * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), * and dtrace_probe_arg(). * * 1.3 void dtps_provide_module(void *arg, modctl_t *mp) * * 1.3.1 Overview * * Called to indicate that the provider should provide all probes in the * specified module. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a modctl structure that indicates the * module for which probes should be created. * * 1.3.3 Return value * * None. * * 1.3.4 Caller's context * * dtps_provide_module() may be called from open() or ioctl() context, but * may also be called from a module loading context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. 
This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). However, the context is such that the provider may (and * indeed, is expected to) call probe-related DTrace routines, including * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). Note * that the provider need not implement both dtps_provide() and * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), * below. * * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) * * 1.4.1 Overview * * Called to enable the specified probe. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be enabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_enable() will be called when a probe transitions from not being * enabled at all to having one or more ECBs. The number of ECBs associated * with the probe may change without subsequent calls into the provider. * When the number of ECBs drops to zero, the provider will be explicitly * told to disable the probe via dtps_disable(). dtrace_probe() should never * be called for a probe identifier that hasn't been explicitly enabled via * dtps_enable(). * * 1.4.3 Return value * * None. * * 1.4.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) * * 1.5.1 Overview * * Called to disable the specified probe. * * 1.5.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be disabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_disable() will be called when a probe transitions from being enabled * to having zero ECBs. dtrace_probe() should never be called for a probe * identifier that has been explicitly disabled via dtps_disable(). * * 1.5.3 Return value * * None. * * 1.5.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) * * 1.6.1 Overview * * Called to suspend the specified enabled probe. This entry point is for * providers that may need to suspend some or all of their probes when CPUs * are being powered on or when the boot monitor is being entered for a * prolonged period of time. * * 1.6.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be suspended. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_suspend will only be called on an enabled probe. Providers that * provide a dtps_suspend entry point will want to take roughly the action * that it takes for dtps_disable. * * 1.6.3 Return value * * None. * * 1.6.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_suspend(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory.
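/*
 * Illustrative sketch (editor's example, not part of the original header):
 * minimal dtps_enable()/dtps_disable() entry points for a hypothetical
 * provider "foo" whose probe sites are guarded by a simple flag.  Real
 * providers typically patch their instrumentation points here instead.
 */
typedef struct foo_probe {
	int fp_enabled;		/* checked at the probe site */
	dtrace_id_t fp_id;	/* id returned by dtrace_probe_create() */
} foo_probe_t;

static void
foo_enable(void *arg, dtrace_id_t id, void *parg)
{
	foo_probe_t *fp = parg;	/* the arg passed to dtrace_probe_create() */

	fp->fp_enabled = 1;
}

static void
foo_disable(void *arg, dtrace_id_t id, void *parg)
{
	foo_probe_t *fp = parg;

	fp->fp_enabled = 0;
}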
* * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) * * 1.7.1 Overview * * Called to resume the specified enabled probe. This entry point is for * providers that may need to resume some or all of their probes after the * completion of an event that induced a call to dtps_suspend(). * * 1.7.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be resumed. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_resume will only be called on an enabled probe. Providers that * provide a dtps_resume entry point will want to take roughly the action * that it takes for dtps_enable. * * 1.7.3 Return value * * None. * * 1.7.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_resume(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory. * * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, * dtrace_argdesc_t *desc) * * 1.8.1 Overview * * Called to retrieve the argument description for an args[X] variable. * * 1.8.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is a pointer to the argument description. This * description is both an input and output parameter: it contains the * index of the desired argument in the dtargd_ndx field, and expects * the other fields to be filled in upon return. If there is no argument * corresponding to the specified index, the dtargd_ndx field should be set * to DTRACE_ARGNONE. * * 1.8.3 Return value * * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping * members of the dtrace_argdesc_t structure are all output values. * * 1.8.4 Caller's context * * dtps_getargdesc() is called from ioctl() context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). * * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, * int argno, int aframes) * * 1.9.1 Overview * * Called to retrieve a value for an argX or args[X] variable. * * 1.9.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is the number of the argument (the X in the example in * 1.9.1). The fifth argument is the number of stack frames that were used * to get from the actual place in the code that fired the probe to * dtrace_probe() itself, the so-called artificial frames. This argument may * be used to descend an appropriate number of frames to find the correct * values. If this entry point is left NULL, the dtrace_getarg() built-in * function is used. * * 1.9.3 Return value * * The value of the argument. * * 1.9.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. 
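/*
 * Illustrative sketch (editor's example, not part of the original header):
 * a dtps_getargdesc() entry point for the hypothetical "foo" provider,
 * whose probes all take a single "int" argument.  dtargd_ndx is the input;
 * the remaining members are outputs, and out-of-range indices are reported
 * with DTRACE_ARGNONE.  strlcpy() is assumed to be available.
 */
static void
foo_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
{
	if (desc->dtargd_ndx != 0) {
		desc->dtargd_ndx = DTRACE_ARGNONE;	/* no such argument */
		return;
	}

	(void) strlcpy(desc->dtargd_native, "int",
	    sizeof (desc->dtargd_native));
	desc->dtargd_mapping = desc->dtargd_ndx;	/* args[0] is arg0 */
	/* dtargd_xlate is left empty: no translated type. */
}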
* * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) * * 1.10.1 Overview * * Called to determine if the probe was fired in a user context. * * 1.10.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). This * entry point must not be left NULL for providers whose probes allow for * mixed mode tracing, that is to say those probes that can fire during * kernel- _or_ user-mode execution. * * 1.10.3 Return value * * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL * or DTRACE_MODE_USER) and the policy when the privilege of the enabling * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP, * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If * the DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result * in the probe firing being silently ignored for the enabling; if the * DTRACE_MODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not * prevent probe processing for the enabling, but restrictions will be in * place that induce a UPRIV fault upon attempt to examine probe arguments * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit * is set, similar restrictions will be placed upon operation if the * privilege is sufficient to process the enabling, but does not otherwise * entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these * two policies must be specified), but either may be combined (or not) * with DTRACE_MODE_LIMITEDPRIV_RESTRICT. * * 1.10.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) * * 1.11.1 Overview * * Called to destroy the specified probe. * * 1.11.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be destroyed. The third * argument is the probe argument as passed to dtrace_probe_create(). The * provider should free all state associated with the probe. The framework * guarantees that dtps_destroy() is only called for probes that have either * been disabled via dtps_disable() or were never enabled via dtps_enable(). * Once dtps_destroy() has been called for a probe, no further call will be * made specifying the probe. * * 1.11.3 Return value * * None. * * 1.11.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. mod_lock is held. cpu_lock is not held, and may not be * acquired. * * * 2 Provider-to-Framework API * * 2.1 Overview * * The Provider-to-Framework API provides the mechanism for the provider to * register itself with the DTrace framework, to create probes, to lookup * probes and (most importantly) to fire probes.
The Provider-to-Framework * consists of: * * dtrace_register() <-- Register a provider with the DTrace framework * dtrace_unregister() <-- Remove a provider's DTrace registration * dtrace_invalidate() <-- Invalidate the specified provider * dtrace_condense() <-- Remove a provider's unenabled probes * dtrace_attached() <-- Indicates whether or not DTrace has attached * dtrace_probe_create() <-- Create a DTrace probe * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name * dtrace_probe_arg() <-- Return the probe argument for a specific probe * dtrace_probe() <-- Fire the specified probe * * 2.2 int dtrace_register(const char *name, const dtrace_pattr_t *pap, * uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, * dtrace_provider_id_t *idp) * * 2.2.1 Overview * * dtrace_register() registers the calling provider with the DTrace * framework. It should generally be called by DTrace providers in their * attach(9E) entry point. * * 2.2.2 Arguments and Notes * * The first argument is the name of the provider. The second argument is a * pointer to the stability attributes for the provider. The third argument * is the privilege flags for the provider, and must be some combination of: * * DTRACE_PRIV_NONE <= All users may enable probes from this provider * * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may * enable probes from this provider * * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may * enable probes from this provider * * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL * may enable probes from this provider * * DTRACE_PRIV_OWNER <= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a user ID matching the user * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_OWNER privilege. * * DTRACE_PRIV_ZONEOWNER<= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a zone ID matching the zone * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_ZONE privilege. * * Note that these flags designate the _visibility_ of the probes, not * the conditions under which they may or may not fire. * * The fourth argument is the credential that is associated with the * provider. This argument should be NULL if the privilege flags don't * include DTRACE_PRIV_OWNER or DTRACE_PRIV_ZONEOWNER. If non-NULL, the * framework stashes the uid and zoneid represented by this credential * for use at probe-time, in implicit predicates. These limit visibility * of the probes to users and/or zones which have sufficient privilege to * access them. * * The fifth argument is a DTrace provider operations vector, which provides * the implementation for the Framework-to-Provider API. (See Section 1, * above.) This must be non-NULL, and each member must be non-NULL. The * exceptions to this are (1) the dtps_provide() and dtps_provide_module() * members (if the provider so desires, _one_ of these members may be left * NULL -- denoting that the provider only implements the other) and (2) * the dtps_suspend() and dtps_resume() members, which must either both be * NULL or both be non-NULL. * * The sixth argument is a cookie to be specified as the first argument for * each function in the Framework-to-Provider API. This argument may have * any value. * * The final argument is a pointer to dtrace_provider_id_t. 
If * dtrace_register() successfully completes, the provider identifier will be * stored in the memory pointed to by this argument. This argument must be * non-NULL. * * 2.2.3 Return value * * On success, dtrace_register() returns 0 and stores the new provider's * identifier into the memory pointed to by the idp argument. On failure, * dtrace_register() returns an errno: * * EINVAL The arguments passed to dtrace_register() were somehow invalid. * This may be because a parameter that must be non-NULL was NULL, * because the name was invalid (either empty or an illegal * provider name) or because the attributes were invalid. * * No other failure code is returned. * * 2.2.4 Caller's context * * dtrace_register() may induce calls to dtrace_provide(); the provider must * hold no locks across dtrace_register() that may also be acquired by * dtrace_provide(). cpu_lock and mod_lock must not be held. * * 2.3 int dtrace_unregister(dtrace_provider_t id) * * 2.3.1 Overview * * Unregisters the specified provider from the DTrace framework. It should * generally be called by DTrace providers in their detach(9E) entry point. * * 2.3.2 Arguments and Notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). As a result of calling * dtrace_unregister(), the DTrace framework will call back into the provider * via the dtps_destroy() entry point. Once dtrace_unregister() successfully * completes, however, the DTrace framework will no longer make calls through * the Framework-to-Provider API. * * 2.3.3 Return value * * On success, dtrace_unregister returns 0. On failure, dtrace_unregister() * returns an errno: * * EBUSY There are currently processes that have the DTrace pseudodevice * open, or there exists an anonymous enabling that hasn't yet * been claimed. * * No other failure code is returned. * * 2.3.4 Caller's context * * Because a call to dtrace_unregister() may induce calls through the * Framework-to-Provider API, the caller may not hold any lock across * dtrace_unregister() that is also acquired in any of the Framework-to- * Provider API functions. Additionally, mod_lock may not be held. * * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) * * 2.4.1 Overview * * Invalidates the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. * * 2.4.2 Arguments and note * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). In general, a provider's probes * always remain valid; dtrace_invalidate() is a mechanism for invalidating * an entire provider, regardless of whether or not probes are enabled or * not. Note that dtrace_invalidate() will _not_ prevent already enabled * probes from firing -- it will merely prevent any new enablings of the * provider's probes. * * 2.5 int dtrace_condense(dtrace_provider_id_t id) * * 2.5.1 Overview * * Removes all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the * provider, just as many of its associated probes as it can. * * 2.5.2 Arguments and Notes * * As with dtrace_unregister(), the sole argument is the provider identifier * as returned from a successful call to dtrace_register(). As a result of * calling dtrace_condense(), the DTrace framework will call back into the * given provider's dtps_destroy() entry point for each of the provider's * unenabled probes.
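/*
 * Illustrative sketch (editor's example, not part of the original header):
 * the usual attach/detach dance for the hypothetical in-kernel provider
 * "foo".  The dtrace_pops_t and dtrace_register() declarations appear
 * further down in this header; foo_provide and foo_destroy are assumed to
 * exist elsewhere, and which members may legitimately be NULL is discussed
 * in sections 1.9, 1.10 and 2.2.2 of the surrounding text.
 */
static dtrace_provider_id_t foo_id;

static dtrace_pattr_t foo_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t foo_pops = {
	foo_provide, NULL,	/* dtps_provide, dtps_provide_module */
	foo_enable, foo_disable,
	NULL, NULL,		/* dtps_suspend, dtps_resume */
	foo_getargdesc,
	NULL,			/* dtps_getargval: use dtrace_getarg() */
	NULL,			/* dtps_usermode: kernel-only probes */
	foo_destroy
};

static int
foo_attach(void)
{
	/* Kernel privilege only, so no cred is needed. */
	return (dtrace_register("foo", &foo_attr, DTRACE_PRIV_KERNEL, NULL,
	    &foo_pops, NULL, &foo_id));
}

static int
foo_detach(void)
{
	return (dtrace_unregister(foo_id));	/* may fail with EBUSY */
}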
* * 2.5.3 Return value * * Currently, dtrace_condense() always returns 0. However, consumers of this * function should check the return value as appropriate; its behavior may * change in the future. * * 2.5.4 Caller's context * * As with dtrace_unregister(), the caller may not hold any lock across * dtrace_condense() that is also acquired in the provider's entry points. * Also, mod_lock may not be held. * * 2.6 int dtrace_attached() * * 2.6.1 Overview * * Indicates whether or not DTrace has attached. * * 2.6.2 Arguments and Notes * * For most providers, DTrace makes initial contact beyond registration. * That is, once a provider has registered with DTrace, it waits to hear * from DTrace to create probes. However, some providers may wish to * proactively create probes without first being told by DTrace to do so. * If providers wish to do this, they must first call dtrace_attached() to * determine if DTrace itself has attached. If dtrace_attached() returns 0, * the provider must not make any other Provider-to-Framework API call. * * 2.6.3 Return value * * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. * * 2.7 int dtrace_probe_create(dtrace_provider_t id, const char *mod, * const char *func, const char *name, int aframes, void *arg) * * 2.7.1 Overview * * Creates a probe with specified module name, function name, and name. * * 2.7.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively. Of these, module name and function name may both be NULL * (in which case the probe is considered to be unanchored), or they may both * be non-NULL. The name must be non-NULL, and must point to a non-empty * string. * * The fifth argument is the number of artificial stack frames that will be * found on the stack when dtrace_probe() is called for the new probe. These * artificial frames will automatically be pruned should the stack() or * stackdepth() functions be called as part of one of the probe's ECBs. If * the parameter doesn't add an artificial frame, this parameter should be * zero. * * The final argument is a probe argument that will be passed back to the * provider when a probe-specific operation is called. (e.g., via * dtps_enable(), dtps_disable(), etc.) * * Note that it is up to the provider to be sure that the probe that it * creates does not already exist -- if the provider is unsure of the probe's * existence, it should assure its absence with dtrace_probe_lookup() before * calling dtrace_probe_create(). * * 2.7.3 Return value * * dtrace_probe_create() always succeeds, and always returns the identifier * of the newly-created probe. * * 2.7.4 Caller's context * * While dtrace_probe_create() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may be called from other * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_t id, const char *mod, * const char *func, const char *name) * * 2.8.1 Overview * * Looks up a probe based on provider and one or more of module name, * function name and probe name. * * 2.8.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively.
Any of these may be NULL; dtrace_probe_lookup() will return * the identifier of the first probe that is provided by the specified * provider and matches all of the non-NULL matching criteria. * dtrace_probe_lookup() is generally used by a provider to check the * existence of a probe before creating it with dtrace_probe_create(). * * 2.8.3 Return value * * If the probe exists, returns its identifier. If the probe does not exist, * returns DTRACE_IDNONE. * * 2.8.4 Caller's context * * While dtrace_probe_lookup() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.9 void *dtrace_probe_arg(dtrace_provider_t id, dtrace_id_t probe) * * 2.9.1 Overview * * Returns the probe argument associated with the specified probe. * * 2.9.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second argument is a probe * identifier, as returned from dtrace_probe_lookup() or * dtrace_probe_create(). This is useful if a probe has multiple * provider-specific components to it: the provider can create the probe * once with provider-specific state, and then add to the state by looking * up the probe based on probe identifier. * * 2.9.3 Return value * * Returns the argument associated with the specified probe. If the * specified probe does not exist, or if the specified probe is not provided * by the specified provider, NULL is returned. * * 2.9.4 Caller's context * * While dtrace_probe_arg() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) * * 2.10.1 Overview * * The epicenter of DTrace: fires the specified probes with the specified * arguments. * * 2.10.2 Arguments and Notes * * The first argument is a probe identifier as returned by * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth * arguments are the values to which the D variables "arg0" through "arg4" * will be mapped. * * dtrace_probe() should be called whenever the specified probe has fired -- * however the provider defines it. * * 2.10.3 Return value * * None. * * 2.10.4 Caller's context * * dtrace_probe() may be called in virtually any context: kernel, user, * interrupt, high-level interrupt, with arbitrary adaptive locks held, with * dispatcher locks held, with interrupts disabled, etc. The only latitude * that must be afforded to DTrace is the ability to make calls within * itself (and to its in-kernel subroutines) and the ability to access * arbitrary (but mapped) memory. On some platforms, this constrains * context. For example, on UltraSPARC, dtrace_probe() cannot be called * from any context in which TL is greater than zero. dtrace_probe() may * also not be called from any routine which may be called by dtrace_probe() * -- which includes functions in the DTrace framework and some in-kernel * DTrace subroutines. All such functions begin with "dtrace_"; providers that * instrument the kernel arbitrarily should be sure to not instrument these * routines.
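/*
 * Illustrative sketch (editor's example, not part of the original header):
 * the create-or-skip pattern from 2.7/2.8 above, plus the probe site
 * itself.  The "foo" provider, its probe state and the mod/func/name tuple
 * are hypothetical; the extern declarations used here appear just below.
 */
static foo_probe_t foo_start_probe;

static void
foo_provide(void *arg, dtrace_probedesc_t *desc)
{
	/* Don't create the probe twice (see 2.7.2 above). */
	if (dtrace_probe_lookup(foo_id, "mod", "func", "start") !=
	    DTRACE_IDNONE)
		return;

	foo_start_probe.fp_id = dtrace_probe_create(foo_id, "mod", "func",
	    "start", 0 /* no artificial frames */, &foo_start_probe);
}

/* The instrumentation point: a cheap enabled check, then fire. */
static void
foo_probe_site(uintptr_t arg0)
{
	if (foo_start_probe.fp_enabled)
		dtrace_probe(foo_start_probe.fp_id, arg0, 0, 0, 0, 0);
}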
*/ typedef struct dtrace_pops { void (*dtps_provide)(void *arg, dtrace_probedesc_t *spec); void (*dtps_provide_module)(void *arg, modctl_t *mp); void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); void (*dtps_getargdesc)(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc); uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, int argno, int aframes); int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); } dtrace_pops_t; #define DTRACE_MODE_KERNEL 0x01 #define DTRACE_MODE_USER 0x02 #define DTRACE_MODE_NOPRIV_DROP 0x10 #define DTRACE_MODE_NOPRIV_RESTRICT 0x20 #define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40 typedef uintptr_t dtrace_provider_id_t; extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, cred_t *, const dtrace_pops_t *, void *, dtrace_provider_id_t *); extern int dtrace_unregister(dtrace_provider_id_t); extern int dtrace_condense(dtrace_provider_id_t); extern void dtrace_invalidate(dtrace_provider_id_t); extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *, char *, char *); extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, const char *, const char *, int, void *); extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); /* * DTrace Meta Provider API * * The following functions are implemented by the DTrace framework and are * used to implement meta providers. Meta providers plug into the DTrace * framework and are used to instantiate new providers on the fly. At * present, there is only one type of meta provider and only one meta * provider may be registered with the DTrace framework at a time. The * sole meta provider type provides user-land static tracing facilities * by taking meta probe descriptions and adding a corresponding provider * into the DTrace framework. * * 1 Framework-to-Provider * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_mops structure * that the meta provider passes to the framework when registering itself as * a meta provider. This structure consists of the following members: * * dtms_create_probe() <-- Add a new probe to a created provider * dtms_provide_pid() <-- Create a new provider for a given process * dtms_remove_pid() <-- Remove a previously created provider * * 1.2 void dtms_create_probe(void *arg, void *parg, * dtrace_helper_probedesc_t *probedesc); * * 1.2.1 Overview * * Called by the DTrace framework to create a new probe in a provider * created by this meta provider. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is the provider cookie for the associated provider; * this is obtained from the return value of dtms_provide_pid(). The third * argument is the helper probe description. * * 1.2.3 Return value * * None * * 1.2.4 Caller's context * * dtms_create_probe() is called from either ioctl() or module load context * in the context of a newly-created provider (that is, a provider that * is a result of a call to dtms_provide_pid()). 
The DTrace framework is * locked in such a way that meta providers may not register or unregister, * such that no other thread can call into a meta provider operation and that * atomicity is assured with respect to meta provider operations across * dtms_provide_pid() and subsequent calls to dtms_create_probe(). * The context is thus effectively single-threaded with respect to the meta * provider, and the meta provider cannot call dtrace_meta_register() * or dtrace_meta_unregister(). However, the context is such that the * provider may (and is expected to) call provider-related DTrace provider * APIs including dtrace_probe_create(). * * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.3.1 Overview * * Called by the DTrace framework to instantiate a new provider given the * description of the provider and probes in the mprov argument. The * meta provider should call dtrace_register() to insert the new provider * into the DTrace framework. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the new * helper provider. The third argument is the process identifier for * the process associated with this new provider. Note that the name of the * provider as passed to dtrace_register() should be the concatenation of * the dtmpb_provname member of the mprov argument and the process * identifier as a string. * * 1.3.3 Return value * * The cookie for the provider that the meta provider creates. This is * the same value that it passed to dtrace_register(). * * 1.3.4 Caller's context * * dtms_provide_pid() is called from either ioctl() or module load context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_register(). * * 1.4 void dtms_remove_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.4.1 Overview * * Called by the DTrace framework to remove a provider that had previously * been instantiated via the dtms_provide_pid() entry point. The meta * provider need not remove the provider immediately, but this entry * point indicates that the provider should be removed as soon as possible * using the dtrace_unregister() API. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the helper * provider. The third argument is the process identifier for the process * associated with this new provider. * * 1.4.3 Return value * * None * * 1.4.4 Caller's context * * dtms_remove_pid() is called from either ioctl() or exit() context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_unregister().
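/*
 * Illustrative sketch (editor's example, not part of the original header):
 * registering a meta provider.  The three dtms_* callbacks are assumed to
 * be implemented elsewhere following the descriptions above; "mymeta" is a
 * hypothetical name, and the dtrace_mops_t structure and
 * dtrace_meta_register() prototype appear just below.
 */
static dtrace_meta_provider_id_t mymeta_id;

static dtrace_mops_t mymeta_mops = {
	mymeta_create_probe,	/* dtms_create_probe */
	mymeta_provide_pid,	/* dtms_provide_pid */
	mymeta_remove_pid	/* dtms_remove_pid */
};

static int
mymeta_init(void)
{
	/* Only one meta provider may be registered at a time. */
	return (dtrace_meta_register("mymeta", &mymeta_mops, NULL,
	    &mymeta_id));
}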
*/ typedef struct dtrace_helper_probedesc { char *dthpb_mod; /* probe module */ char *dthpb_func; /* probe function */ char *dthpb_name; /* probe name */ uint64_t dthpb_base; /* base address */ uint32_t *dthpb_offs; /* offsets array */ uint32_t *dthpb_enoffs; /* is-enabled offsets array */ uint32_t dthpb_noffs; /* offsets count */ uint32_t dthpb_nenoffs; /* is-enabled offsets count */ uint8_t *dthpb_args; /* argument mapping array */ uint8_t dthpb_xargc; /* translated argument count */ uint8_t dthpb_nargc; /* native argument count */ char *dthpb_xtypes; /* translated types strings */ char *dthpb_ntypes; /* native types strings */ } dtrace_helper_probedesc_t; typedef struct dtrace_helper_provdesc { char *dthpv_provname; /* provider name */ dtrace_pattr_t dthpv_pattr; /* stability attributes */ } dtrace_helper_provdesc_t; typedef struct dtrace_mops { void (*dtms_create_probe)(void *, void *, dtrace_helper_probedesc_t *); void *(*dtms_provide_pid)(void *, dtrace_helper_provdesc_t *, pid_t); void (*dtms_remove_pid)(void *, dtrace_helper_provdesc_t *, pid_t); } dtrace_mops_t; typedef uintptr_t dtrace_meta_provider_id_t; extern int dtrace_meta_register(const char *, const dtrace_mops_t *, void *, dtrace_meta_provider_id_t *); extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); /* * DTrace Kernel Hooks * * The following functions are implemented by the base kernel and form a set of * hooks used by the DTrace framework. DTrace hooks are implemented in either * uts/common/os/dtrace_subr.c, an ISA-specific assembly file, or in a * uts//os/dtrace_subr.c corresponding to each hardware platform. */ typedef enum dtrace_vtime_state { DTRACE_VTIME_INACTIVE = 0, /* No DTrace, no TNF */ DTRACE_VTIME_ACTIVE, /* DTrace virtual time, no TNF */ DTRACE_VTIME_INACTIVE_TNF, /* No DTrace, TNF active */ DTRACE_VTIME_ACTIVE_TNF /* DTrace virtual time _and_ TNF */ } dtrace_vtime_state_t; #ifdef illumos extern dtrace_vtime_state_t dtrace_vtime_active; #endif extern void dtrace_vtime_switch(kthread_t *next); extern void dtrace_vtime_enable_tnf(void); extern void dtrace_vtime_disable_tnf(void); extern void dtrace_vtime_enable(void); extern void dtrace_vtime_disable(void); struct regs; struct reg; #ifdef illumos extern int (*dtrace_pid_probe_ptr)(struct reg *); extern int (*dtrace_return_probe_ptr)(struct reg *); extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); extern void dtrace_fasttrap_fork(proc_t *, proc_t *); #endif typedef uintptr_t dtrace_icookie_t; typedef void (*dtrace_xcall_t)(void *); extern dtrace_icookie_t dtrace_interrupt_disable(void); extern void dtrace_interrupt_enable(dtrace_icookie_t); extern void dtrace_membar_producer(void); extern void dtrace_membar_consumer(void); extern void (*dtrace_cpu_init)(processorid_t); #ifdef illumos extern void (*dtrace_modload)(modctl_t *); extern void (*dtrace_modunload)(modctl_t *); #endif extern void (*dtrace_helpers_cleanup)(void); extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); extern void (*dtrace_cpustart_init)(void); extern void (*dtrace_cpustart_fini)(void); extern void (*dtrace_closef)(void); extern void (*dtrace_debugger_init)(void); extern void (*dtrace_debugger_fini)(void); extern dtrace_cacheid_t dtrace_predcache_id; #ifdef illumos extern hrtime_t dtrace_gethrtime(void); #else void dtrace_debug_printf(const char *, ...) 
__printflike(1, 2); #endif extern void dtrace_sync(void); extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); extern void dtrace_vpanic(const char *, __va_list); extern void dtrace_panic(const char *, ...); extern int dtrace_safe_defer_signal(void); extern void dtrace_safe_synchronous_signal(void); extern int dtrace_mach_aframes(void); #if defined(__i386) || defined(__amd64) extern int dtrace_instr_size(uchar_t *instr); extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); extern void dtrace_invop_callsite(void); #endif extern void dtrace_invop_add(int (*)(uintptr_t, struct trapframe *, uintptr_t)); extern void dtrace_invop_remove(int (*)(uintptr_t, struct trapframe *, uintptr_t)); #ifdef __sparc extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif #ifndef illumos extern void dtrace_helpers_duplicate(proc_t *, proc_t *); extern void dtrace_helpers_destroy(proc_t *); #endif #define DTRACE_CPUFLAG_ISSET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags & (flag)) #define DTRACE_CPUFLAG_SET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags |= (flag)) #define DTRACE_CPUFLAG_CLEAR(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags &= ~(flag)) #endif /* _KERNEL */ #endif /* _ASM */ #if defined(__i386) || defined(__amd64) #define DTRACE_INVOP_PUSHL_EBP 1 #define DTRACE_INVOP_PUSHQ_RBP DTRACE_INVOP_PUSHL_EBP #define DTRACE_INVOP_POPL_EBP 2 #define DTRACE_INVOP_POPQ_RBP DTRACE_INVOP_POPL_EBP #define DTRACE_INVOP_LEAVE 3 #define DTRACE_INVOP_NOP 4 #define DTRACE_INVOP_RET 5 #elif defined(__powerpc__) #define DTRACE_INVOP_RET 1 #define DTRACE_INVOP_BCTR 2 #define DTRACE_INVOP_BLR 3 #define DTRACE_INVOP_JUMP 4 #define DTRACE_INVOP_MFLR_R0 5 #define DTRACE_INVOP_NOP 6 #elif defined(__arm__) #define DTRACE_INVOP_SHIFT 4 #define DTRACE_INVOP_MASK ((1 << DTRACE_INVOP_SHIFT) - 1) #define DTRACE_INVOP_DATA(x) ((x) >> DTRACE_INVOP_SHIFT) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_POPM 2 #define DTRACE_INVOP_B 3 #elif defined(__aarch64__) #define INSN_SIZE 4 #define B_MASK 0xff000000 #define B_DATA_MASK 0x00ffffff #define B_INSTR 0x14000000 #define RET_INSTR 0xd65f03c0 #define LDP_STP_MASK 0xffc00000 #define STP_32 0x29800000 #define STP_64 0xa9800000 #define LDP_32 0x28c00000 #define LDP_64 0xa8c00000 #define LDP_STP_PREIND (1 << 24) #define LDP_STP_DIR (1 << 22) /* Load instruction */ #define ARG1_SHIFT 0 #define ARG1_MASK 0x1f #define ARG2_SHIFT 10 #define ARG2_MASK 0x1f #define OFFSET_SHIFT 15 #define OFFSET_SIZE 7 #define OFFSET_MASK ((1 << OFFSET_SIZE) - 1) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_B 3 #elif defined(__mips__) #define INSN_SIZE 4 /* Load/Store double RA to/from SP */ #define LDSD_RA_SP_MASK 0xffff0000 #define LDSD_DATA_MASK 0x0000ffff #define SD_RA_SP 0xffbf0000 #define LD_RA_SP 0xdfbf0000 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_LD 2 #elif defined(__riscv__) #define SD_RA_SP_MASK 0x01fff07f #define SD_RA_SP 0x00113023 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_NOP 3 #endif #ifdef __cplusplus } #endif #endif /* _SYS_DTRACE_H */ Index: projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris (revision 313267) Property changes on: 
projects/netbsd-tests-upstream-01-2017/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r313244-313266 Index: projects/netbsd-tests-upstream-01-2017/sys/cddl/dev/dtrace/dtrace_ioctl.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/cddl/dev/dtrace/dtrace_ioctl.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/cddl/dev/dtrace/dtrace_ioctl.c (revision 313267) @@ -1,853 +1,854 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ * */ static int dtrace_verbose_ioctl; SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_ioctl, 0, "log DTrace ioctls"); #define DTRACE_IOCTL_PRINTF(fmt, ...) if (dtrace_verbose_ioctl) printf(fmt, ## __VA_ARGS__ ) static int dtrace_ioctl_helper(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct proc *p; dof_helper_t *dhp; dof_hdr_t *dof; int rval; dhp = NULL; dof = NULL; rval = 0; switch (cmd) { case DTRACEHIOC_ADDDOF: dhp = (dof_helper_t *)addr; addr = (caddr_t)(uintptr_t)dhp->dofhp_dof; p = curproc; if (p->p_pid == dhp->dofhp_pid) { dof = dtrace_dof_copyin((uintptr_t)addr, &rval); } else { p = pfind(dhp->dofhp_pid); if (p == NULL) return (EINVAL); if (!P_SHOULDSTOP(p) || (p->p_flag & (P_TRACED | P_WEXIT)) != P_TRACED || p->p_pptr != curproc) { PROC_UNLOCK(p); return (EINVAL); } _PHOLD(p); PROC_UNLOCK(p); dof = dtrace_dof_copyin_proc(p, (uintptr_t)addr, &rval); } if (dof == NULL) { if (p != curproc) PRELE(p); break; } mutex_enter(&dtrace_lock); if ((rval = dtrace_helper_slurp(dof, dhp, p)) != -1) { dhp->dofhp_gen = rval; rval = 0; } else { rval = EINVAL; } mutex_exit(&dtrace_lock); if (p != curproc) PRELE(p); break; case DTRACEHIOC_REMOVE: mutex_enter(&dtrace_lock); rval = dtrace_helper_destroygen(NULL, *(int *)(uintptr_t)addr); mutex_exit(&dtrace_lock); break; default: rval = ENOTTY; break; } return (rval); } /* ARGSUSED */ static int dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags __unused, struct thread *td) { dtrace_state_t *state; devfs_get_cdevpriv((void **) &state); int error = 0; if (state == NULL) return (EINVAL); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); state = state->dts_anon; } switch (cmd) { case DTRACEIOC_AGGDESC: { dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr; dtrace_aggdesc_t aggdesc; dtrace_action_t *act; dtrace_aggregation_t *agg; int nrecs; uint32_t offs; dtrace_recdesc_t *lrec; void *buf; size_t size; uintptr_t dest; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__); if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); 
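		/*
		 * The description is assembled into a temporary buffer while
		 * dtrace_lock is held and is copied out to userland only
		 * after the lock has been dropped; see the comments below.
		 */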
mutex_enter(&dtrace_lock); if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; nrecs = aggdesc.dtagd_nrecs; aggdesc.dtagd_nrecs = 0; offs = agg->dtag_base; lrec = &agg->dtag_action.dta_rec; aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it * denotes an argument to the aggregating action. * Because the presence of this record doesn't (or * shouldn't) affect the way the data is interpreted, * we don't copy it out to save user-level the * confusion of dealing with a zero-length record. */ if (act->dta_rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } aggdesc.dtagd_nrecs++; if (act == &agg->dtag_action) break; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); for (act = agg->dtag_first; ; act = act->dta_next) { dtrace_recdesc_t rec = act->dta_rec; /* * See the comment in the above loop for why we pass * over zero-length records. */ if (rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } if (nrecs-- == 0) break; rec.dtrd_offset -= offs; bcopy(&rec, (void *)dest, sizeof (rec)); dest += sizeof (dtrace_recdesc_t); if (act == &agg->dtag_action) break; } mutex_exit(&dtrace_lock); if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr; dtrace_bufdesc_t desc; caddr_t cached; dtrace_buffer_t *buf; dtrace_debug_output(); if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0) return (EFAULT); DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n", __func__,__LINE__, cmd == DTRACEIOC_AGGSNAP ? "DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP", curcpu, desc.dtbd_cpu); if (desc.dtbd_cpu >= NCPU) return (ENOENT); if (pcpu_find(desc.dtbd_cpu) == NULL) return (ENOENT); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_BUFSNAP) { buf = &state->dts_buffer[desc.dtbd_cpu]; } else { buf = &state->dts_aggbuffer[desc.dtbd_cpu]; } if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { size_t sz = buf->dtb_offset; if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { mutex_exit(&dtrace_lock); return (EBUSY); } /* * If this buffer has already been consumed, we're * going to indicate that there's nothing left here * to consume. */ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { mutex_exit(&dtrace_lock); desc.dtbd_size = 0; desc.dtbd_drops = 0; desc.dtbd_errors = 0; desc.dtbd_oldest = 0; sz = sizeof (desc); if (copyout(&desc, (void *) *pdesc, sz) != 0) return (EFAULT); return (0); } /* * If this is a ring buffer that has wrapped, we want * to copy the whole thing out. 
*/ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { dtrace_buffer_polish(buf); sz = buf->dtb_size; } if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = sz; desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; desc.dtbd_timestamp = dtrace_gethrtime(); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; return (0); } if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); mutex_exit(&dtrace_lock); return (ENOENT); } cached = buf->dtb_tomax; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* * If the buffers did not actually switch, then the cross call * did not take place -- presumably because the given CPU is * not in the ready set. If this is the case, we'll return * ENOENT. */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); mutex_exit(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__); /* * We have our snapshot; now copy it out. */ if (copyout(buf->dtb_xamot, desc.dtbd_data, buf->dtb_xamot_offset) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = buf->dtb_xamot_offset; desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; desc.dtbd_timestamp = buf->dtb_switched; mutex_exit(&dtrace_lock); DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors); /* * Finally, copy out the buffer description. */ if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_CONF: { dtrace_conf_t conf; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__); bzero(&conf, sizeof (conf)); conf.dtc_difversion = DIF_VERSION; conf.dtc_difintregs = DIF_DIR_NREGS; conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; *((dtrace_conf_t *) addr) = conf; return (0); } case DTRACEIOC_DOFGET: { dof_hdr_t **pdof = (dof_hdr_t **) addr; dof_hdr_t hdr, *dof = *pdof; int rval; uint64_t len; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__); if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); dof = dtrace_dof_create(state); mutex_exit(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); rval = copyout(dof, (void *) *pdof, len); dtrace_dof_destroy(dof); return (rval == 0 ? 0 : EFAULT); } case DTRACEIOC_ENABLE: { dof_hdr_t *dof = NULL; dtrace_enabling_t *enab = NULL; dtrace_vstate_t *vstate; int err = 0; int rval; dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__); /* * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
*/ if (p->dof == NULL) { dtrace_enabling_matchall(); return (0); } if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL) return (EINVAL); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); vstate = &state->dts_vstate; if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EBUSY); } - if (dtrace_dof_slurp(dof, vstate, td->td_ucred, &enab, 0, B_TRUE) != 0) { + if (dtrace_dof_slurp(dof, vstate, td->td_ucred, &enab, 0, 0, + B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EINVAL); } if ((rval = dtrace_dof_options(dof, state)) != 0) { dtrace_enabling_destroy(enab); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (rval); } if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); dtrace_dof_destroy(dof); return (err); } case DTRACEIOC_EPROBE: { dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr; dtrace_eprobedesc_t epdesc; dtrace_ecb_t *ecb; dtrace_action_t *act; void *buf; size_t size; uintptr_t dest; int nrecs; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__); if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } if (ecb->dte_probe == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; epdesc.dtepd_uarg = ecb->dte_uarg; epdesc.dtepd_size = ecb->dte_size; nrecs = epdesc.dtepd_nrecs; epdesc.dtepd_nrecs = 0; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; epdesc.dtepd_nrecs++; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_eprobedesc_t) + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&epdesc, (void *)dest, sizeof (epdesc)); dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; if (nrecs-- == 0) break; bcopy(&act->dta_rec, (void *)dest, sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } mutex_exit(&dtrace_lock); if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_FORMAT: { dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr; char *str; int len; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__); mutex_enter(&dtrace_lock); if (fmt->dtfd_format == 0 || fmt->dtfd_format > state->dts_nformats) { mutex_exit(&dtrace_lock); return (EINVAL); } /* * Format strings are allocated contiguously and they are * never freed; if a format index is less than the number * of formats, we can assert that the format map is non-NULL * and that the format for the specified index is non-NULL. 
*/ ASSERT(state->dts_formats != NULL); str = state->dts_formats[fmt->dtfd_format - 1]; ASSERT(str != NULL); len = strlen(str) + 1; if (len > fmt->dtfd_length) { fmt->dtfd_length = len; } else { if (copyout(str, fmt->dtfd_string, len) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_GO: { int rval; processorid_t *cpuid = (processorid_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__); rval = dtrace_state_go(state, cpuid); return (rval); } case DTRACEIOC_PROBEARG: { dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr; dtrace_probe_t *probe; dtrace_provider_t *prov; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__); if (desc->dtargd_id == DTRACE_IDNONE) return (EINVAL); if (desc->dtargd_ndx == DTRACE_ARGNONE) return (EINVAL); mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); if (desc->dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); return (EINVAL); } mutex_exit(&dtrace_lock); prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { /* * There isn't any typed information for this probe. * Set the argument number to DTRACE_ARGNONE. */ desc->dtargd_ndx = DTRACE_ARGNONE; } else { desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; desc->dtargd_mapping = desc->dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg, desc); } #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); return (0); } case DTRACEIOC_PROBEMATCH: case DTRACEIOC_PROBES: { dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr; dtrace_probe_t *probe = NULL; dtrace_probekey_t pkey; dtrace_id_t i; int m = 0; uint32_t priv = 0; uid_t uid = 0; zoneid_t zoneid = 0; DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__, cmd == DTRACEIOC_PROBEMATCH ? "DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES"); p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; /* * Before we attempt to match this probe, we want to give * all providers the opportunity to provide it. 
*/ if (p_desc->dtpd_id == DTRACE_IDNONE) { mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(p_desc, NULL); mutex_exit(&dtrace_provider_lock); p_desc->dtpd_id++; } if (cmd == DTRACEIOC_PROBEMATCH) { dtrace_probekey(p_desc, &pkey); pkey.dtpk_id = DTRACE_IDNONE; } dtrace_cred2priv(td->td_ucred, &priv, &uid, &zoneid); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, priv, uid, zoneid)) != 0) break; } if (m < 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && dtrace_match_priv(probe, priv, uid, zoneid)) break; } } if (probe == NULL) { mutex_exit(&dtrace_lock); return (ESRCH); } dtrace_probe_description(probe, p_desc); mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_PROVIDER: { dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr; dtrace_provider_t *pvp; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__); pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; mutex_enter(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0) break; } mutex_exit(&dtrace_provider_lock); if (pvp == NULL) return (ESRCH); bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t)); return (0); } case DTRACEIOC_REPLICATE: { dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr; dtrace_probedesc_t *match = &desc->dtrpd_match; dtrace_probedesc_t *create = &desc->dtrpd_create; int err; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; mutex_enter(&dtrace_lock); err = dtrace_enabling_replicate(state, match, create); mutex_exit(&dtrace_lock); return (err); } case DTRACEIOC_STATUS: { dtrace_status_t *stat = (dtrace_status_t *) addr; dtrace_dstate_t *dstate; int i, j; uint64_t nerrs; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__); /* * See the comment in dtrace_state_deadman() for the reason * for setting dts_laststatus to INT64_MAX before setting * it to the correct value. 
*/ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); bzero(stat, sizeof (*stat)); mutex_enter(&dtrace_lock); if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); return (ENOENT); } if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) stat->dtst_exiting = 1; nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; for (i = 0; i < NCPU; i++) { #ifndef illumos if (pcpu_find(i) == NULL) continue; #endif dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat->dtst_dyndrops += dcpu->dtdsc_drops; stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) stat->dtst_filled++; nerrs += state->dts_buffer[i].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; buf = &spec->dtsp_buffer[i]; stat->dtst_specdrops += buf->dtb_xamot_drops; } } stat->dtst_specdrops_busy = state->dts_speculations_busy; stat->dtst_specdrops_unavail = state->dts_speculations_unavail; stat->dtst_stkstroverflows = state->dts_stkstroverflows; stat->dtst_dblerrors = state->dts_dblerrors; stat->dtst_killed = (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat->dtst_errors = nerrs; mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_STOP: { int rval; processorid_t *cpuid = (processorid_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__); mutex_enter(&dtrace_lock); rval = dtrace_state_stop(state, cpuid); mutex_exit(&dtrace_lock); return (rval); } default: error = ENOTTY; } return (error); } Index: projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmc.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmc.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmc.c (revision 313267) @@ -1,1829 +1,1829 @@ /*- * Copyright (c) 2006 Bernd Walter. All rights reserved. * Copyright (c) 2006 M. Warner Losh. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Portions of this software may have been developed with reference to * the SD Simplified Specification. 
The following disclaimer may apply: * * The following conditions apply to the release of the simplified * specification ("Simplified Specification") by the SD Card Association and * the SD Group. The Simplified Specification is a subset of the complete SD * Specification which is owned by the SD Card Association and the SD * Group. This Simplified Specification is provided on a non-confidential * basis subject to the disclaimers below. Any implementation of the * Simplified Specification may require a license from the SD Card * Association, SD Group, SD-3C LLC or other third parties. * * Disclaimers: * * The information contained in the Simplified Specification is presented only * as a standard specification for SD Cards and SD Host/Ancillary products and * is provided "AS-IS" without any representations or warranties of any * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD * Card Association for any damages, any infringements of patents or other * right of the SD Group, SD-3C LLC, the SD Card Association or any third * parties, which may result from its use. No license is granted by * implication, estoppel or otherwise under any patent or other rights of the * SD Group, SD-3C LLC, the SD Card Association or any third party. Nothing * herein shall be construed as an obligation by the SD Group, the SD-3C LLC * or the SD Card Association to disclose or distribute any technical * information, know-how or other confidential information to any third party. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "mmcbus_if.h" struct mmc_softc { device_t dev; struct mtx sc_mtx; struct intr_config_hook config_intrhook; device_t owner; uint32_t last_rca; int squelched; /* suppress reporting of (expected) errors */ int log_count; struct timeval log_time; }; #define LOG_PPS 5 /* Log no more than 5 errors per second. */ /* * Per-card data */ struct mmc_ivars { uint32_t raw_cid[4]; /* Raw bits of the CID */ uint32_t raw_csd[4]; /* Raw bits of the CSD */ uint32_t raw_scr[2]; /* Raw bits of the SCR */ uint8_t raw_ext_csd[512]; /* Raw bits of the EXT_CSD */ uint32_t raw_sd_status[16]; /* Raw bits of the SD_STATUS */ uint16_t rca; enum mmc_card_mode mode; struct mmc_cid cid; /* cid decoded */ struct mmc_csd csd; /* csd decoded */ struct mmc_scr scr; /* scr decoded */ struct mmc_sd_status sd_status; /* SD_STATUS decoded */ u_char read_only; /* True when the device is read-only */ u_char bus_width; /* Bus width to use */ u_char timing; /* Bus timing support */ u_char high_cap; /* High Capacity card (block addressed) */ uint32_t sec_count; /* Card capacity in 512byte blocks */ uint32_t tran_speed; /* Max speed in normal mode */ uint32_t hs_tran_speed; /* Max speed in high speed mode */ uint32_t erase_sector; /* Card native erase sector size */ char card_id_string[64];/* Formatted CID info (serial, MFG, etc) */ char card_sn_string[16];/* Formatted serial # for disk->d_ident */ }; -#define CMD_RETRIES 3 +#define CMD_RETRIES 3 #define CARD_ID_FREQUENCY 400000 /* Spec requires 400kHz max during ID phase. 
*/ static SYSCTL_NODE(_hw, OID_AUTO, mmc, CTLFLAG_RD, NULL, "mmc driver"); static int mmc_debug; -SYSCTL_INT(_hw_mmc, OID_AUTO, debug, CTLFLAG_RWTUN, &mmc_debug, 0, "Debug level"); +SYSCTL_INT(_hw_mmc, OID_AUTO, debug, CTLFLAG_RWTUN, &mmc_debug, 0, + "Debug level"); /* bus entry points */ static int mmc_acquire_bus(device_t busdev, device_t dev); static int mmc_attach(device_t dev); static int mmc_child_location_str(device_t dev, device_t child, char *buf, size_t buflen); static int mmc_detach(device_t dev); static int mmc_probe(device_t dev); static int mmc_read_ivar(device_t bus, device_t child, int which, uintptr_t *result); static int mmc_release_bus(device_t busdev, device_t dev); static int mmc_resume(device_t dev); static int mmc_suspend(device_t dev); static int mmc_wait_for_request(device_t brdev, device_t reqdev, struct mmc_request *req); static int mmc_write_ivar(device_t bus, device_t child, int which, uintptr_t value); -#define MMC_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) +#define MMC_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define MMC_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) -#define MMC_LOCK_INIT(_sc) \ - mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ +#define MMC_LOCK_INIT(_sc) \ + mtx_init(&(_sc)->sc_mtx, device_get_nameunit((_sc)->dev), \ "mmc", MTX_DEF) -#define MMC_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); -#define MMC_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); -#define MMC_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); +#define MMC_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx); +#define MMC_ASSERT_LOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED); +#define MMC_ASSERT_UNLOCKED(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED); static int mmc_all_send_cid(struct mmc_softc *sc, uint32_t *rawcid); static void mmc_app_decode_scr(uint32_t *raw_scr, struct mmc_scr *scr); static void mmc_app_decode_sd_status(uint32_t *raw_sd_status, struct mmc_sd_status *sd_status); static int mmc_app_sd_status(struct mmc_softc *sc, uint16_t rca, uint32_t *rawsdstatus); static int mmc_app_send_scr(struct mmc_softc *sc, uint16_t rca, uint32_t *rawscr); static int mmc_calculate_clock(struct mmc_softc *sc); static void mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid); static void mmc_decode_cid_sd(uint32_t *raw_cid, struct mmc_cid *cid); static void mmc_decode_csd_mmc(uint32_t *raw_csd, struct mmc_csd *csd); static void mmc_decode_csd_sd(uint32_t *raw_csd, struct mmc_csd *csd); static void mmc_delayed_attach(void *xsc); static int mmc_delete_cards(struct mmc_softc *sc); static void mmc_discover_cards(struct mmc_softc *sc); static void mmc_format_card_id_string(struct mmc_ivars *ivar); static void mmc_go_discovery(struct mmc_softc *sc); static uint32_t mmc_get_bits(uint32_t *bits, int bit_len, int start, int size); static int mmc_highest_voltage(uint32_t ocr); static void mmc_idle_cards(struct mmc_softc *sc); static void mmc_ms_delay(int ms); static void mmc_log_card(device_t dev, struct mmc_ivars *ivar, int newcard); static void mmc_power_down(struct mmc_softc *sc); static void mmc_power_up(struct mmc_softc *sc); static void mmc_rescan_cards(struct mmc_softc *sc); static void mmc_scan(struct mmc_softc *sc); static int mmc_sd_switch(struct mmc_softc *sc, uint8_t mode, uint8_t grp, uint8_t value, uint8_t *res); static int mmc_select_card(struct mmc_softc *sc, uint16_t rca); static uint32_t mmc_select_vdd(struct mmc_softc *sc, uint32_t ocr); static int mmc_send_app_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr); static int mmc_send_csd(struct mmc_softc *sc, 
uint16_t rca, uint32_t *rawcsd); static int mmc_send_ext_csd(struct mmc_softc *sc, uint8_t *rawextcsd); static int mmc_send_if_cond(struct mmc_softc *sc, uint8_t vhs); static int mmc_send_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr); static int mmc_send_relative_addr(struct mmc_softc *sc, uint32_t *resp); static int mmc_send_status(struct mmc_softc *sc, uint16_t rca, uint32_t *status); static int mmc_set_blocklen(struct mmc_softc *sc, uint32_t len); static int mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, int width); static int mmc_set_relative_addr(struct mmc_softc *sc, uint16_t resp); static int mmc_set_timing(struct mmc_softc *sc, int timing); static int mmc_switch(struct mmc_softc *sc, uint8_t set, uint8_t index, uint8_t value); static int mmc_test_bus_width(struct mmc_softc *sc); static int mmc_wait_for_app_cmd(struct mmc_softc *sc, uint32_t rca, struct mmc_command *cmd, int retries); static int mmc_wait_for_cmd(struct mmc_softc *sc, struct mmc_command *cmd, int retries); static int mmc_wait_for_command(struct mmc_softc *sc, uint32_t opcode, uint32_t arg, uint32_t flags, uint32_t *resp, int retries); static int mmc_wait_for_req(struct mmc_softc *sc, struct mmc_request *req); static void mmc_wakeup(struct mmc_request *req); static void mmc_ms_delay(int ms) { DELAY(1000 * ms); /* XXX BAD */ } static int mmc_probe(device_t dev) { device_set_desc(dev, "MMC/SD bus"); return (0); } static int mmc_attach(device_t dev) { struct mmc_softc *sc; sc = device_get_softc(dev); sc->dev = dev; MMC_LOCK_INIT(sc); /* We'll probe and attach our children later, but before / mount */ sc->config_intrhook.ich_func = mmc_delayed_attach; sc->config_intrhook.ich_arg = sc; if (config_intrhook_establish(&sc->config_intrhook) != 0) device_printf(dev, "config_intrhook_establish failed\n"); return (0); } static int mmc_detach(device_t dev) { struct mmc_softc *sc = device_get_softc(dev); int err; if ((err = mmc_delete_cards(sc)) != 0) return (err); mmc_power_down(sc); MMC_LOCK_DESTROY(sc); return (0); } static int mmc_suspend(device_t dev) { struct mmc_softc *sc = device_get_softc(dev); int err; err = bus_generic_suspend(dev); if (err) return (err); mmc_power_down(sc); return (0); } static int mmc_resume(device_t dev) { struct mmc_softc *sc = device_get_softc(dev); mmc_scan(sc); return (bus_generic_resume(dev)); } static int mmc_acquire_bus(device_t busdev, device_t dev) { struct mmc_softc *sc; struct mmc_ivars *ivar; int err; int rca; err = MMCBR_ACQUIRE_HOST(device_get_parent(busdev), busdev); if (err) return (err); sc = device_get_softc(busdev); MMC_LOCK(sc); if (sc->owner) panic("mmc: host bridge didn't serialize us."); sc->owner = dev; MMC_UNLOCK(sc); if (busdev != dev) { /* * Keep track of the last rca that we've selected. If * we're asked to do it again, don't. We never * unselect unless the bus code itself wants the mmc * bus, and constantly reselecting causes problems. */ rca = mmc_get_rca(dev); if (sc->last_rca != rca) { mmc_select_card(sc, rca); sc->last_rca = rca; /* Prepare bus width for the new card. */ ivar = device_get_ivars(dev); if (bootverbose || mmc_debug) { device_printf(busdev, "setting bus width to %d bits\n", (ivar->bus_width == bus_width_4) ? 4 : (ivar->bus_width == bus_width_8) ? 8 : 1); } mmc_set_card_bus_width(sc, rca, ivar->bus_width); mmcbr_set_bus_width(busdev, ivar->bus_width); mmcbr_update_ios(busdev); } } else { /* * If there's a card selected, stand down. 
*/ if (sc->last_rca != 0) { mmc_select_card(sc, 0); sc->last_rca = 0; } } return (0); } static int mmc_release_bus(device_t busdev, device_t dev) { struct mmc_softc *sc; int err; sc = device_get_softc(busdev); MMC_LOCK(sc); if (!sc->owner) panic("mmc: releasing unowned bus."); if (sc->owner != dev) panic("mmc: you don't own the bus. game over."); MMC_UNLOCK(sc); err = MMCBR_RELEASE_HOST(device_get_parent(busdev), busdev); if (err) return (err); MMC_LOCK(sc); sc->owner = NULL; MMC_UNLOCK(sc); return (0); } static uint32_t mmc_select_vdd(struct mmc_softc *sc, uint32_t ocr) { return (ocr & MMC_OCR_VOLTAGE); } static int mmc_highest_voltage(uint32_t ocr) { int i; for (i = MMC_OCR_MAX_VOLTAGE_SHIFT; i >= MMC_OCR_MIN_VOLTAGE_SHIFT; i--) if (ocr & (1 << i)) return (i); return (-1); } static void mmc_wakeup(struct mmc_request *req) { struct mmc_softc *sc; sc = (struct mmc_softc *)req->done_data; MMC_LOCK(sc); req->flags |= MMC_REQ_DONE; MMC_UNLOCK(sc); wakeup(req); } static int mmc_wait_for_req(struct mmc_softc *sc, struct mmc_request *req) { req->done = mmc_wakeup; req->done_data = sc; if (mmc_debug > 1) { device_printf(sc->dev, "REQUEST: CMD%d arg %#x flags %#x", req->cmd->opcode, req->cmd->arg, req->cmd->flags); if (req->cmd->data) { printf(" data %d\n", (int)req->cmd->data->len); } else printf("\n"); } MMCBR_REQUEST(device_get_parent(sc->dev), sc->dev, req); MMC_LOCK(sc); while ((req->flags & MMC_REQ_DONE) == 0) msleep(req, &sc->sc_mtx, 0, "mmcreq", 0); MMC_UNLOCK(sc); if (mmc_debug > 2 || (mmc_debug > 0 && req->cmd->error != MMC_ERR_NONE)) device_printf(sc->dev, "CMD%d RESULT: %d\n", req->cmd->opcode, req->cmd->error); return (0); } static int mmc_wait_for_request(device_t brdev, device_t reqdev, struct mmc_request *req) { struct mmc_softc *sc = device_get_softc(brdev); return (mmc_wait_for_req(sc, req)); } static int mmc_wait_for_cmd(struct mmc_softc *sc, struct mmc_command *cmd, int retries) { struct mmc_request mreq; int err; do { memset(&mreq, 0, sizeof(mreq)); memset(cmd->resp, 0, sizeof(cmd->resp)); cmd->retries = 0; /* Retries done here, not in hardware. */ cmd->mrq = &mreq; mreq.cmd = cmd; if (mmc_wait_for_req(sc, &mreq) != 0) err = MMC_ERR_FAILED; else err = cmd->error; } while (err != MMC_ERR_NONE && retries-- > 0); if (err != MMC_ERR_NONE && sc->squelched == 0) { if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { device_printf(sc->dev, "CMD%d failed, RESULT: %d\n", cmd->opcode, err); } } return (err); } static int mmc_wait_for_app_cmd(struct mmc_softc *sc, uint32_t rca, struct mmc_command *cmd, int retries) { struct mmc_command appcmd; int err; /* Squelch error reporting at lower levels, we report below. 
*/ sc->squelched++; do { memset(&appcmd, 0, sizeof(appcmd)); appcmd.opcode = MMC_APP_CMD; appcmd.arg = rca << 16; appcmd.flags = MMC_RSP_R1 | MMC_CMD_AC; appcmd.data = NULL; if (mmc_wait_for_cmd(sc, &appcmd, 0) != 0) err = MMC_ERR_FAILED; else err = appcmd.error; if (err == MMC_ERR_NONE) { if (!(appcmd.resp[0] & R1_APP_CMD)) err = MMC_ERR_FAILED; else if (mmc_wait_for_cmd(sc, cmd, 0) != 0) err = MMC_ERR_FAILED; else err = cmd->error; } } while (err != MMC_ERR_NONE && retries-- > 0); sc->squelched--; if (err != MMC_ERR_NONE && sc->squelched == 0) { if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { device_printf(sc->dev, "ACMD%d failed, RESULT: %d\n", cmd->opcode, err); } } return (err); } static int mmc_wait_for_command(struct mmc_softc *sc, uint32_t opcode, uint32_t arg, uint32_t flags, uint32_t *resp, int retries) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = opcode; cmd.arg = arg; cmd.flags = flags; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, retries); if (err) return (err); if (resp) { if (flags & MMC_RSP_136) memcpy(resp, cmd.resp, 4 * sizeof(uint32_t)); else *resp = cmd.resp[0]; } return (0); } static void mmc_idle_cards(struct mmc_softc *sc) { device_t dev; struct mmc_command cmd; dev = sc->dev; mmcbr_set_chip_select(dev, cs_high); mmcbr_update_ios(dev); mmc_ms_delay(1); memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_GO_IDLE_STATE; cmd.arg = 0; cmd.flags = MMC_RSP_NONE | MMC_CMD_BC; cmd.data = NULL; mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); mmc_ms_delay(1); mmcbr_set_chip_select(dev, cs_dontcare); mmcbr_update_ios(dev); mmc_ms_delay(1); } static int mmc_send_app_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr) { struct mmc_command cmd; int err = MMC_ERR_NONE, i; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = ACMD_SD_SEND_OP_COND; cmd.arg = ocr; cmd.flags = MMC_RSP_R3 | MMC_CMD_BCR; cmd.data = NULL; for (i = 0; i < 1000; i++) { err = mmc_wait_for_app_cmd(sc, 0, &cmd, CMD_RETRIES); if (err != MMC_ERR_NONE) break; if ((cmd.resp[0] & MMC_OCR_CARD_BUSY) || (ocr & MMC_OCR_VOLTAGE) == 0) break; err = MMC_ERR_TIMEOUT; mmc_ms_delay(10); } if (rocr && err == MMC_ERR_NONE) *rocr = cmd.resp[0]; return (err); } static int mmc_send_op_cond(struct mmc_softc *sc, uint32_t ocr, uint32_t *rocr) { struct mmc_command cmd; int err = MMC_ERR_NONE, i; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SEND_OP_COND; cmd.arg = ocr; cmd.flags = MMC_RSP_R3 | MMC_CMD_BCR; cmd.data = NULL; for (i = 0; i < 1000; i++) { err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); if (err != MMC_ERR_NONE) break; if ((cmd.resp[0] & MMC_OCR_CARD_BUSY) || (ocr & MMC_OCR_VOLTAGE) == 0) break; err = MMC_ERR_TIMEOUT; mmc_ms_delay(10); } if (rocr && err == MMC_ERR_NONE) *rocr = cmd.resp[0]; return (err); } static int mmc_send_if_cond(struct mmc_softc *sc, uint8_t vhs) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = SD_SEND_IF_COND; cmd.arg = (vhs << 8) + 0xAA; cmd.flags = MMC_RSP_R7 | MMC_CMD_BCR; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static void mmc_power_up(struct mmc_softc *sc) { device_t dev; dev = sc->dev; mmcbr_set_vdd(dev, mmc_highest_voltage(mmcbr_get_host_ocr(dev))); mmcbr_set_bus_mode(dev, opendrain); mmcbr_set_chip_select(dev, cs_dontcare); mmcbr_set_bus_width(dev, bus_width_1); mmcbr_set_power_mode(dev, power_up); mmcbr_set_clock(dev, 0); mmcbr_update_ios(dev); mmc_ms_delay(1); mmcbr_set_clock(dev, CARD_ID_FREQUENCY); mmcbr_set_timing(dev, bus_timing_normal); mmcbr_set_power_mode(dev, power_on); 
mmcbr_update_ios(dev); mmc_ms_delay(2); } static void mmc_power_down(struct mmc_softc *sc) { device_t dev = sc->dev; mmcbr_set_bus_mode(dev, opendrain); mmcbr_set_chip_select(dev, cs_dontcare); mmcbr_set_bus_width(dev, bus_width_1); mmcbr_set_power_mode(dev, power_off); mmcbr_set_clock(dev, 0); mmcbr_set_timing(dev, bus_timing_normal); mmcbr_update_ios(dev); } static int mmc_select_card(struct mmc_softc *sc, uint16_t rca) { int flags; flags = (rca ? MMC_RSP_R1B : MMC_RSP_NONE) | MMC_CMD_AC; return (mmc_wait_for_command(sc, MMC_SELECT_CARD, (uint32_t)rca << 16, flags, NULL, CMD_RETRIES)); } static int mmc_switch(struct mmc_softc *sc, uint8_t set, uint8_t index, uint8_t value) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SWITCH_FUNC; cmd.arg = (MMC_SWITCH_FUNC_WR << 24) | (index << 16) | (value << 8) | set; cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static int mmc_sd_switch(struct mmc_softc *sc, uint8_t mode, uint8_t grp, uint8_t value, uint8_t *res) { int err; struct mmc_command cmd; struct mmc_data data; memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); memset(res, 0, 64); cmd.opcode = SD_SWITCH_FUNC; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.arg = mode << 31; /* 0 - check, 1 - set */ cmd.arg |= 0x00FFFFFF; cmd.arg &= ~(0xF << (grp * 4)); cmd.arg |= value << (grp * 4); cmd.data = &data; data.data = res; data.len = 64; data.flags = MMC_DATA_READ; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static int mmc_set_card_bus_width(struct mmc_softc *sc, uint16_t rca, int width) { struct mmc_command cmd; int err; uint8_t value; if (mmcbr_get_mode(sc->dev) == mode_sd) { memset(&cmd, 0, sizeof(cmd)); cmd.opcode = ACMD_SET_CLR_CARD_DETECT; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; cmd.arg = SD_CLR_CARD_DETECT; err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); if (err != 0) return (err); memset(&cmd, 0, sizeof(cmd)); cmd.opcode = ACMD_SET_BUS_WIDTH; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; switch (width) { case bus_width_1: cmd.arg = SD_BUS_WIDTH_1; break; case bus_width_4: cmd.arg = SD_BUS_WIDTH_4; break; default: return (MMC_ERR_INVALID); } err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); } else { switch (width) { case bus_width_1: value = EXT_CSD_BUS_WIDTH_1; break; case bus_width_4: value = EXT_CSD_BUS_WIDTH_4; break; case bus_width_8: value = EXT_CSD_BUS_WIDTH_8; break; default: return (MMC_ERR_INVALID); } err = mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BUS_WIDTH, value); } return (err); } static int mmc_set_timing(struct mmc_softc *sc, int timing) { + u_char switch_res[64]; int err; uint8_t value; - u_char switch_res[64]; switch (timing) { case bus_timing_normal: value = 0; break; case bus_timing_hs: value = 1; break; default: return (MMC_ERR_INVALID); } if (mmcbr_get_mode(sc->dev) == mode_sd) err = mmc_sd_switch(sc, SD_SWITCH_MODE_SET, SD_SWITCH_GROUP1, value, switch_res); else err = mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, value); return (err); } static int mmc_test_bus_width(struct mmc_softc *sc) { struct mmc_command cmd; struct mmc_data data; int err; uint8_t buf[8]; uint8_t p8[8] = { 0x55, 0xAA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; uint8_t p8ok[8] = { 0xAA, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; uint8_t p4[4] = { 0x5A, 0x00, 0x00, 0x00, }; uint8_t p4ok[4] = { 0xA5, 0x00, 0x00, 0x00, }; if (mmcbr_get_caps(sc->dev) & MMC_CAP_8_BIT_DATA) { mmcbr_set_bus_width(sc->dev, bus_width_8); mmcbr_update_ios(sc->dev); 
sc->squelched++; /* Errors are expected, squelch reporting. */ memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); cmd.opcode = MMC_BUSTEST_W; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; data.data = p8; data.len = 8; data.flags = MMC_DATA_WRITE; mmc_wait_for_cmd(sc, &cmd, 0); memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); cmd.opcode = MMC_BUSTEST_R; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; data.data = buf; data.len = 8; data.flags = MMC_DATA_READ; err = mmc_wait_for_cmd(sc, &cmd, 0); sc->squelched--; mmcbr_set_bus_width(sc->dev, bus_width_1); mmcbr_update_ios(sc->dev); if (err == MMC_ERR_NONE && memcmp(buf, p8ok, 8) == 0) return (bus_width_8); } if (mmcbr_get_caps(sc->dev) & MMC_CAP_4_BIT_DATA) { mmcbr_set_bus_width(sc->dev, bus_width_4); mmcbr_update_ios(sc->dev); sc->squelched++; /* Errors are expected, squelch reporting. */ memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); cmd.opcode = MMC_BUSTEST_W; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; data.data = p4; data.len = 4; data.flags = MMC_DATA_WRITE; mmc_wait_for_cmd(sc, &cmd, 0); memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); cmd.opcode = MMC_BUSTEST_R; cmd.arg = 0; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.data = &data; data.data = buf; data.len = 4; data.flags = MMC_DATA_READ; err = mmc_wait_for_cmd(sc, &cmd, 0); sc->squelched--; mmcbr_set_bus_width(sc->dev, bus_width_1); mmcbr_update_ios(sc->dev); if (err == MMC_ERR_NONE && memcmp(buf, p4ok, 4) == 0) return (bus_width_4); } return (bus_width_1); } static uint32_t mmc_get_bits(uint32_t *bits, int bit_len, int start, int size) { const int i = (bit_len / 32) - (start / 32) - 1; const int shift = start & 31; uint32_t retval = bits[i] >> shift; if (size + shift > 32) retval |= bits[i - 1] << (32 - shift); return (retval & ((1llu << size) - 1)); } static void mmc_decode_cid_sd(uint32_t *raw_cid, struct mmc_cid *cid) { int i; /* There's no version info, so we take it on faith */ memset(cid, 0, sizeof(*cid)); cid->mid = mmc_get_bits(raw_cid, 128, 120, 8); cid->oid = mmc_get_bits(raw_cid, 128, 104, 16); for (i = 0; i < 5; i++) cid->pnm[i] = mmc_get_bits(raw_cid, 128, 96 - i * 8, 8); cid->pnm[5] = 0; cid->prv = mmc_get_bits(raw_cid, 128, 56, 8); cid->psn = mmc_get_bits(raw_cid, 128, 24, 32); cid->mdt_year = mmc_get_bits(raw_cid, 128, 12, 8) + 2000; cid->mdt_month = mmc_get_bits(raw_cid, 128, 8, 4); } static void mmc_decode_cid_mmc(uint32_t *raw_cid, struct mmc_cid *cid) { int i; /* There's no version info, so we take it on faith */ memset(cid, 0, sizeof(*cid)); cid->mid = mmc_get_bits(raw_cid, 128, 120, 8); cid->oid = mmc_get_bits(raw_cid, 128, 104, 8); for (i = 0; i < 6; i++) cid->pnm[i] = mmc_get_bits(raw_cid, 128, 96 - i * 8, 8); cid->pnm[6] = 0; cid->prv = mmc_get_bits(raw_cid, 128, 48, 8); cid->psn = mmc_get_bits(raw_cid, 128, 16, 32); cid->mdt_month = mmc_get_bits(raw_cid, 128, 12, 4); cid->mdt_year = mmc_get_bits(raw_cid, 128, 8, 4) + 1997; } static void mmc_format_card_id_string(struct mmc_ivars *ivar) { char oidstr[8]; uint8_t c1; uint8_t c2; /* * Format a card ID string for use by the mmcsd driver, it's what * appears between the <> in the following: * mmcsd0: 968MB at mmc0 * 22.5MHz/4bit/128-block * * Also format just the card serial number, which the mmcsd driver will * use as the disk->d_ident string. 
* * The card_id_string in mmc_ivars is currently allocated as 64 bytes, * and our max formatted length is currently 55 bytes if every field * contains the largest value. * * Sometimes the oid is two printable ascii chars; when it's not, * format it as 0xnnnn instead. */ c1 = (ivar->cid.oid >> 8) & 0x0ff; c2 = ivar->cid.oid & 0x0ff; if (c1 > 0x1f && c1 < 0x7f && c2 > 0x1f && c2 < 0x7f) snprintf(oidstr, sizeof(oidstr), "%c%c", c1, c2); else snprintf(oidstr, sizeof(oidstr), "0x%04x", ivar->cid.oid); snprintf(ivar->card_sn_string, sizeof(ivar->card_sn_string), "%08X", ivar->cid.psn); snprintf(ivar->card_id_string, sizeof(ivar->card_id_string), "%s%s %s %d.%d SN %08X MFG %02d/%04d by %d %s", ivar->mode == mode_sd ? "SD" : "MMC", ivar->high_cap ? "HC" : "", ivar->cid.pnm, ivar->cid.prv >> 4, ivar->cid.prv & 0x0f, ivar->cid.psn, ivar->cid.mdt_month, ivar->cid.mdt_year, ivar->cid.mid, oidstr); } static const int exp[8] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000 }; static const int mant[16] = { 0, 10, 12, 13, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 70, 80 }; static const int cur_min[8] = { 500, 1000, 5000, 10000, 25000, 35000, 60000, 100000 }; static const int cur_max[8] = { 1000, 5000, 10000, 25000, 35000, 45000, 800000, 200000 }; static void mmc_decode_csd_sd(uint32_t *raw_csd, struct mmc_csd *csd) { int v; int m; int e; memset(csd, 0, sizeof(*csd)); csd->csd_structure = v = mmc_get_bits(raw_csd, 128, 126, 2); if (v == 0) { m = mmc_get_bits(raw_csd, 128, 115, 4); e = mmc_get_bits(raw_csd, 128, 112, 3); csd->tacc = (exp[e] * mant[m] + 9) / 10; csd->nsac = mmc_get_bits(raw_csd, 128, 104, 8) * 100; m = mmc_get_bits(raw_csd, 128, 99, 4); e = mmc_get_bits(raw_csd, 128, 96, 3); csd->tran_speed = exp[e] * 10000 * mant[m]; csd->ccc = mmc_get_bits(raw_csd, 128, 84, 12); csd->read_bl_len = 1 << mmc_get_bits(raw_csd, 128, 80, 4); csd->read_bl_partial = mmc_get_bits(raw_csd, 128, 79, 1); csd->write_blk_misalign = mmc_get_bits(raw_csd, 128, 78, 1); csd->read_blk_misalign = mmc_get_bits(raw_csd, 128, 77, 1); csd->dsr_imp = mmc_get_bits(raw_csd, 128, 76, 1); csd->vdd_r_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 59, 3)]; csd->vdd_r_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 56, 3)]; csd->vdd_w_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 53, 3)]; csd->vdd_w_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 50, 3)]; m = mmc_get_bits(raw_csd, 128, 62, 12); e = mmc_get_bits(raw_csd, 128, 47, 3); csd->capacity = ((1 + m) << (e + 2)) * csd->read_bl_len; csd->erase_blk_en = mmc_get_bits(raw_csd, 128, 46, 1); csd->erase_sector = mmc_get_bits(raw_csd, 128, 39, 7) + 1; csd->wp_grp_size = mmc_get_bits(raw_csd, 128, 32, 7); csd->wp_grp_enable = mmc_get_bits(raw_csd, 128, 31, 1); csd->r2w_factor = 1 << mmc_get_bits(raw_csd, 128, 26, 3); csd->write_bl_len = 1 << mmc_get_bits(raw_csd, 128, 22, 4); csd->write_bl_partial = mmc_get_bits(raw_csd, 128, 21, 1); } else if (v == 1) { m = mmc_get_bits(raw_csd, 128, 115, 4); e = mmc_get_bits(raw_csd, 128, 112, 3); csd->tacc = (exp[e] * mant[m] + 9) / 10; csd->nsac = mmc_get_bits(raw_csd, 128, 104, 8) * 100; m = mmc_get_bits(raw_csd, 128, 99, 4); e = mmc_get_bits(raw_csd, 128, 96, 3); csd->tran_speed = exp[e] * 10000 * mant[m]; csd->ccc = mmc_get_bits(raw_csd, 128, 84, 12); csd->read_bl_len = 1 << mmc_get_bits(raw_csd, 128, 80, 4); csd->read_bl_partial = mmc_get_bits(raw_csd, 128, 79, 1); csd->write_blk_misalign = mmc_get_bits(raw_csd, 128, 78, 1); csd->read_blk_misalign = mmc_get_bits(raw_csd, 128, 77, 1); csd->dsr_imp = mmc_get_bits(raw_csd, 128, 76, 1); csd->capacity = 
((uint64_t)mmc_get_bits(raw_csd, 128, 48, 22) + 1) * 512 * 1024; csd->erase_blk_en = mmc_get_bits(raw_csd, 128, 46, 1); csd->erase_sector = mmc_get_bits(raw_csd, 128, 39, 7) + 1; csd->wp_grp_size = mmc_get_bits(raw_csd, 128, 32, 7); csd->wp_grp_enable = mmc_get_bits(raw_csd, 128, 31, 1); csd->r2w_factor = 1 << mmc_get_bits(raw_csd, 128, 26, 3); csd->write_bl_len = 1 << mmc_get_bits(raw_csd, 128, 22, 4); csd->write_bl_partial = mmc_get_bits(raw_csd, 128, 21, 1); } else panic("unknown SD CSD version"); } static void mmc_decode_csd_mmc(uint32_t *raw_csd, struct mmc_csd *csd) { int m; int e; memset(csd, 0, sizeof(*csd)); csd->csd_structure = mmc_get_bits(raw_csd, 128, 126, 2); csd->spec_vers = mmc_get_bits(raw_csd, 128, 122, 4); m = mmc_get_bits(raw_csd, 128, 115, 4); e = mmc_get_bits(raw_csd, 128, 112, 3); csd->tacc = exp[e] * mant[m] + 9 / 10; csd->nsac = mmc_get_bits(raw_csd, 128, 104, 8) * 100; m = mmc_get_bits(raw_csd, 128, 99, 4); e = mmc_get_bits(raw_csd, 128, 96, 3); csd->tran_speed = exp[e] * 10000 * mant[m]; csd->ccc = mmc_get_bits(raw_csd, 128, 84, 12); csd->read_bl_len = 1 << mmc_get_bits(raw_csd, 128, 80, 4); csd->read_bl_partial = mmc_get_bits(raw_csd, 128, 79, 1); csd->write_blk_misalign = mmc_get_bits(raw_csd, 128, 78, 1); csd->read_blk_misalign = mmc_get_bits(raw_csd, 128, 77, 1); csd->dsr_imp = mmc_get_bits(raw_csd, 128, 76, 1); csd->vdd_r_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 59, 3)]; csd->vdd_r_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 56, 3)]; csd->vdd_w_curr_min = cur_min[mmc_get_bits(raw_csd, 128, 53, 3)]; csd->vdd_w_curr_max = cur_max[mmc_get_bits(raw_csd, 128, 50, 3)]; m = mmc_get_bits(raw_csd, 128, 62, 12); e = mmc_get_bits(raw_csd, 128, 47, 3); csd->capacity = ((1 + m) << (e + 2)) * csd->read_bl_len; csd->erase_blk_en = 0; csd->erase_sector = (mmc_get_bits(raw_csd, 128, 42, 5) + 1) * (mmc_get_bits(raw_csd, 128, 37, 5) + 1); csd->wp_grp_size = mmc_get_bits(raw_csd, 128, 32, 5); csd->wp_grp_enable = mmc_get_bits(raw_csd, 128, 31, 1); csd->r2w_factor = 1 << mmc_get_bits(raw_csd, 128, 26, 3); csd->write_bl_len = 1 << mmc_get_bits(raw_csd, 128, 22, 4); csd->write_bl_partial = mmc_get_bits(raw_csd, 128, 21, 1); } static void mmc_app_decode_scr(uint32_t *raw_scr, struct mmc_scr *scr) { unsigned int scr_struct; memset(scr, 0, sizeof(*scr)); scr_struct = mmc_get_bits(raw_scr, 64, 60, 4); if (scr_struct != 0) { printf("Unrecognised SCR structure version %d\n", scr_struct); return; } scr->sda_vsn = mmc_get_bits(raw_scr, 64, 56, 4); scr->bus_widths = mmc_get_bits(raw_scr, 64, 48, 4); } static void mmc_app_decode_sd_status(uint32_t *raw_sd_status, struct mmc_sd_status *sd_status) { memset(sd_status, 0, sizeof(*sd_status)); sd_status->bus_width = mmc_get_bits(raw_sd_status, 512, 510, 2); sd_status->secured_mode = mmc_get_bits(raw_sd_status, 512, 509, 1); sd_status->card_type = mmc_get_bits(raw_sd_status, 512, 480, 16); sd_status->prot_area = mmc_get_bits(raw_sd_status, 512, 448, 12); sd_status->speed_class = mmc_get_bits(raw_sd_status, 512, 440, 8); sd_status->perf_move = mmc_get_bits(raw_sd_status, 512, 432, 8); sd_status->au_size = mmc_get_bits(raw_sd_status, 512, 428, 4); sd_status->erase_size = mmc_get_bits(raw_sd_status, 512, 408, 16); sd_status->erase_timeout = mmc_get_bits(raw_sd_status, 512, 402, 6); sd_status->erase_offset = mmc_get_bits(raw_sd_status, 512, 400, 2); } static int mmc_all_send_cid(struct mmc_softc *sc, uint32_t *rawcid) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_ALL_SEND_CID; cmd.arg = 0; cmd.flags = 
MMC_RSP_R2 | MMC_CMD_BCR; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); memcpy(rawcid, cmd.resp, 4 * sizeof(uint32_t)); return (err); } static int mmc_send_csd(struct mmc_softc *sc, uint16_t rca, uint32_t *rawcsd) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SEND_CSD; cmd.arg = rca << 16; cmd.flags = MMC_RSP_R2 | MMC_CMD_BCR; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); memcpy(rawcsd, cmd.resp, 4 * sizeof(uint32_t)); return (err); } static int mmc_app_send_scr(struct mmc_softc *sc, uint16_t rca, uint32_t *rawscr) { int err; struct mmc_command cmd; struct mmc_data data; memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); memset(rawscr, 0, 8); cmd.opcode = ACMD_SEND_SCR; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.arg = 0; cmd.data = &data; data.data = rawscr; data.len = 8; data.flags = MMC_DATA_READ; err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); rawscr[0] = be32toh(rawscr[0]); rawscr[1] = be32toh(rawscr[1]); return (err); } static int mmc_send_ext_csd(struct mmc_softc *sc, uint8_t *rawextcsd) { - int err; struct mmc_command cmd; struct mmc_data data; + int err; memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); memset(rawextcsd, 0, 512); cmd.opcode = MMC_SEND_EXT_CSD; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.arg = 0; cmd.data = &data; data.data = rawextcsd; data.len = 512; data.flags = MMC_DATA_READ; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static int mmc_app_sd_status(struct mmc_softc *sc, uint16_t rca, uint32_t *rawsdstatus) { - int err, i; struct mmc_command cmd; struct mmc_data data; + int err, i; memset(&cmd, 0, sizeof(cmd)); memset(&data, 0, sizeof(data)); memset(rawsdstatus, 0, 64); cmd.opcode = ACMD_SD_STATUS; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; cmd.arg = 0; cmd.data = &data; data.data = rawsdstatus; data.len = 64; data.flags = MMC_DATA_READ; err = mmc_wait_for_app_cmd(sc, rca, &cmd, CMD_RETRIES); for (i = 0; i < 16; i++) rawsdstatus[i] = be32toh(rawsdstatus[i]); return (err); } static int mmc_set_relative_addr(struct mmc_softc *sc, uint16_t resp) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SET_RELATIVE_ADDR; cmd.arg = resp << 16; cmd.flags = MMC_RSP_R6 | MMC_CMD_BCR; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static int mmc_send_relative_addr(struct mmc_softc *sc, uint32_t *resp) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = SD_SEND_RELATIVE_ADDR; cmd.arg = 0; cmd.flags = MMC_RSP_R6 | MMC_CMD_BCR; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); *resp = cmd.resp[0]; return (err); } static int mmc_send_status(struct mmc_softc *sc, uint16_t rca, uint32_t *status) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SEND_STATUS; cmd.arg = rca << 16; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); *status = cmd.resp[0]; return (err); } static int mmc_set_blocklen(struct mmc_softc *sc, uint32_t len) { struct mmc_command cmd; int err; memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_SET_BLOCKLEN; cmd.arg = len; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; cmd.data = NULL; err = mmc_wait_for_cmd(sc, &cmd, CMD_RETRIES); return (err); } static void mmc_log_card(device_t dev, struct mmc_ivars *ivar, int newcard) { device_printf(dev, "Card at relative address 0x%04x%s:\n", ivar->rca, newcard ? 
" added" : ""); device_printf(dev, " card: %s\n", ivar->card_id_string); device_printf(dev, " bus: %ubit, %uMHz%s\n", (ivar->bus_width == bus_width_1 ? 1 : (ivar->bus_width == bus_width_4 ? 4 : 8)), (ivar->timing == bus_timing_hs ? ivar->hs_tran_speed : ivar->tran_speed) / 1000000, ivar->timing == bus_timing_hs ? ", high speed timing" : ""); device_printf(dev, " memory: %u blocks, erase sector %u blocks%s\n", ivar->sec_count, ivar->erase_sector, ivar->read_only ? ", read-only" : ""); } static void mmc_discover_cards(struct mmc_softc *sc) { struct mmc_ivars *ivar = NULL; device_t *devlist; int err, i, devcount, newcard; uint32_t raw_cid[4], resp, sec_count, status; device_t child; uint16_t rca = 2; u_char switch_res[64]; if (bootverbose || mmc_debug) device_printf(sc->dev, "Probing cards\n"); while (1) { sc->squelched++; /* Errors are expected, squelch reporting. */ err = mmc_all_send_cid(sc, raw_cid); sc->squelched--; if (err == MMC_ERR_TIMEOUT) break; if (err != MMC_ERR_NONE) { device_printf(sc->dev, "Error reading CID %d\n", err); break; } newcard = 1; if ((err = device_get_children(sc->dev, &devlist, &devcount)) != 0) return; for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); if (memcmp(ivar->raw_cid, raw_cid, sizeof(raw_cid)) == 0) { newcard = 0; break; } } free(devlist, M_TEMP); if (bootverbose || mmc_debug) { device_printf(sc->dev, "%sard detected (CID %08x%08x%08x%08x)\n", newcard ? "New c" : "C", raw_cid[0], raw_cid[1], raw_cid[2], raw_cid[3]); } if (newcard) { ivar = malloc(sizeof(struct mmc_ivars), M_DEVBUF, M_WAITOK | M_ZERO); memcpy(ivar->raw_cid, raw_cid, sizeof(raw_cid)); } if (mmcbr_get_ro(sc->dev)) ivar->read_only = 1; ivar->bus_width = bus_width_1; ivar->timing = bus_timing_normal; ivar->mode = mmcbr_get_mode(sc->dev); if (ivar->mode == mode_sd) { mmc_decode_cid_sd(ivar->raw_cid, &ivar->cid); mmc_send_relative_addr(sc, &resp); ivar->rca = resp >> 16; /* Get card CSD. */ mmc_send_csd(sc, ivar->rca, ivar->raw_csd); if (bootverbose || mmc_debug) device_printf(sc->dev, "%sard detected (CSD %08x%08x%08x%08x)\n", newcard ? "New c" : "C", ivar->raw_csd[0], ivar->raw_csd[1], ivar->raw_csd[2], ivar->raw_csd[3]); mmc_decode_csd_sd(ivar->raw_csd, &ivar->csd); ivar->sec_count = ivar->csd.capacity / MMC_SECTOR_SIZE; if (ivar->csd.csd_structure > 0) ivar->high_cap = 1; ivar->tran_speed = ivar->csd.tran_speed; ivar->erase_sector = ivar->csd.erase_sector * ivar->csd.write_bl_len / MMC_SECTOR_SIZE; err = mmc_send_status(sc, ivar->rca, &status); if (err != MMC_ERR_NONE) { device_printf(sc->dev, "Error reading card status %d\n", err); break; } if ((status & R1_CARD_IS_LOCKED) != 0) { device_printf(sc->dev, "Card is password protected, skipping.\n"); break; } /* Get card SCR. Card must be selected to fetch it. */ mmc_select_card(sc, ivar->rca); mmc_app_send_scr(sc, ivar->rca, ivar->raw_scr); mmc_app_decode_scr(ivar->raw_scr, &ivar->scr); /* Get card switch capabilities (command class 10). */ if ((ivar->scr.sda_vsn >= 1) && (ivar->csd.ccc & (1 << 10))) { mmc_sd_switch(sc, SD_SWITCH_MODE_CHECK, SD_SWITCH_GROUP1, SD_SWITCH_NOCHANGE, switch_res); if (switch_res[13] & 2) { ivar->timing = bus_timing_hs; ivar->hs_tran_speed = SD_MAX_HS; } } /* * We deselect then reselect the card here. Some cards * become unselected and timeout with the above two * commands, although the state tables / diagrams in the * standard suggest they go back to the transfer state. 
* Other cards don't become deselected, and if we - * atttempt to blindly re-select them, we get timeout + * attempt to blindly re-select them, we get timeout * errors from some controllers. So we deselect then * reselect to handle all situations. The only thing we * use from the sd_status is the erase sector size, but * it is still nice to get that right. */ mmc_select_card(sc, 0); mmc_select_card(sc, ivar->rca); mmc_app_sd_status(sc, ivar->rca, ivar->raw_sd_status); mmc_app_decode_sd_status(ivar->raw_sd_status, &ivar->sd_status); if (ivar->sd_status.au_size != 0) { ivar->erase_sector = 16 << ivar->sd_status.au_size; } /* Find max supported bus width. */ if ((mmcbr_get_caps(sc->dev) & MMC_CAP_4_BIT_DATA) && (ivar->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) ivar->bus_width = bus_width_4; /* * Some cards that report maximum I/O block sizes * greater than 512 require the block length to be * set to 512, even though that is supposed to be * the default. Example: * * Transcend 2GB SDSC card, CID: * mid=0x1b oid=0x534d pnm="00000" prv=1.0 mdt=00.2000 */ if (ivar->csd.read_bl_len != MMC_SECTOR_SIZE || ivar->csd.write_bl_len != MMC_SECTOR_SIZE) mmc_set_blocklen(sc, MMC_SECTOR_SIZE); mmc_format_card_id_string(ivar); if (bootverbose || mmc_debug) mmc_log_card(sc->dev, ivar, newcard); if (newcard) { /* Add device. */ child = device_add_child(sc->dev, NULL, -1); device_set_ivars(child, ivar); } mmc_select_card(sc, 0); return; } mmc_decode_cid_mmc(ivar->raw_cid, &ivar->cid); ivar->rca = rca++; mmc_set_relative_addr(sc, ivar->rca); /* Get card CSD. */ mmc_send_csd(sc, ivar->rca, ivar->raw_csd); if (bootverbose || mmc_debug) device_printf(sc->dev, "%sard detected (CSD %08x%08x%08x%08x)\n", newcard ? "New c" : "C", ivar->raw_csd[0], ivar->raw_csd[1], ivar->raw_csd[2], ivar->raw_csd[3]); mmc_decode_csd_mmc(ivar->raw_csd, &ivar->csd); ivar->sec_count = ivar->csd.capacity / MMC_SECTOR_SIZE; ivar->tran_speed = ivar->csd.tran_speed; ivar->erase_sector = ivar->csd.erase_sector * ivar->csd.write_bl_len / MMC_SECTOR_SIZE; err = mmc_send_status(sc, ivar->rca, &status); if (err != MMC_ERR_NONE) { device_printf(sc->dev, "Error reading card status %d\n", err); break; } if ((status & R1_CARD_IS_LOCKED) != 0) { device_printf(sc->dev, "Card is password protected, skipping.\n"); break; } mmc_select_card(sc, ivar->rca); /* Only MMC >= 4.x cards support EXT_CSD. */ if (ivar->csd.spec_vers >= 4) { mmc_send_ext_csd(sc, ivar->raw_ext_csd); /* Handle extended capacity from EXT_CSD */ sec_count = ivar->raw_ext_csd[EXT_CSD_SEC_CNT] + (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 1] << 8) + (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 2] << 16) + (ivar->raw_ext_csd[EXT_CSD_SEC_CNT + 3] << 24); if (sec_count != 0) { ivar->sec_count = sec_count; ivar->high_cap = 1; } /* Get card speed in high speed mode. */ ivar->timing = bus_timing_hs; if (ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_52) ivar->hs_tran_speed = MMC_TYPE_52_MAX_HS; else if (ivar->raw_ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_26) ivar->hs_tran_speed = MMC_TYPE_26_MAX_HS; else ivar->hs_tran_speed = ivar->tran_speed; /* Find max supported bus width. */ ivar->bus_width = mmc_test_bus_width(sc); /* Handle HC erase sector size. 
*/ if (ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE] != 0) { ivar->erase_sector = 1024 * ivar->raw_ext_csd[EXT_CSD_ERASE_GRP_SIZE]; mmc_switch(sc, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_ERASE_GRP_DEF, 1); } } else { ivar->bus_width = bus_width_1; ivar->timing = bus_timing_normal; } /* * Some cards that report maximum I/O block sizes greater * than 512 require the block length to be set to 512, even * though that is supposed to be the default. Example: * * Transcend 2GB SDSC card, CID: * mid=0x1b oid=0x534d pnm="00000" prv=1.0 mdt=00.2000 */ if (ivar->csd.read_bl_len != MMC_SECTOR_SIZE || ivar->csd.write_bl_len != MMC_SECTOR_SIZE) mmc_set_blocklen(sc, MMC_SECTOR_SIZE); mmc_format_card_id_string(ivar); if (bootverbose || mmc_debug) mmc_log_card(sc->dev, ivar, newcard); if (newcard) { /* Add device. */ child = device_add_child(sc->dev, NULL, -1); device_set_ivars(child, ivar); } mmc_select_card(sc, 0); } } static void mmc_rescan_cards(struct mmc_softc *sc) { - struct mmc_ivars *ivar = NULL; + struct mmc_ivars *ivar; device_t *devlist; int err, i, devcount; if ((err = device_get_children(sc->dev, &devlist, &devcount)) != 0) return; for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); if (mmc_select_card(sc, ivar->rca)) { if (bootverbose || mmc_debug) device_printf(sc->dev, "Card at relative address %d lost.\n", ivar->rca); device_delete_child(sc->dev, devlist[i]); free(ivar, M_DEVBUF); } } free(devlist, M_TEMP); mmc_select_card(sc, 0); } static int mmc_delete_cards(struct mmc_softc *sc) { struct mmc_ivars *ivar; device_t *devlist; int err, i, devcount; if ((err = device_get_children(sc->dev, &devlist, &devcount)) != 0) return (err); for (i = 0; i < devcount; i++) { ivar = device_get_ivars(devlist[i]); if (bootverbose || mmc_debug) device_printf(sc->dev, "Card at relative address %d deleted.\n", ivar->rca); device_delete_child(sc->dev, devlist[i]); free(ivar, M_DEVBUF); } free(devlist, M_TEMP); return (0); } static void mmc_go_discovery(struct mmc_softc *sc) { uint32_t ocr; device_t dev; int err; dev = sc->dev; if (mmcbr_get_power_mode(dev) != power_on) { /* * First, try SD modes */ sc->squelched++; /* Errors are expected, squelch reporting. */ mmcbr_set_mode(dev, mode_sd); mmc_power_up(sc); mmcbr_set_bus_mode(dev, pushpull); if (bootverbose || mmc_debug) device_printf(sc->dev, "Probing bus\n"); mmc_idle_cards(sc); err = mmc_send_if_cond(sc, 1); if ((bootverbose || mmc_debug) && err == 0) device_printf(sc->dev, "SD 2.0 interface conditions: OK\n"); if (mmc_send_app_op_cond(sc, 0, &ocr) != MMC_ERR_NONE) { if (bootverbose || mmc_debug) device_printf(sc->dev, "SD probe: failed\n"); /* * Failed, try MMC */ mmcbr_set_mode(dev, mode_mmc); if (mmc_send_op_cond(sc, 0, &ocr) != MMC_ERR_NONE) { if (bootverbose || mmc_debug) device_printf(sc->dev, "MMC probe: failed\n"); ocr = 0; /* Failed both, powerdown. */ } else if (bootverbose || mmc_debug) device_printf(sc->dev, "MMC probe: OK (OCR: 0x%08x)\n", ocr); } else if (bootverbose || mmc_debug) device_printf(sc->dev, "SD probe: OK (OCR: 0x%08x)\n", ocr); sc->squelched--; mmcbr_set_ocr(dev, mmc_select_vdd(sc, ocr)); if (mmcbr_get_ocr(dev) != 0) mmc_idle_cards(sc); } else { mmcbr_set_bus_mode(dev, opendrain); mmcbr_set_clock(dev, CARD_ID_FREQUENCY); mmcbr_update_ios(dev); /* XXX recompute vdd based on new cards? */ } /* * Make sure that we have a mutually agreeable voltage to at least * one card on the bus. 
*/ if (bootverbose || mmc_debug) device_printf(sc->dev, "Current OCR: 0x%08x\n", mmcbr_get_ocr(dev)); if (mmcbr_get_ocr(dev) == 0) { device_printf(sc->dev, "No compatible cards found on bus\n"); mmc_delete_cards(sc); mmc_power_down(sc); return; } /* * Reselect the cards after we've idled them above. */ if (mmcbr_get_mode(dev) == mode_sd) { err = mmc_send_if_cond(sc, 1); mmc_send_app_op_cond(sc, (err ? 0 : MMC_OCR_CCS) | mmcbr_get_ocr(dev), NULL); } else mmc_send_op_cond(sc, MMC_OCR_CCS | mmcbr_get_ocr(dev), NULL); mmc_discover_cards(sc); mmc_rescan_cards(sc); mmcbr_set_bus_mode(dev, pushpull); mmcbr_update_ios(dev); mmc_calculate_clock(sc); bus_generic_attach(dev); /* mmc_update_children_sysctl(dev);*/ } static int mmc_calculate_clock(struct mmc_softc *sc) { - int max_dtr, max_hs_dtr, max_timing; - int nkid, i, f_max; device_t *kids; struct mmc_ivars *ivar; + int i, f_max, max_dtr, max_hs_dtr, max_timing, nkid; f_max = mmcbr_get_f_max(sc->dev); max_dtr = max_hs_dtr = f_max; - if ((mmcbr_get_caps(sc->dev) & MMC_CAP_HSPEED)) + if (mmcbr_get_caps(sc->dev) & MMC_CAP_HSPEED) max_timing = bus_timing_hs; else max_timing = bus_timing_normal; if (device_get_children(sc->dev, &kids, &nkid) != 0) panic("can't get children"); for (i = 0; i < nkid; i++) { ivar = device_get_ivars(kids[i]); if (ivar->timing < max_timing) max_timing = ivar->timing; if (ivar->tran_speed < max_dtr) max_dtr = ivar->tran_speed; if (ivar->hs_tran_speed < max_hs_dtr) max_hs_dtr = ivar->hs_tran_speed; } for (i = 0; i < nkid; i++) { ivar = device_get_ivars(kids[i]); if (ivar->timing == bus_timing_normal) continue; mmc_select_card(sc, ivar->rca); mmc_set_timing(sc, max_timing); } mmc_select_card(sc, 0); free(kids, M_TEMP); if (max_timing == bus_timing_hs) max_dtr = max_hs_dtr; if (bootverbose || mmc_debug) { device_printf(sc->dev, "setting transfer rate to %d.%03dMHz%s\n", max_dtr / 1000000, (max_dtr / 1000) % 1000, max_timing == bus_timing_hs ? 
" (high speed timing)" : ""); } mmcbr_set_timing(sc->dev, max_timing); mmcbr_set_clock(sc->dev, max_dtr); mmcbr_update_ios(sc->dev); return max_dtr; } static void mmc_scan(struct mmc_softc *sc) { device_t dev = sc->dev; mmc_acquire_bus(dev, dev); mmc_go_discovery(sc); mmc_release_bus(dev, dev); } static int mmc_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) { struct mmc_ivars *ivar = device_get_ivars(child); switch (which) { default: return (EINVAL); case MMC_IVAR_DSR_IMP: *result = ivar->csd.dsr_imp; break; case MMC_IVAR_MEDIA_SIZE: *result = ivar->sec_count; break; case MMC_IVAR_RCA: *result = ivar->rca; break; case MMC_IVAR_SECTOR_SIZE: *result = MMC_SECTOR_SIZE; break; case MMC_IVAR_TRAN_SPEED: *result = mmcbr_get_clock(bus); break; case MMC_IVAR_READ_ONLY: *result = ivar->read_only; break; case MMC_IVAR_HIGH_CAP: *result = ivar->high_cap; break; case MMC_IVAR_CARD_TYPE: *result = ivar->mode; break; case MMC_IVAR_BUS_WIDTH: *result = ivar->bus_width; break; case MMC_IVAR_ERASE_SECTOR: *result = ivar->erase_sector; break; case MMC_IVAR_MAX_DATA: *result = mmcbr_get_max_data(bus); break; case MMC_IVAR_CARD_ID_STRING: *(char **)result = ivar->card_id_string; break; case MMC_IVAR_CARD_SN_STRING: *(char **)result = ivar->card_sn_string; break; } return (0); } static int mmc_write_ivar(device_t bus, device_t child, int which, uintptr_t value) { /* * None are writable ATM */ return (EINVAL); } static void mmc_delayed_attach(void *xsc) { struct mmc_softc *sc = xsc; mmc_scan(sc); config_intrhook_disestablish(&sc->config_intrhook); } static int mmc_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { snprintf(buf, buflen, "rca=0x%04x", mmc_get_rca(child)); return (0); } static device_method_t mmc_methods[] = { /* device_if */ DEVMETHOD(device_probe, mmc_probe), DEVMETHOD(device_attach, mmc_attach), DEVMETHOD(device_detach, mmc_detach), DEVMETHOD(device_suspend, mmc_suspend), DEVMETHOD(device_resume, mmc_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, mmc_read_ivar), DEVMETHOD(bus_write_ivar, mmc_write_ivar), DEVMETHOD(bus_child_location_str, mmc_child_location_str), /* MMC Bus interface */ DEVMETHOD(mmcbus_wait_for_request, mmc_wait_for_request), DEVMETHOD(mmcbus_acquire_bus, mmc_acquire_bus), DEVMETHOD(mmcbus_release_bus, mmc_release_bus), DEVMETHOD_END }; driver_t mmc_driver = { "mmc", mmc_methods, sizeof(struct mmc_softc), }; devclass_t mmc_devclass; MODULE_VERSION(mmc, 1); Index: projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmcreg.h =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmcreg.h (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/mmc/mmcreg.h (revision 313267) @@ -1,448 +1,448 @@ /*- * Copyright (c) 2006 M. Warner Losh. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Portions of this software may have been developed with reference to * the SD Simplified Specification. The following disclaimer may apply: * * The following conditions apply to the release of the simplified * specification ("Simplified Specification") by the SD Card Association and * the SD Group. The Simplified Specification is a subset of the complete SD * Specification which is owned by the SD Card Association and the SD * Group. This Simplified Specification is provided on a non-confidential * basis subject to the disclaimers below. Any implementation of the * Simplified Specification may require a license from the SD Card * Association, SD Group, SD-3C LLC or other third parties. * * Disclaimers: * * The information contained in the Simplified Specification is presented only * as a standard specification for SD Cards and SD Host/Ancillary products and * is provided "AS-IS" without any representations or warranties of any * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD * Card Association for any damages, any infringements of patents or other * right of the SD Group, SD-3C LLC, the SD Card Association or any third * parties, which may result from its use. No license is granted by * implication, estoppel or otherwise under any patent or other rights of the * SD Group, SD-3C LLC, the SD Card Association or any third party. Nothing * herein shall be construed as an obligation by the SD Group, the SD-3C LLC * or the SD Card Association to disclose or distribute any technical * information, know-how or other confidential information to any third party. * * $FreeBSD$ */ #ifndef DEV_MMC_MMCREG_H #define DEV_MMC_MMCREG_H /* * This file contains the register definitions for the mmc and sd buses. * They are taken from publicly available sources. 
*/ struct mmc_data; struct mmc_request; struct mmc_command { uint32_t opcode; uint32_t arg; uint32_t resp[4]; uint32_t flags; /* Expected responses */ #define MMC_RSP_PRESENT (1ul << 0) /* Response */ #define MMC_RSP_136 (1ul << 1) /* 136 bit response */ #define MMC_RSP_CRC (1ul << 2) /* Expect valid crc */ #define MMC_RSP_BUSY (1ul << 3) /* Card may send busy */ #define MMC_RSP_OPCODE (1ul << 4) /* Response include opcode */ #define MMC_RSP_MASK 0x1ful #define MMC_CMD_AC (0ul << 5) /* Addressed Command, no data */ #define MMC_CMD_ADTC (1ul << 5) /* Addressed Data transfer cmd */ #define MMC_CMD_BC (2ul << 5) /* Broadcast command, no response */ #define MMC_CMD_BCR (3ul << 5) /* Broadcast command with response */ #define MMC_CMD_MASK (3ul << 5) /* Possible response types defined in the standard: */ #define MMC_RSP_NONE (0) #define MMC_RSP_R1 (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE) #define MMC_RSP_R1B (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE | MMC_RSP_BUSY) #define MMC_RSP_R2 (MMC_RSP_PRESENT | MMC_RSP_136 | MMC_RSP_CRC) #define MMC_RSP_R3 (MMC_RSP_PRESENT) #define MMC_RSP_R4 (MMC_RSP_PRESENT) #define MMC_RSP_R5 (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE) #define MMC_RSP_R5B (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE | MMC_RSP_BUSY) #define MMC_RSP_R6 (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE) #define MMC_RSP_R7 (MMC_RSP_PRESENT | MMC_RSP_CRC | MMC_RSP_OPCODE) #define MMC_RSP(x) ((x) & MMC_RSP_MASK) uint32_t retries; uint32_t error; #define MMC_ERR_NONE 0 #define MMC_ERR_TIMEOUT 1 #define MMC_ERR_BADCRC 2 #define MMC_ERR_FIFO 3 #define MMC_ERR_FAILED 4 #define MMC_ERR_INVALID 5 #define MMC_ERR_NO_MEMORY 6 -#define MMC_ERR_MAX 6 +#define MMC_ERR_MAX 6 struct mmc_data *data; /* Data segment with cmd */ struct mmc_request *mrq; /* backpointer to request */ }; /* * R1 responses * * Types (per SD 2.0 standard) * e : error bit * s : status bit * r : detected and set for the actual command response * x : Detected and set during command execution. The host can get * the status by issuing a command with R1 response. * * Clear Condition (per SD 2.0 standard) * a : according to the card current state. * b : always related to the previous command. reception of a valid * command will clear it (with a delay of one command). 
* c : clear by read */ #define R1_OUT_OF_RANGE (1u << 31) /* erx, c */ #define R1_ADDRESS_ERROR (1u << 30) /* erx, c */ #define R1_BLOCK_LEN_ERROR (1u << 29) /* erx, c */ #define R1_ERASE_SEQ_ERROR (1u << 28) /* er, c */ #define R1_ERASE_PARAM (1u << 27) /* erx, c */ #define R1_WP_VIOLATION (1u << 26) /* erx, c */ #define R1_CARD_IS_LOCKED (1u << 25) /* sx, a */ #define R1_LOCK_UNLOCK_FAILED (1u << 24) /* erx, c */ #define R1_COM_CRC_ERROR (1u << 23) /* er, b */ #define R1_ILLEGAL_COMMAND (1u << 22) /* er, b */ #define R1_CARD_ECC_FAILED (1u << 21) /* erx, c */ #define R1_CC_ERROR (1u << 20) /* erx, c */ #define R1_ERROR (1u << 19) /* erx, c */ #define R1_CSD_OVERWRITE (1u << 16) /* erx, c */ #define R1_WP_ERASE_SKIP (1u << 15) /* erx, c */ #define R1_CARD_ECC_DISABLED (1u << 14) /* sx, a */ #define R1_ERASE_RESET (1u << 13) /* sr, c */ #define R1_CURRENT_STATE_MASK (0xfu << 9) /* sx, b */ #define R1_READY_FOR_DATA (1u << 8) /* sx, a */ #define R1_APP_CMD (1u << 5) /* sr, c */ #define R1_AKE_SEQ_ERROR (1u << 3) /* er, c */ #define R1_STATUS(x) ((x) & 0xFFFFE000) #define R1_CURRENT_STATE(x) (((x) & R1_CURRENT_STATE_MASK) >> 9) #define R1_STATE_IDLE 0 #define R1_STATE_READY 1 #define R1_STATE_IDENT 2 #define R1_STATE_STBY 3 #define R1_STATE_TRAN 4 #define R1_STATE_DATA 5 #define R1_STATE_RCV 6 #define R1_STATE_PRG 7 #define R1_STATE_DIS 8 struct mmc_data { size_t len; /* size of the data */ size_t xfer_len; void *data; /* data buffer */ uint32_t flags; #define MMC_DATA_WRITE (1UL << 0) #define MMC_DATA_READ (1UL << 1) #define MMC_DATA_STREAM (1UL << 2) #define MMC_DATA_MULTI (1UL << 3) struct mmc_request *mrq; }; struct mmc_request { struct mmc_command *cmd; struct mmc_command *stop; void (*done)(struct mmc_request *); /* Completion function */ void *done_data; /* requestor set data */ uint32_t flags; #define MMC_REQ_DONE 1 }; /* Command definitions */ /* Class 0 and 1: Basic commands & read stream commands */ #define MMC_GO_IDLE_STATE 0 #define MMC_SEND_OP_COND 1 #define MMC_ALL_SEND_CID 2 #define MMC_SET_RELATIVE_ADDR 3 #define SD_SEND_RELATIVE_ADDR 3 #define MMC_SET_DSR 4 /* reserved: 5 */ #define MMC_SWITCH_FUNC 6 #define MMC_SWITCH_FUNC_CMDS 0 #define MMC_SWITCH_FUNC_SET 1 #define MMC_SWITCH_FUNC_CLR 2 #define MMC_SWITCH_FUNC_WR 3 #define MMC_SELECT_CARD 7 #define MMC_DESELECT_CARD 7 #define MMC_SEND_EXT_CSD 8 #define SD_SEND_IF_COND 8 #define MMC_SEND_CSD 9 #define MMC_SEND_CID 10 #define MMC_READ_DAT_UNTIL_STOP 11 #define MMC_STOP_TRANSMISSION 12 #define MMC_SEND_STATUS 13 #define MMC_BUSTEST_R 14 #define MMC_GO_INACTIVE_STATE 15 #define MMC_BUSTEST_W 19 /* Class 2: Block oriented read commands */ #define MMC_SET_BLOCKLEN 16 #define MMC_READ_SINGLE_BLOCK 17 #define MMC_READ_MULTIPLE_BLOCK 18 /* reserved: 19 */ /* Class 3: Stream write commands */ #define MMC_WRITE_DAT_UNTIL_STOP 20 /* reserved: 21 */ /* reserved: 22 */ /* Class 4: Block oriented write commands */ #define MMC_SET_BLOCK_COUNT 23 #define MMC_WRITE_BLOCK 24 #define MMC_WRITE_MULTIPLE_BLOCK 25 #define MMC_PROGARM_CID 26 #define MMC_PROGRAM_CSD 27 /* Class 6: Block oriented write protection commands */ #define MMC_SET_WRITE_PROT 28 #define MMC_CLR_WRITE_PROT 29 #define MMC_SEND_WRITE_PROT 30 /* reserved: 31 */ /* Class 5: Erase commands */ #define SD_ERASE_WR_BLK_START 32 #define SD_ERASE_WR_BLK_END 33 /* 34 -- reserved old command */ #define MMC_ERASE_GROUP_START 35 #define MMC_ERASE_GROUP_END 36 /* 37 -- reserved old command */ #define MMC_ERASE 38 /* Class 9: I/O mode commands */ #define MMC_FAST_IO 39 #define 
MMC_GO_IRQ_STATE 40 /* reserved: 41 */ /* Class 7: Lock card */ #define MMC_LOCK_UNLOCK 42 /* reserved: 43 */ /* reserved: 44 */ /* reserved: 45 */ /* reserved: 46 */ /* reserved: 47 */ /* reserved: 48 */ /* reserved: 49 */ /* reserved: 50 */ /* reserved: 51 */ /* reserved: 54 */ /* Class 8: Application specific commands */ #define MMC_APP_CMD 55 #define MMC_GEN_CMD 56 /* reserved: 57 */ /* reserved: 58 */ /* reserved: 59 */ /* reserved for mfg: 60 */ /* reserved for mfg: 61 */ /* reserved for mfg: 62 */ /* reserved for mfg: 63 */ /* Class 9: I/O cards (sd) */ #define SD_IO_RW_DIRECT 52 #define SD_IO_RW_EXTENDED 53 /* Class 10: Switch function commands */ #define SD_SWITCH_FUNC 6 /* reserved: 34 */ /* reserved: 35 */ /* reserved: 36 */ /* reserved: 37 */ /* reserved: 50 */ /* reserved: 57 */ /* Application specific commands for SD */ #define ACMD_SET_BUS_WIDTH 6 #define ACMD_SD_STATUS 13 #define ACMD_SEND_NUM_WR_BLOCKS 22 #define ACMD_SET_WR_BLK_ERASE_COUNT 23 #define ACMD_SD_SEND_OP_COND 41 #define ACMD_SET_CLR_CARD_DETECT 42 #define ACMD_SEND_SCR 51 /* * EXT_CSD fields */ -#define EXT_CSD_ERASE_GRP_DEF 175 /* R/W */ -#define EXT_CSD_BUS_WIDTH 183 /* R/W */ -#define EXT_CSD_HS_TIMING 185 /* R/W */ -#define EXT_CSD_CARD_TYPE 196 /* RO */ -#define EXT_CSD_REV 192 /* RO */ -#define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ -#define EXT_CSD_ERASE_TO_MULT 223 /* RO */ -#define EXT_CSD_ERASE_GRP_SIZE 224 /* RO */ +#define EXT_CSD_ERASE_GRP_DEF 175 /* R/W */ +#define EXT_CSD_BUS_WIDTH 183 /* R/W */ +#define EXT_CSD_HS_TIMING 185 /* R/W */ +#define EXT_CSD_CARD_TYPE 196 /* RO */ +#define EXT_CSD_REV 192 /* RO */ +#define EXT_CSD_SEC_CNT 212 /* RO, 4 bytes */ +#define EXT_CSD_ERASE_TO_MULT 223 /* RO */ +#define EXT_CSD_ERASE_GRP_SIZE 224 /* RO */ /* * EXT_CSD field definitions */ -#define EXT_CSD_CMD_SET_NORMAL 1 -#define EXT_CSD_CMD_SET_SECURE 2 -#define EXT_CSD_CMD_SET_CPSECURE 4 +#define EXT_CSD_CMD_SET_NORMAL 1 +#define EXT_CSD_CMD_SET_SECURE 2 +#define EXT_CSD_CMD_SET_CPSECURE 4 -#define EXT_CSD_CARD_TYPE_26 1 -#define EXT_CSD_CARD_TYPE_52 2 +#define EXT_CSD_CARD_TYPE_26 1 +#define EXT_CSD_CARD_TYPE_52 2 -#define EXT_CSD_BUS_WIDTH_1 0 -#define EXT_CSD_BUS_WIDTH_4 1 -#define EXT_CSD_BUS_WIDTH_8 2 +#define EXT_CSD_BUS_WIDTH_1 0 +#define EXT_CSD_BUS_WIDTH_4 1 +#define EXT_CSD_BUS_WIDTH_8 2 -#define MMC_TYPE_26_MAX_HS 26000000 -#define MMC_TYPE_52_MAX_HS 52000000 +#define MMC_TYPE_26_MAX_HS 26000000 +#define MMC_TYPE_52_MAX_HS 52000000 /* * SD bus widths */ -#define SD_BUS_WIDTH_1 0 -#define SD_BUS_WIDTH_4 2 +#define SD_BUS_WIDTH_1 0 +#define SD_BUS_WIDTH_4 2 /* * SD Switch */ -#define SD_SWITCH_MODE_CHECK 0 -#define SD_SWITCH_MODE_SET 1 -#define SD_SWITCH_GROUP1 0 -#define SD_SWITCH_NORMAL_MODE 0 -#define SD_SWITCH_HS_MODE 1 -#define SD_SWITCH_NOCHANGE 0xF +#define SD_SWITCH_MODE_CHECK 0 +#define SD_SWITCH_MODE_SET 1 +#define SD_SWITCH_GROUP1 0 +#define SD_SWITCH_NORMAL_MODE 0 +#define SD_SWITCH_HS_MODE 1 +#define SD_SWITCH_NOCHANGE 0xF #define SD_CLR_CARD_DETECT 0 #define SD_SET_CARD_DETECT 1 #define SD_MAX_HS 50000000 /* OCR bits */ /* * in SD 2.0 spec, bits 8-14 are now marked reserved * Low voltage in SD2.0 spec is bit 7, TBD voltage * Low voltage in MC 3.31 spec is bit 7, 1.65-1.95V * Specs prior to MMC 3.31 defined bits 0-7 as voltages down to 1.5V. * 3.31 redefined them to be reserved and also said that cards had to * support the 2.7-3.6V and fixed the OCR to be 0xfff8000 for high voltage * cards. MMC 4.0 says that a dual voltage card responds with 0xfff8080. 
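 * (For example, a typical SD card that supports 2.7-3.6V reports an OCR of 0x00ff8000, i.e. the MMC_OCR_270_280 through MMC_OCR_350_360 bits defined below; an SDHC card additionally sets MMC_OCR_CCS, and MMC_OCR_CARD_BUSY is set once power-up has completed.)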
* Looks like the fine-grained control of the voltage tolerance ranges * was abandoned. * * The MMC_OCR_CCS appears to be valid for only SD cards. */ #define MMC_OCR_VOLTAGE 0x3fffffffU /* Vdd Voltage mask */ #define MMC_OCR_LOW_VOLTAGE (1u << 7) /* Low Voltage Range -- tbd */ #define MMC_OCR_MIN_VOLTAGE_SHIFT 7 #define MMC_OCR_200_210 (1U << 8) /* Vdd voltage 2.00 ~ 2.10 */ #define MMC_OCR_210_220 (1U << 9) /* Vdd voltage 2.10 ~ 2.20 */ #define MMC_OCR_220_230 (1U << 10) /* Vdd voltage 2.20 ~ 2.30 */ #define MMC_OCR_230_240 (1U << 11) /* Vdd voltage 2.30 ~ 2.40 */ #define MMC_OCR_240_250 (1U << 12) /* Vdd voltage 2.40 ~ 2.50 */ #define MMC_OCR_250_260 (1U << 13) /* Vdd voltage 2.50 ~ 2.60 */ #define MMC_OCR_260_270 (1U << 14) /* Vdd voltage 2.60 ~ 2.70 */ #define MMC_OCR_270_280 (1U << 15) /* Vdd voltage 2.70 ~ 2.80 */ #define MMC_OCR_280_290 (1U << 16) /* Vdd voltage 2.80 ~ 2.90 */ #define MMC_OCR_290_300 (1U << 17) /* Vdd voltage 2.90 ~ 3.00 */ #define MMC_OCR_300_310 (1U << 18) /* Vdd voltage 3.00 ~ 3.10 */ #define MMC_OCR_310_320 (1U << 19) /* Vdd voltage 3.10 ~ 3.20 */ #define MMC_OCR_320_330 (1U << 20) /* Vdd voltage 3.20 ~ 3.30 */ #define MMC_OCR_330_340 (1U << 21) /* Vdd voltage 3.30 ~ 3.40 */ #define MMC_OCR_340_350 (1U << 22) /* Vdd voltage 3.40 ~ 3.50 */ #define MMC_OCR_350_360 (1U << 23) /* Vdd voltage 3.50 ~ 3.60 */ #define MMC_OCR_MAX_VOLTAGE_SHIFT 23 #define MMC_OCR_CCS (1u << 30) /* Card Capacity status (SD vs SDHC) */ #define MMC_OCR_CARD_BUSY (1U << 31) /* Card Power up status */ /* CSD -- decoded structure */ struct mmc_cid { uint32_t mid; char pnm[8]; uint32_t psn; uint16_t oid; uint16_t mdt_year; uint8_t mdt_month; uint8_t prv; uint8_t fwrev; }; struct mmc_csd { uint8_t csd_structure; uint8_t spec_vers; uint16_t ccc; uint16_t tacc; uint32_t nsac; uint32_t r2w_factor; uint32_t tran_speed; uint32_t read_bl_len; uint32_t write_bl_len; uint32_t vdd_r_curr_min; uint32_t vdd_r_curr_max; uint32_t vdd_w_curr_min; uint32_t vdd_w_curr_max; uint32_t wp_grp_size; uint32_t erase_sector; uint64_t capacity; unsigned int read_bl_partial:1, read_blk_misalign:1, write_bl_partial:1, write_blk_misalign:1, dsr_imp:1, erase_blk_en:1, wp_grp_enable:1; }; struct mmc_scr { unsigned char sda_vsn; unsigned char bus_widths; -#define SD_SCR_BUS_WIDTH_1 (1<<0) -#define SD_SCR_BUS_WIDTH_4 (1<<2) +#define SD_SCR_BUS_WIDTH_1 (1 << 0) +#define SD_SCR_BUS_WIDTH_4 (1 << 2) }; struct mmc_sd_status { uint8_t bus_width; uint8_t secured_mode; uint16_t card_type; uint16_t prot_area; uint8_t speed_class; uint8_t perf_move; uint8_t au_size; uint16_t erase_size; uint8_t erase_timeout; uint8_t erase_offset; }; /* * Older versions of the MMC standard had a variable sector size. However, * I've been able to find no old MMC or SD cards that have a non 512 * byte sector size anywhere, so we assume that such cards are very rare * and only note their existence in passing here... */ -#define MMC_SECTOR_SIZE 512 +#define MMC_SECTOR_SIZE 512 #endif /* DEV_MMCREG_H */ Index: projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.c (revision 313267) @@ -1,1594 +1,1602 @@ /*- * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci.h" #include "sdhci_if.h" SYSCTL_NODE(_hw, OID_AUTO, sdhci, CTLFLAG_RD, 0, "sdhci driver"); static int sdhci_debug; -SYSCTL_INT(_hw_sdhci, OID_AUTO, debug, CTLFLAG_RWTUN, &sdhci_debug, 0, "Debug level"); +SYSCTL_INT(_hw_sdhci, OID_AUTO, debug, CTLFLAG_RWTUN, &sdhci_debug, 0, + "Debug level"); -#define RD1(slot, off) SDHCI_READ_1((slot)->bus, (slot), (off)) -#define RD2(slot, off) SDHCI_READ_2((slot)->bus, (slot), (off)) -#define RD4(slot, off) SDHCI_READ_4((slot)->bus, (slot), (off)) -#define RD_MULTI_4(slot, off, ptr, count) \ +#define RD1(slot, off) SDHCI_READ_1((slot)->bus, (slot), (off)) +#define RD2(slot, off) SDHCI_READ_2((slot)->bus, (slot), (off)) +#define RD4(slot, off) SDHCI_READ_4((slot)->bus, (slot), (off)) +#define RD_MULTI_4(slot, off, ptr, count) \ SDHCI_READ_MULTI_4((slot)->bus, (slot), (off), (ptr), (count)) -#define WR1(slot, off, val) SDHCI_WRITE_1((slot)->bus, (slot), (off), (val)) -#define WR2(slot, off, val) SDHCI_WRITE_2((slot)->bus, (slot), (off), (val)) -#define WR4(slot, off, val) SDHCI_WRITE_4((slot)->bus, (slot), (off), (val)) -#define WR_MULTI_4(slot, off, ptr, count) \ +#define WR1(slot, off, val) SDHCI_WRITE_1((slot)->bus, (slot), (off), (val)) +#define WR2(slot, off, val) SDHCI_WRITE_2((slot)->bus, (slot), (off), (val)) +#define WR4(slot, off, val) SDHCI_WRITE_4((slot)->bus, (slot), (off), (val)) +#define WR_MULTI_4(slot, off, ptr, count) \ SDHCI_WRITE_MULTI_4((slot)->bus, (slot), (off), (ptr), (count)) static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock); static void sdhci_start(struct sdhci_slot *slot); static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data); static void sdhci_card_poll(void *); static void sdhci_card_task(void *, int); /* helper routines */ -#define SDHCI_LOCK(_slot) mtx_lock(&(_slot)->mtx) +#define SDHCI_LOCK(_slot) mtx_lock(&(_slot)->mtx) #define SDHCI_UNLOCK(_slot) mtx_unlock(&(_slot)->mtx) -#define SDHCI_LOCK_INIT(_slot) \ +#define SDHCI_LOCK_INIT(_slot) \ mtx_init(&_slot->mtx, "SD slot mtx", "sdhci", MTX_DEF) -#define SDHCI_LOCK_DESTROY(_slot) mtx_destroy(&_slot->mtx); -#define SDHCI_ASSERT_LOCKED(_slot) mtx_assert(&_slot->mtx, MA_OWNED); -#define SDHCI_ASSERT_UNLOCKED(_slot) mtx_assert(&_slot->mtx, MA_NOTOWNED); 
+#define SDHCI_LOCK_DESTROY(_slot) mtx_destroy(&_slot->mtx); +#define SDHCI_ASSERT_LOCKED(_slot) mtx_assert(&_slot->mtx, MA_OWNED); +#define SDHCI_ASSERT_UNLOCKED(_slot) mtx_assert(&_slot->mtx, MA_NOTOWNED); #define SDHCI_DEFAULT_MAX_FREQ 50 #define SDHCI_200_MAX_DIVIDER 256 #define SDHCI_300_MAX_DIVIDER 2046 #define SDHCI_CARD_PRESENT_TICKS (hz / 5) #define SDHCI_INSERT_DELAY_TICKS (hz / 2) /* * Broadcom BCM577xx Controller Constants */ /* Maximum divider supported by the default clock source. */ -#define BCM577XX_DEFAULT_MAX_DIVIDER 256 +#define BCM577XX_DEFAULT_MAX_DIVIDER 256 /* Alternative clock's base frequency. */ -#define BCM577XX_ALT_CLOCK_BASE 63000000 +#define BCM577XX_ALT_CLOCK_BASE 63000000 -#define BCM577XX_HOST_CONTROL 0x198 -#define BCM577XX_CTRL_CLKSEL_MASK 0xFFFFCFFF -#define BCM577XX_CTRL_CLKSEL_SHIFT 12 -#define BCM577XX_CTRL_CLKSEL_DEFAULT 0x0 -#define BCM577XX_CTRL_CLKSEL_64MHZ 0x3 +#define BCM577XX_HOST_CONTROL 0x198 +#define BCM577XX_CTRL_CLKSEL_MASK 0xFFFFCFFF +#define BCM577XX_CTRL_CLKSEL_SHIFT 12 +#define BCM577XX_CTRL_CLKSEL_DEFAULT 0x0 +#define BCM577XX_CTRL_CLKSEL_64MHZ 0x3 - static void sdhci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { + if (error != 0) { printf("getaddr: error %d\n", error); return; } *(bus_addr_t *)arg = segs[0].ds_addr; } static int slot_printf(struct sdhci_slot *slot, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s-slot%d: ", device_get_nameunit(slot->bus), slot->num); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } static void sdhci_dumpregs(struct sdhci_slot *slot) { + slot_printf(slot, "============== REGISTER DUMP ==============\n"); slot_printf(slot, "Sys addr: 0x%08x | Version: 0x%08x\n", RD4(slot, SDHCI_DMA_ADDRESS), RD2(slot, SDHCI_HOST_VERSION)); slot_printf(slot, "Blk size: 0x%08x | Blk cnt: 0x%08x\n", RD2(slot, SDHCI_BLOCK_SIZE), RD2(slot, SDHCI_BLOCK_COUNT)); slot_printf(slot, "Argument: 0x%08x | Trn mode: 0x%08x\n", RD4(slot, SDHCI_ARGUMENT), RD2(slot, SDHCI_TRANSFER_MODE)); slot_printf(slot, "Present: 0x%08x | Host ctl: 0x%08x\n", RD4(slot, SDHCI_PRESENT_STATE), RD1(slot, SDHCI_HOST_CONTROL)); slot_printf(slot, "Power: 0x%08x | Blk gap: 0x%08x\n", RD1(slot, SDHCI_POWER_CONTROL), RD1(slot, SDHCI_BLOCK_GAP_CONTROL)); slot_printf(slot, "Wake-up: 0x%08x | Clock: 0x%08x\n", RD1(slot, SDHCI_WAKE_UP_CONTROL), RD2(slot, SDHCI_CLOCK_CONTROL)); slot_printf(slot, "Timeout: 0x%08x | Int stat: 0x%08x\n", RD1(slot, SDHCI_TIMEOUT_CONTROL), RD4(slot, SDHCI_INT_STATUS)); slot_printf(slot, "Int enab: 0x%08x | Sig enab: 0x%08x\n", RD4(slot, SDHCI_INT_ENABLE), RD4(slot, SDHCI_SIGNAL_ENABLE)); slot_printf(slot, "AC12 err: 0x%08x | Slot int: 0x%08x\n", RD2(slot, SDHCI_ACMD12_ERR), RD2(slot, SDHCI_SLOT_INT_STATUS)); slot_printf(slot, "Caps: 0x%08x | Max curr: 0x%08x\n", RD4(slot, SDHCI_CAPABILITIES), RD4(slot, SDHCI_MAX_CURRENT)); slot_printf(slot, "===========================================\n"); } static void sdhci_reset(struct sdhci_slot *slot, uint8_t mask) { int timeout; if (slot->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { if (!SDHCI_GET_CARD_PRESENT(slot->bus, slot)) return; } /* Some controllers need this kick or reset won't work. 
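 * (The kick is the clock toggle just below: slot->clock is zeroed and sdhci_set_clock() is called with the old value, forcing the clock register to be rewritten before the reset is issued.)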
*/ if ((mask & SDHCI_RESET_ALL) == 0 && (slot->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)) { uint32_t clock; /* This is to force an update */ clock = slot->clock; slot->clock = 0; sdhci_set_clock(slot, clock); } if (mask & SDHCI_RESET_ALL) { slot->clock = 0; slot->power = 0; } WR1(slot, SDHCI_SOFTWARE_RESET, mask); if (slot->quirks & SDHCI_QUIRK_WAITFOR_RESET_ASSERTED) { /* * Resets on TI OMAPs and AM335x are incompatible with SDHCI * specification. The reset bit has internal propagation delay, * so a fast read after write returns 0 even if reset process is * in progress. The workaround is to poll for 1 before polling * for 0. In the worst case, if we miss seeing it asserted the * time we spent waiting is enough to ensure the reset finishes. */ timeout = 10000; while ((RD1(slot, SDHCI_SOFTWARE_RESET) & mask) != mask) { if (timeout <= 0) break; timeout--; DELAY(1); } } /* Wait max 100 ms */ timeout = 10000; /* Controller clears the bits when it's done */ while (RD1(slot, SDHCI_SOFTWARE_RESET) & mask) { if (timeout <= 0) { slot_printf(slot, "Reset 0x%x never completed.\n", mask); sdhci_dumpregs(slot); return; } timeout--; DELAY(10); } } static void sdhci_init(struct sdhci_slot *slot) { sdhci_reset(slot, SDHCI_RESET_ALL); /* Enable interrupts. */ slot->intmask = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE | SDHCI_INT_ACMD12ERR; if (!(slot->quirks & SDHCI_QUIRK_POLL_CARD_PRESENT) && !(slot->opt & SDHCI_NON_REMOVABLE)) { slot->intmask |= SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT; } WR4(slot, SDHCI_INT_ENABLE, slot->intmask); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock) { uint32_t clk_base; uint32_t clk_sel; uint32_t res; uint16_t clk; uint16_t div; int timeout; if (clock == slot->clock) return; slot->clock = clock; /* Turn off the clock. */ clk = RD2(slot, SDHCI_CLOCK_CONTROL); WR2(slot, SDHCI_CLOCK_CONTROL, clk & ~SDHCI_CLOCK_CARD_EN); /* If no clock requested - left it so. */ if (clock == 0) return; /* Determine the clock base frequency */ clk_base = slot->max_clk; if (slot->quirks & SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC) { clk_sel = RD2(slot, BCM577XX_HOST_CONTROL) & BCM577XX_CTRL_CLKSEL_MASK; /* * Select clock source appropriate for the requested frequency. */ if ((clk_base / BCM577XX_DEFAULT_MAX_DIVIDER) > clock) { clk_base = BCM577XX_ALT_CLOCK_BASE; clk_sel |= (BCM577XX_CTRL_CLKSEL_64MHZ << BCM577XX_CTRL_CLKSEL_SHIFT); } else { clk_sel |= (BCM577XX_CTRL_CLKSEL_DEFAULT << BCM577XX_CTRL_CLKSEL_SHIFT); } WR2(slot, BCM577XX_HOST_CONTROL, clk_sel); } /* Recalculate timeout clock frequency based on the new sd clock. */ if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) slot->timeout_clk = slot->clock / 1000; if (slot->version < SDHCI_SPEC_300) { /* Looking for highest freq <= clock. */ res = clk_base; for (div = 1; div < SDHCI_200_MAX_DIVIDER; div <<= 1) { if (res <= clock) break; res >>= 1; } /* Divider 1:1 is 0x00, 2:1 is 0x01, 256:1 is 0x80 ... 
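 * For example, with a 100 MHz base clock and a 400 kHz target the loop above exits with div == 256 (100 MHz / 256 is roughly 391 kHz), and 256 >> 1 == 0x80 is the value programmed into the clock register below.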
*/ div >>= 1; } else { /* Version 3.0 divisors are multiples of two up to 1023*2 */ if (clock >= clk_base) div = 0; else { for (div = 2; div < SDHCI_300_MAX_DIVIDER; div += 2) { if ((clk_base / div) <= clock) break; } } div >>= 1; } if (bootverbose || sdhci_debug) slot_printf(slot, "Divider %d for freq %d (base %d)\n", div, clock, clk_base); /* Now we have got divider, set it. */ clk = (div & SDHCI_DIVIDER_MASK) << SDHCI_DIVIDER_SHIFT; clk |= ((div >> SDHCI_DIVIDER_MASK_LEN) & SDHCI_DIVIDER_HI_MASK) << SDHCI_DIVIDER_HI_SHIFT; WR2(slot, SDHCI_CLOCK_CONTROL, clk); /* Enable clock. */ clk |= SDHCI_CLOCK_INT_EN; WR2(slot, SDHCI_CLOCK_CONTROL, clk); /* Wait up to 10 ms until it stabilize. */ timeout = 10; while (!((clk = RD2(slot, SDHCI_CLOCK_CONTROL)) & SDHCI_CLOCK_INT_STABLE)) { if (timeout == 0) { slot_printf(slot, "Internal clock never stabilised.\n"); sdhci_dumpregs(slot); return; } timeout--; DELAY(1000); } /* Pass clock signal to the bus. */ clk |= SDHCI_CLOCK_CARD_EN; WR2(slot, SDHCI_CLOCK_CONTROL, clk); } static void sdhci_set_power(struct sdhci_slot *slot, u_char power) { uint8_t pwr; if (slot->power == power) return; slot->power = power; /* Turn off the power. */ pwr = 0; WR1(slot, SDHCI_POWER_CONTROL, pwr); /* If power down requested - left it so. */ if (power == 0) return; /* Set voltage. */ switch (1 << power) { case MMC_OCR_LOW_VOLTAGE: pwr |= SDHCI_POWER_180; break; case MMC_OCR_290_300: case MMC_OCR_300_310: pwr |= SDHCI_POWER_300; break; case MMC_OCR_320_330: case MMC_OCR_330_340: pwr |= SDHCI_POWER_330; break; } WR1(slot, SDHCI_POWER_CONTROL, pwr); /* Turn on the power. */ pwr |= SDHCI_POWER_ON; WR1(slot, SDHCI_POWER_CONTROL, pwr); if (slot->quirks & SDHCI_QUIRK_INTEL_POWER_UP_RESET) { WR1(slot, SDHCI_POWER_CONTROL, pwr | 0x10); DELAY(10); WR1(slot, SDHCI_POWER_CONTROL, pwr); DELAY(300); } } static void sdhci_read_block_pio(struct sdhci_slot *slot) { uint32_t data; char *buffer; size_t left; buffer = slot->curcmd->data->data; buffer += slot->offset; /* Transfer one block at a time. */ left = min(512, slot->curcmd->data->len - slot->offset); slot->offset += left; /* If we are too fast, broken controllers return zeroes. */ if (slot->quirks & SDHCI_QUIRK_BROKEN_TIMINGS) DELAY(10); /* Handle unaligned and aligned buffer cases. */ if ((intptr_t)buffer & 3) { while (left > 3) { data = RD4(slot, SDHCI_BUFFER); buffer[0] = data; buffer[1] = (data >> 8); buffer[2] = (data >> 16); buffer[3] = (data >> 24); buffer += 4; left -= 4; } } else { RD_MULTI_4(slot, SDHCI_BUFFER, (uint32_t *)buffer, left >> 2); left &= 3; } /* Handle uneven size case. */ if (left > 0) { data = RD4(slot, SDHCI_BUFFER); while (left > 0) { *(buffer++) = data; data >>= 8; left--; } } } static void sdhci_write_block_pio(struct sdhci_slot *slot) { uint32_t data = 0; char *buffer; size_t left; buffer = slot->curcmd->data->data; buffer += slot->offset; /* Transfer one block at a time. */ left = min(512, slot->curcmd->data->len - slot->offset); slot->offset += left; /* Handle unaligned and aligned buffer cases. */ if ((intptr_t)buffer & 3) { while (left > 3) { data = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); left -= 4; buffer += 4; WR4(slot, SDHCI_BUFFER, data); } } else { WR_MULTI_4(slot, SDHCI_BUFFER, (uint32_t *)buffer, left >> 2); left &= 3; } /* Handle uneven size case. */ if (left > 0) { while (left > 0) { data <<= 8; data += *(buffer++); left--; } WR4(slot, SDHCI_BUFFER, data); } } static void sdhci_transfer_pio(struct sdhci_slot *slot) { /* Read as many blocks as possible. 
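 * Each pass of the loops below moves at most one 512-byte block through sdhci_read_block_pio()/sdhci_write_block_pio() and keeps going while the controller still signals data (or buffer space) available.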
*/ if (slot->curcmd->data->flags & MMC_DATA_READ) { while (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_DATA_AVAILABLE) { sdhci_read_block_pio(slot); if (slot->offset >= slot->curcmd->data->len) break; } } else { while (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_SPACE_AVAILABLE) { sdhci_write_block_pio(slot); if (slot->offset >= slot->curcmd->data->len) break; } } } static void -sdhci_card_task(void *arg, int pending) +sdhci_card_task(void *arg, int pending __unused) { struct sdhci_slot *slot = arg; + device_t d; SDHCI_LOCK(slot); if (SDHCI_GET_CARD_PRESENT(slot->bus, slot)) { if (slot->dev == NULL) { /* If card is present - attach mmc bus. */ if (bootverbose || sdhci_debug) slot_printf(slot, "Card inserted\n"); slot->dev = device_add_child(slot->bus, "mmc", -1); device_set_ivars(slot->dev, slot); SDHCI_UNLOCK(slot); device_probe_and_attach(slot->dev); } else SDHCI_UNLOCK(slot); } else { if (slot->dev != NULL) { /* If no card present - detach mmc bus. */ if (bootverbose || sdhci_debug) slot_printf(slot, "Card removed\n"); - device_t d = slot->dev; + d = slot->dev; slot->dev = NULL; SDHCI_UNLOCK(slot); device_delete_child(slot->bus, d); } else SDHCI_UNLOCK(slot); } } static void sdhci_handle_card_present_locked(struct sdhci_slot *slot, bool is_present) { bool was_present; /* * If there was no card and now there is one, schedule the task to * create the child device after a short delay. The delay is to * debounce the card insert (sometimes the card detect pin stabilizes * before the other pins have made good contact). * * If there was a card present and now it's gone, immediately schedule * the task to delete the child device. No debouncing -- gone is gone, * because once power is removed, a full card re-init is needed, and * that happens by deleting and recreating the child device. */ was_present = slot->dev != NULL; if (!was_present && is_present) { taskqueue_enqueue_timeout(taskqueue_swi_giant, &slot->card_delayed_task, -SDHCI_INSERT_DELAY_TICKS); } else if (was_present && !is_present) { taskqueue_enqueue(taskqueue_swi_giant, &slot->card_task); } } void sdhci_handle_card_present(struct sdhci_slot *slot, bool is_present) { SDHCI_LOCK(slot); sdhci_handle_card_present_locked(slot, is_present); SDHCI_UNLOCK(slot); } static void sdhci_card_poll(void *arg) { struct sdhci_slot *slot = arg; sdhci_handle_card_present(slot, SDHCI_GET_CARD_PRESENT(slot->bus, slot)); callout_reset(&slot->card_poll_callout, SDHCI_CARD_PRESENT_TICKS, sdhci_card_poll, slot); } int sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num) { uint32_t caps, freq; int err; SDHCI_LOCK_INIT(slot); slot->num = num; slot->bus = dev; /* Allocate DMA tag. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), DMA_BLOCK_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, DMA_BLOCK_SIZE, 1, DMA_BLOCK_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &slot->dmatag); if (err != 0) { device_printf(dev, "Can't create DMA tag\n"); SDHCI_LOCK_DESTROY(slot); return (err); } /* Allocate DMA memory. */ err = bus_dmamem_alloc(slot->dmatag, (void **)&slot->dmamem, BUS_DMA_NOWAIT, &slot->dmamap); if (err != 0) { device_printf(dev, "Can't alloc DMA memory\n"); SDHCI_LOCK_DESTROY(slot); return (err); } /* Map the memory. */ err = bus_dmamap_load(slot->dmatag, slot->dmamap, (void *)slot->dmamem, DMA_BLOCK_SIZE, sdhci_getaddr, &slot->paddr, 0); if (err != 0 || slot->paddr == 0) { device_printf(dev, "Can't load DMA memory\n"); SDHCI_LOCK_DESTROY(slot); if(err) return (err); else return (EFAULT); } /* Initialize slot. 
*/ sdhci_init(slot); slot->version = (RD2(slot, SDHCI_HOST_VERSION) >> SDHCI_SPEC_VER_SHIFT) & SDHCI_SPEC_VER_MASK; if (slot->quirks & SDHCI_QUIRK_MISSING_CAPS) caps = slot->caps; else caps = RD4(slot, SDHCI_CAPABILITIES); /* Calculate base clock frequency. */ if (slot->version >= SDHCI_SPEC_300) freq = (caps & SDHCI_CLOCK_V3_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; else freq = (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; if (freq != 0) slot->max_clk = freq * 1000000; /* * If the frequency wasn't in the capabilities and the hardware driver * hasn't already set max_clk we're probably not going to work right * with an assumption, so complain about it. */ if (slot->max_clk == 0) { slot->max_clk = SDHCI_DEFAULT_MAX_FREQ * 1000000; device_printf(dev, "Hardware doesn't specify base clock " "frequency, using %dMHz as default.\n", SDHCI_DEFAULT_MAX_FREQ); } /* Calculate/set timeout clock frequency. */ if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) { slot->timeout_clk = slot->max_clk / 1000; } else if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_1MHZ) { slot->timeout_clk = 1000; } else { slot->timeout_clk = (caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; if (caps & SDHCI_TIMEOUT_CLK_UNIT) slot->timeout_clk *= 1000; } /* * If the frequency wasn't in the capabilities and the hardware driver * hasn't already set timeout_clk we'll probably work okay using the * max timeout, but still mention it. */ if (slot->timeout_clk == 0) { device_printf(dev, "Hardware doesn't specify timeout clock " "frequency, setting BROKEN_TIMEOUT quirk.\n"); slot->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; } slot->host.f_min = SDHCI_MIN_FREQ(slot->bus, slot); slot->host.f_max = slot->max_clk; slot->host.host_ocr = 0; if (caps & SDHCI_CAN_VDD_330) slot->host.host_ocr |= MMC_OCR_320_330 | MMC_OCR_330_340; if (caps & SDHCI_CAN_VDD_300) slot->host.host_ocr |= MMC_OCR_290_300 | MMC_OCR_300_310; if (caps & SDHCI_CAN_VDD_180) slot->host.host_ocr |= MMC_OCR_LOW_VOLTAGE; if (slot->host.host_ocr == 0) { device_printf(dev, "Hardware doesn't report any " "support voltages.\n"); } slot->host.caps = MMC_CAP_4_BIT_DATA; if (caps & SDHCI_CAN_DO_8BITBUS) slot->host.caps |= MMC_CAP_8_BIT_DATA; if (caps & SDHCI_CAN_DO_HISPD) slot->host.caps |= MMC_CAP_HSPEED; /* Decide if we have usable DMA. */ if (caps & SDHCI_CAN_DO_DMA) slot->opt |= SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_BROKEN_DMA) slot->opt &= ~SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_FORCE_DMA) slot->opt |= SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE) slot->opt |= SDHCI_NON_REMOVABLE; /* * Use platform-provided transfer backend * with PIO as a fallback mechanism */ if (slot->opt & SDHCI_PLATFORM_TRANSFER) slot->opt &= ~SDHCI_HAVE_DMA; if (bootverbose || sdhci_debug) { slot_printf(slot, "%uMHz%s %s%s%s%s %s\n", slot->max_clk / 1000000, (caps & SDHCI_CAN_DO_HISPD) ? " HS" : "", (slot->host.caps & MMC_CAP_8_BIT_DATA) ? "8bits" : ((slot->host.caps & MMC_CAP_4_BIT_DATA) ? "4bits" : "1bit"), (caps & SDHCI_CAN_VDD_330) ? " 3.3V" : "", (caps & SDHCI_CAN_VDD_300) ? " 3.0V" : "", (caps & SDHCI_CAN_VDD_180) ? " 1.8V" : "", (slot->opt & SDHCI_HAVE_DMA) ? 
"DMA" : "PIO"); sdhci_dumpregs(slot); } slot->timeout = 10; SYSCTL_ADD_INT(device_get_sysctl_ctx(slot->bus), SYSCTL_CHILDREN(device_get_sysctl_tree(slot->bus)), OID_AUTO, "timeout", CTLFLAG_RW, &slot->timeout, 0, "Maximum timeout for SDHCI transfers (in secs)"); TASK_INIT(&slot->card_task, 0, sdhci_card_task, slot); TIMEOUT_TASK_INIT(taskqueue_swi_giant, &slot->card_delayed_task, 0, sdhci_card_task, slot); callout_init(&slot->card_poll_callout, 1); callout_init_mtx(&slot->timeout_callout, &slot->mtx, 0); if ((slot->quirks & SDHCI_QUIRK_POLL_CARD_PRESENT) && !(slot->opt & SDHCI_NON_REMOVABLE)) { callout_reset(&slot->card_poll_callout, SDHCI_CARD_PRESENT_TICKS, sdhci_card_poll, slot); } return (0); } void sdhci_start_slot(struct sdhci_slot *slot) { + sdhci_card_task(slot, 0); } int sdhci_cleanup_slot(struct sdhci_slot *slot) { device_t d; callout_drain(&slot->timeout_callout); callout_drain(&slot->card_poll_callout); taskqueue_drain(taskqueue_swi_giant, &slot->card_task); taskqueue_drain_timeout(taskqueue_swi_giant, &slot->card_delayed_task); SDHCI_LOCK(slot); d = slot->dev; slot->dev = NULL; SDHCI_UNLOCK(slot); if (d != NULL) device_delete_child(slot->bus, d); SDHCI_LOCK(slot); sdhci_reset(slot, SDHCI_RESET_ALL); SDHCI_UNLOCK(slot); bus_dmamap_unload(slot->dmatag, slot->dmamap); bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap); bus_dma_tag_destroy(slot->dmatag); SDHCI_LOCK_DESTROY(slot); return (0); } int sdhci_generic_suspend(struct sdhci_slot *slot) { + sdhci_reset(slot, SDHCI_RESET_ALL); return (0); } int sdhci_generic_resume(struct sdhci_slot *slot) { + sdhci_init(slot); return (0); } uint32_t sdhci_generic_min_freq(device_t brdev, struct sdhci_slot *slot) { + if (slot->version >= SDHCI_SPEC_300) return (slot->max_clk / SDHCI_300_MAX_DIVIDER); else return (slot->max_clk / SDHCI_200_MAX_DIVIDER); } bool sdhci_generic_get_card_present(device_t brdev, struct sdhci_slot *slot) { if (slot->opt & SDHCI_NON_REMOVABLE) return true; return (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } int sdhci_generic_update_ios(device_t brdev, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); struct mmc_ios *ios = &slot->host.ios; SDHCI_LOCK(slot); /* Do full reset on bus power down to clear from any state. */ if (ios->power_mode == power_off) { WR4(slot, SDHCI_SIGNAL_ENABLE, 0); sdhci_init(slot); } /* Configure the bus. */ sdhci_set_clock(slot, ios->clock); sdhci_set_power(slot, (ios->power_mode == power_off) ? 0 : ios->vdd); if (ios->bus_width == bus_width_8) { slot->hostctrl |= SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_4) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl |= SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_1) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else { panic("Invalid bus width: %d", ios->bus_width); } if (ios->timing == bus_timing_hs && !(slot->quirks & SDHCI_QUIRK_DONT_SET_HISPD_BIT)) slot->hostctrl |= SDHCI_CTRL_HISPD; else slot->hostctrl &= ~SDHCI_CTRL_HISPD; WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl); /* Some controllers like reset after bus changes. 
*/ if(slot->quirks & SDHCI_QUIRK_RESET_ON_IOS) sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); SDHCI_UNLOCK(slot); return (0); } static void sdhci_req_done(struct sdhci_slot *slot) { struct mmc_request *req; if (slot->req != NULL && slot->curcmd != NULL) { callout_stop(&slot->timeout_callout); req = slot->req; slot->req = NULL; slot->curcmd = NULL; req->done(req); } } static void sdhci_timeout(void *arg) { struct sdhci_slot *slot = arg; if (slot->curcmd != NULL) { slot_printf(slot, " Controller timeout\n"); sdhci_dumpregs(slot); - sdhci_reset(slot, SDHCI_RESET_CMD|SDHCI_RESET_DATA); + sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); slot->curcmd->error = MMC_ERR_TIMEOUT; sdhci_req_done(slot); } else { slot_printf(slot, " Spurious timeout - no active command\n"); } } static void sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data) { uint16_t mode; if (data == NULL) return; mode = SDHCI_TRNS_BLK_CNT_EN; if (data->len > 512) mode |= SDHCI_TRNS_MULTI; if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; if (slot->req->stop) mode |= SDHCI_TRNS_ACMD12; if (slot->flags & SDHCI_USE_DMA) mode |= SDHCI_TRNS_DMA; WR2(slot, SDHCI_TRANSFER_MODE, mode); } static void sdhci_start_command(struct sdhci_slot *slot, struct mmc_command *cmd) { int flags, timeout; uint32_t mask; slot->curcmd = cmd; slot->cmd_done = 0; cmd->error = MMC_ERR_NONE; /* This flags combination is not supported by controller. */ if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) { slot_printf(slot, "Unsupported response type!\n"); cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } /* Do not issue command if there is no card, clock or power. * Controller will not detect timeout without clock active. */ if (!SDHCI_GET_CARD_PRESENT(slot->bus, slot) || slot->power == 0 || slot->clock == 0) { cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } /* Always wait for free CMD bus. */ mask = SDHCI_CMD_INHIBIT; /* Wait for free DAT if we have data or busy signal. */ if (cmd->data || (cmd->flags & MMC_RSP_BUSY)) mask |= SDHCI_DAT_INHIBIT; /* We shouldn't wait for DAT for stop commands. */ if (cmd == slot->req->stop) mask &= ~SDHCI_DAT_INHIBIT; /* * Wait for bus no more then 250 ms. Typically there will be no wait * here at all, but when writing a crash dump we may be bypassing the * host platform's interrupt handler, and in some cases that handler * may be working around hardware quirks such as not respecting r1b * busy indications. In those cases, this wait-loop serves the purpose * of waiting for the prior command and data transfers to be done, and * SD cards are allowed to take up to 250ms for write and erase ops. * (It's usually more like 20-30ms in the real world.) */ timeout = 250; while (mask & RD4(slot, SDHCI_PRESENT_STATE)) { if (timeout == 0) { slot_printf(slot, "Controller never released " "inhibit bit(s).\n"); sdhci_dumpregs(slot); cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } timeout--; DELAY(1000); } /* Prepare command flags. */ if (!(cmd->flags & MMC_RSP_PRESENT)) flags = SDHCI_CMD_RESP_NONE; else if (cmd->flags & MMC_RSP_136) flags = SDHCI_CMD_RESP_LONG; else if (cmd->flags & MMC_RSP_BUSY) flags = SDHCI_CMD_RESP_SHORT_BUSY; else flags = SDHCI_CMD_RESP_SHORT; if (cmd->flags & MMC_RSP_CRC) flags |= SDHCI_CMD_CRC; if (cmd->flags & MMC_RSP_OPCODE) flags |= SDHCI_CMD_INDEX; if (cmd->data) flags |= SDHCI_CMD_DATA; if (cmd->opcode == MMC_STOP_TRANSMISSION) flags |= SDHCI_CMD_TYPE_ABORT; /* Prepare data. 
*/ sdhci_start_data(slot, cmd->data); /* * Interrupt aggregation: To reduce total number of interrupts * group response interrupt with data interrupt when possible. * If there going to be data interrupt, mask response one. */ if (slot->data_done == 0) { WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask &= ~SDHCI_INT_RESPONSE); } /* Set command argument. */ WR4(slot, SDHCI_ARGUMENT, cmd->arg); /* Set data transfer mode. */ sdhci_set_transfer_mode(slot, cmd->data); /* Start command. */ WR2(slot, SDHCI_COMMAND_FLAGS, (cmd->opcode << 8) | (flags & 0xff)); /* Start timeout callout. */ callout_reset(&slot->timeout_callout, slot->timeout * hz, sdhci_timeout, slot); } static void sdhci_finish_command(struct sdhci_slot *slot) { int i; uint32_t val; uint8_t extra; slot->cmd_done = 1; /* Interrupt aggregation: Restore command interrupt. * Main restore point for the case when command interrupt * happened first. */ WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask |= SDHCI_INT_RESPONSE); /* In case of error - reset host and return. */ if (slot->curcmd->error) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); sdhci_start(slot); return; } /* If command has response - fetch it. */ if (slot->curcmd->flags & MMC_RSP_PRESENT) { if (slot->curcmd->flags & MMC_RSP_136) { /* CRC is stripped so we need one byte shift. */ extra = 0; for (i = 0; i < 4; i++) { val = RD4(slot, SDHCI_RESPONSE + i * 4); if (slot->quirks & SDHCI_QUIRK_DONT_SHIFT_RESPONSE) slot->curcmd->resp[3 - i] = val; else { slot->curcmd->resp[3 - i] = (val << 8) | extra; extra = val >> 24; } } } else slot->curcmd->resp[0] = RD4(slot, SDHCI_RESPONSE); } /* If data ready - finish. */ if (slot->data_done) sdhci_start(slot); } static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data) { uint32_t target_timeout, current_timeout; uint8_t div; if (data == NULL && (slot->curcmd->flags & MMC_RSP_BUSY) == 0) { slot->data_done = 1; return; } slot->data_done = 0; /* Calculate and set data timeout.*/ /* XXX: We should have this from mmc layer, now assume 1 sec. */ if (slot->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL) { div = 0xE; } else { target_timeout = 1000000; div = 0; current_timeout = (1 << 13) * 1000 / slot->timeout_clk; while (current_timeout < target_timeout && div < 0xE) { ++div; current_timeout <<= 1; } /* Compensate for an off-by-one error in the CaFe chip.*/ if (div < 0xE && (slot->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) { ++div; } } WR1(slot, SDHCI_TIMEOUT_CONTROL, div); if (data == NULL) return; /* Use DMA if possible. */ if ((slot->opt & SDHCI_HAVE_DMA)) slot->flags |= SDHCI_USE_DMA; /* If data is small, broken DMA may return zeroes instead of data, */ if ((slot->quirks & SDHCI_QUIRK_BROKEN_TIMINGS) && (data->len <= 512)) slot->flags &= ~SDHCI_USE_DMA; /* Some controllers require even block sizes. */ if ((slot->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && ((data->len) & 0x3)) slot->flags &= ~SDHCI_USE_DMA; /* Load DMA buffer. */ if (slot->flags & SDHCI_USE_DMA) { if (data->flags & MMC_DATA_READ) bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREREAD); else { memcpy(slot->dmamem, data->data, (data->len < DMA_BLOCK_SIZE) ? data->len : DMA_BLOCK_SIZE); bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREWRITE); } WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr); /* Interrupt aggregation: Mask border interrupt * for the last page and unmask else. 
*/ if (data->len == DMA_BLOCK_SIZE) slot->intmask &= ~SDHCI_INT_DMA_END; else slot->intmask |= SDHCI_INT_DMA_END; WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } /* Current data offset for both PIO and DMA. */ slot->offset = 0; /* Set block size and request IRQ on 4K border. */ WR2(slot, SDHCI_BLOCK_SIZE, SDHCI_MAKE_BLKSZ(DMA_BOUNDARY, (data->len < 512) ? data->len : 512)); /* Set block count. */ WR2(slot, SDHCI_BLOCK_COUNT, (data->len + 511) / 512); } void sdhci_finish_data(struct sdhci_slot *slot) { struct mmc_data *data = slot->curcmd->data; + size_t left; /* Interrupt aggregation: Restore command interrupt. * Auxiliary restore point for the case when data interrupt * happened first. */ if (!slot->cmd_done) { WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask |= SDHCI_INT_RESPONSE); } /* Unload rest of data from DMA buffer. */ if (!slot->data_done && (slot->flags & SDHCI_USE_DMA)) { if (data->flags & MMC_DATA_READ) { - size_t left = data->len - slot->offset; + left = data->len - slot->offset; bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTREAD); memcpy((u_char*)data->data + slot->offset, slot->dmamem, (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE); } else bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTWRITE); } slot->data_done = 1; /* If there was error - reset the host. */ if (slot->curcmd->error) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); sdhci_start(slot); return; } /* If we already have command response - finish. */ if (slot->cmd_done) sdhci_start(slot); } static void sdhci_start(struct sdhci_slot *slot) { struct mmc_request *req; req = slot->req; if (req == NULL) return; if (!(slot->flags & CMD_STARTED)) { slot->flags |= CMD_STARTED; sdhci_start_command(slot, req->cmd); return; } /* We don't need this until using Auto-CMD12 feature if (!(slot->flags & STOP_STARTED) && req->stop) { slot->flags |= STOP_STARTED; sdhci_start_command(slot, req->stop); return; } */ if (sdhci_debug > 1) slot_printf(slot, "result: %d\n", req->cmd->error); if (!req->cmd->error && (slot->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); } sdhci_req_done(slot); } int sdhci_generic_request(device_t brdev, device_t reqdev, struct mmc_request *req) { struct sdhci_slot *slot = device_get_ivars(reqdev); SDHCI_LOCK(slot); if (slot->req != NULL) { SDHCI_UNLOCK(slot); return (EBUSY); } if (sdhci_debug > 1) { slot_printf(slot, "CMD%u arg %#x flags %#x dlen %u dflags %#x\n", req->cmd->opcode, req->cmd->arg, req->cmd->flags, (req->cmd->data)?(u_int)req->cmd->data->len:0, (req->cmd->data)?req->cmd->data->flags:0); } slot->req = req; slot->flags = 0; sdhci_start(slot); SDHCI_UNLOCK(slot); if (dumping) { while (slot->req != NULL) { sdhci_generic_intr(slot); DELAY(10); } } return (0); } int sdhci_generic_get_ro(device_t brdev, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); uint32_t val; SDHCI_LOCK(slot); val = RD4(slot, SDHCI_PRESENT_STATE); SDHCI_UNLOCK(slot); return (!(val & SDHCI_WRITE_PROTECT)); } int sdhci_generic_acquire_host(device_t brdev, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); int err = 0; SDHCI_LOCK(slot); while (slot->bus_busy) msleep(slot, &slot->mtx, 0, "sdhciah", 0); slot->bus_busy++; /* Activate led. 
*/ WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl |= SDHCI_CTRL_LED); SDHCI_UNLOCK(slot); return (err); } int sdhci_generic_release_host(device_t brdev, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); SDHCI_LOCK(slot); /* Deactivate led. */ WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl &= ~SDHCI_CTRL_LED); slot->bus_busy--; SDHCI_UNLOCK(slot); wakeup(slot); return (0); } static void sdhci_cmd_irq(struct sdhci_slot *slot, uint32_t intmask) { if (!slot->curcmd) { slot_printf(slot, "Got command interrupt 0x%08x, but " "there is no active command.\n", intmask); sdhci_dumpregs(slot); return; } if (intmask & SDHCI_INT_TIMEOUT) slot->curcmd->error = MMC_ERR_TIMEOUT; else if (intmask & SDHCI_INT_CRC) slot->curcmd->error = MMC_ERR_BADCRC; else if (intmask & (SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) slot->curcmd->error = MMC_ERR_FIFO; sdhci_finish_command(slot); } static void sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask) { struct mmc_data *data; size_t left; if (!slot->curcmd) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is no active command.\n", intmask); sdhci_dumpregs(slot); return; } if (slot->curcmd->data == NULL && (slot->curcmd->flags & MMC_RSP_BUSY) == 0) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is no active data operation.\n", intmask); sdhci_dumpregs(slot); return; } if (intmask & SDHCI_INT_DATA_TIMEOUT) slot->curcmd->error = MMC_ERR_TIMEOUT; else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT)) slot->curcmd->error = MMC_ERR_BADCRC; if (slot->curcmd->data == NULL && (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DMA_END))) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is busy-only command.\n", intmask); sdhci_dumpregs(slot); slot->curcmd->error = MMC_ERR_INVALID; } if (slot->curcmd->error) { /* No need to continue after any error. */ goto done; } /* Handle PIO interrupt. */ if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)) { if ((slot->opt & SDHCI_PLATFORM_TRANSFER) && SDHCI_PLATFORM_WILL_HANDLE(slot->bus, slot)) { SDHCI_PLATFORM_START_TRANSFER(slot->bus, slot, &intmask); slot->flags |= PLATFORM_DATA_STARTED; } else sdhci_transfer_pio(slot); } /* Handle DMA border. */ if (intmask & SDHCI_INT_DMA_END) { data = slot->curcmd->data; /* Unload DMA buffer... */ left = data->len - slot->offset; if (data->flags & MMC_DATA_READ) { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTREAD); memcpy((u_char*)data->data + slot->offset, slot->dmamem, (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE); } else { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTWRITE); } /* ... and reload it again. */ slot->offset += DMA_BLOCK_SIZE; left = data->len - slot->offset; if (data->flags & MMC_DATA_READ) { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREREAD); } else { memcpy(slot->dmamem, (u_char*)data->data + slot->offset, (left < DMA_BLOCK_SIZE)? left : DMA_BLOCK_SIZE); bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREWRITE); } /* Interrupt aggregation: Mask border interrupt * for the last page. */ if (left == DMA_BLOCK_SIZE) { slot->intmask &= ~SDHCI_INT_DMA_END; WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } /* Restart DMA. */ WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr); } /* We have got all data. 
*/ if (intmask & SDHCI_INT_DATA_END) { if (slot->flags & PLATFORM_DATA_STARTED) { slot->flags &= ~PLATFORM_DATA_STARTED; SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); } else sdhci_finish_data(slot); } done: if (slot->curcmd != NULL && slot->curcmd->error != 0) { if (slot->flags & PLATFORM_DATA_STARTED) { slot->flags &= ~PLATFORM_DATA_STARTED; SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); } else sdhci_finish_data(slot); } } static void sdhci_acmd_irq(struct sdhci_slot *slot) { uint16_t err; err = RD4(slot, SDHCI_ACMD12_ERR); if (!slot->curcmd) { slot_printf(slot, "Got AutoCMD12 error 0x%04x, but " "there is no active command.\n", err); sdhci_dumpregs(slot); return; } slot_printf(slot, "Got AutoCMD12 error 0x%04x\n", err); sdhci_reset(slot, SDHCI_RESET_CMD); } void sdhci_generic_intr(struct sdhci_slot *slot) { uint32_t intmask, present; SDHCI_LOCK(slot); /* Read slot interrupt status. */ intmask = RD4(slot, SDHCI_INT_STATUS); if (intmask == 0 || intmask == 0xffffffff) { SDHCI_UNLOCK(slot); return; } if (sdhci_debug > 2) slot_printf(slot, "Interrupt %#x\n", intmask); /* Handle card presence interrupts. */ if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) { present = (intmask & SDHCI_INT_CARD_INSERT) != 0; slot->intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); slot->intmask |= present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT; WR4(slot, SDHCI_INT_ENABLE, slot->intmask); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); WR4(slot, SDHCI_INT_STATUS, intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)); sdhci_handle_card_present_locked(slot, present); intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); } /* Handle command interrupts. */ if (intmask & SDHCI_INT_CMD_MASK) { WR4(slot, SDHCI_INT_STATUS, intmask & SDHCI_INT_CMD_MASK); sdhci_cmd_irq(slot, intmask & SDHCI_INT_CMD_MASK); } /* Handle data interrupts. */ if (intmask & SDHCI_INT_DATA_MASK) { WR4(slot, SDHCI_INT_STATUS, intmask & SDHCI_INT_DATA_MASK); - /* Dont call data_irq in case of errored command */ + /* Don't call data_irq in case of errored command. */ if ((intmask & SDHCI_INT_CMD_ERROR_MASK) == 0) sdhci_data_irq(slot, intmask & SDHCI_INT_DATA_MASK); } /* Handle AutoCMD12 error interrupt. */ if (intmask & SDHCI_INT_ACMD12ERR) { WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_ACMD12ERR); sdhci_acmd_irq(slot); } intmask &= ~(SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK); intmask &= ~SDHCI_INT_ACMD12ERR; intmask &= ~SDHCI_INT_ERROR; /* Handle bus power interrupt. */ if (intmask & SDHCI_INT_BUS_POWER) { WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_BUS_POWER); slot_printf(slot, "Card is consuming too much power!\n"); intmask &= ~SDHCI_INT_BUS_POWER; } /* The rest is unknown. 
*/ if (intmask) { WR4(slot, SDHCI_INT_STATUS, intmask); slot_printf(slot, "Unexpected interrupt 0x%08x.\n", intmask); sdhci_dumpregs(slot); } SDHCI_UNLOCK(slot); } int sdhci_generic_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) { struct sdhci_slot *slot = device_get_ivars(child); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: *result = slot->host.ios.bus_mode; break; case MMCBR_IVAR_BUS_WIDTH: *result = slot->host.ios.bus_width; break; case MMCBR_IVAR_CHIP_SELECT: *result = slot->host.ios.chip_select; break; case MMCBR_IVAR_CLOCK: *result = slot->host.ios.clock; break; case MMCBR_IVAR_F_MIN: *result = slot->host.f_min; break; case MMCBR_IVAR_F_MAX: *result = slot->host.f_max; break; case MMCBR_IVAR_HOST_OCR: *result = slot->host.host_ocr; break; case MMCBR_IVAR_MODE: *result = slot->host.mode; break; case MMCBR_IVAR_OCR: *result = slot->host.ocr; break; case MMCBR_IVAR_POWER_MODE: *result = slot->host.ios.power_mode; break; case MMCBR_IVAR_VDD: *result = slot->host.ios.vdd; break; case MMCBR_IVAR_CAPS: *result = slot->host.caps; break; case MMCBR_IVAR_TIMING: *result = slot->host.ios.timing; break; case MMCBR_IVAR_MAX_DATA: *result = 65535; break; } return (0); } int sdhci_generic_write_ivar(device_t bus, device_t child, int which, uintptr_t value) { struct sdhci_slot *slot = device_get_ivars(child); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: slot->host.ios.bus_mode = value; break; case MMCBR_IVAR_BUS_WIDTH: slot->host.ios.bus_width = value; break; case MMCBR_IVAR_CHIP_SELECT: slot->host.ios.chip_select = value; break; case MMCBR_IVAR_CLOCK: if (value > 0) { uint32_t max_clock; uint32_t clock; int i; max_clock = slot->max_clk; clock = max_clock; if (slot->version < SDHCI_SPEC_300) { for (i = 0; i < SDHCI_200_MAX_DIVIDER; i <<= 1) { if (clock <= value) break; clock >>= 1; } } else { for (i = 0; i < SDHCI_300_MAX_DIVIDER; i += 2) { if (clock <= value) break; clock = max_clock / (i + 2); } } slot->host.ios.clock = clock; } else slot->host.ios.clock = 0; break; case MMCBR_IVAR_MODE: slot->host.mode = value; break; case MMCBR_IVAR_OCR: slot->host.ocr = value; break; case MMCBR_IVAR_POWER_MODE: slot->host.ios.power_mode = value; break; case MMCBR_IVAR_VDD: slot->host.ios.vdd = value; break; case MMCBR_IVAR_TIMING: slot->host.ios.timing = value; break; case MMCBR_IVAR_CAPS: case MMCBR_IVAR_HOST_OCR: case MMCBR_IVAR_F_MIN: case MMCBR_IVAR_F_MAX: case MMCBR_IVAR_MAX_DATA: return (EINVAL); } return (0); } MODULE_VERSION(sdhci, 1); Index: projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.h =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.h (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci.h (revision 313267) @@ -1,342 +1,342 @@ /*- * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SDHCI_H__ #define __SDHCI_H__ -#define DMA_BLOCK_SIZE 4096 -#define DMA_BOUNDARY 0 /* DMA reload every 4K */ +#define DMA_BLOCK_SIZE 4096 +#define DMA_BOUNDARY 0 /* DMA reload every 4K */ /* Controller doesn't honor resets unless we touch the clock register */ -#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) +#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1 << 0) /* Controller really supports DMA */ -#define SDHCI_QUIRK_FORCE_DMA (1<<1) +#define SDHCI_QUIRK_FORCE_DMA (1 << 1) /* Controller has unusable DMA engine */ -#define SDHCI_QUIRK_BROKEN_DMA (1<<2) +#define SDHCI_QUIRK_BROKEN_DMA (1 << 2) /* Controller doesn't like to be reset when there is no card inserted. */ -#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<3) +#define SDHCI_QUIRK_NO_CARD_NO_RESET (1 << 3) /* Controller has flaky internal state so reset it on each ios change */ -#define SDHCI_QUIRK_RESET_ON_IOS (1<<4) +#define SDHCI_QUIRK_RESET_ON_IOS (1 << 4) /* Controller can only DMA chunk sizes that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<5) +#define SDHCI_QUIRK_32BIT_DMA_SIZE (1 << 5) /* Controller needs to be reset after each request to stay stable */ -#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<6) +#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1 << 6) /* Controller has an off-by-one issue with timeout value */ -#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1<<7) +#define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1 << 7) /* Controller has broken read timings */ -#define SDHCI_QUIRK_BROKEN_TIMINGS (1<<8) +#define SDHCI_QUIRK_BROKEN_TIMINGS (1 << 8) /* Controller needs lowered frequency */ -#define SDHCI_QUIRK_LOWER_FREQUENCY (1<<9) +#define SDHCI_QUIRK_LOWER_FREQUENCY (1 << 9) /* Data timeout is invalid, should use SD clock */ -#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK (1<<10) +#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK (1 << 10) /* Timeout value is invalid, should be overriden */ -#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<11) +#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1 << 11) /* SDHCI_CAPABILITIES is invalid */ -#define SDHCI_QUIRK_MISSING_CAPS (1<<12) +#define SDHCI_QUIRK_MISSING_CAPS (1 << 12) /* Hardware shifts the 136-bit response, don't do it in software. */ -#define SDHCI_QUIRK_DONT_SHIFT_RESPONSE (1<<13) +#define SDHCI_QUIRK_DONT_SHIFT_RESPONSE (1 << 13) /* Wait to see reset bit asserted before waiting for de-asserted */ -#define SDHCI_QUIRK_WAITFOR_RESET_ASSERTED (1<<14) +#define SDHCI_QUIRK_WAITFOR_RESET_ASSERTED (1 << 14) /* Leave controller in standard mode when putting card in HS mode. */ -#define SDHCI_QUIRK_DONT_SET_HISPD_BIT (1<<15) +#define SDHCI_QUIRK_DONT_SET_HISPD_BIT (1 << 15) /* Alternate clock source is required when supplying a 400 KHz clock. 
*/ -#define SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC (1<<16) +#define SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC (1 << 16) /* Card insert/remove interrupts don't work, polling required. */ -#define SDHCI_QUIRK_POLL_CARD_PRESENT (1<<17) +#define SDHCI_QUIRK_POLL_CARD_PRESENT (1 << 17) /* All controller slots are non-removable. */ -#define SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE (1<<18) +#define SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE (1 << 18) /* Issue custom Intel controller reset sequence after power-up. */ -#define SDHCI_QUIRK_INTEL_POWER_UP_RESET (1<<19) +#define SDHCI_QUIRK_INTEL_POWER_UP_RESET (1 << 19) /* Data timeout is invalid, use 1 MHz clock instead. */ -#define SDHCI_QUIRK_DATA_TIMEOUT_1MHZ (1<<20) +#define SDHCI_QUIRK_DATA_TIMEOUT_1MHZ (1 << 20) /* * Controller registers */ -#define SDHCI_DMA_ADDRESS 0x00 +#define SDHCI_DMA_ADDRESS 0x00 -#define SDHCI_BLOCK_SIZE 0x04 -#define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF)) +#define SDHCI_BLOCK_SIZE 0x04 +#define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF)) -#define SDHCI_BLOCK_COUNT 0x06 +#define SDHCI_BLOCK_COUNT 0x06 -#define SDHCI_ARGUMENT 0x08 +#define SDHCI_ARGUMENT 0x08 -#define SDHCI_TRANSFER_MODE 0x0C -#define SDHCI_TRNS_DMA 0x01 -#define SDHCI_TRNS_BLK_CNT_EN 0x02 -#define SDHCI_TRNS_ACMD12 0x04 -#define SDHCI_TRNS_READ 0x10 -#define SDHCI_TRNS_MULTI 0x20 +#define SDHCI_TRANSFER_MODE 0x0C +#define SDHCI_TRNS_DMA 0x01 +#define SDHCI_TRNS_BLK_CNT_EN 0x02 +#define SDHCI_TRNS_ACMD12 0x04 +#define SDHCI_TRNS_READ 0x10 +#define SDHCI_TRNS_MULTI 0x20 -#define SDHCI_COMMAND_FLAGS 0x0E -#define SDHCI_CMD_RESP_NONE 0x00 -#define SDHCI_CMD_RESP_LONG 0x01 -#define SDHCI_CMD_RESP_SHORT 0x02 -#define SDHCI_CMD_RESP_SHORT_BUSY 0x03 -#define SDHCI_CMD_RESP_MASK 0x03 -#define SDHCI_CMD_CRC 0x08 -#define SDHCI_CMD_INDEX 0x10 -#define SDHCI_CMD_DATA 0x20 -#define SDHCI_CMD_TYPE_NORMAL 0x00 -#define SDHCI_CMD_TYPE_SUSPEND 0x40 -#define SDHCI_CMD_TYPE_RESUME 0x80 -#define SDHCI_CMD_TYPE_ABORT 0xc0 -#define SDHCI_CMD_TYPE_MASK 0xc0 +#define SDHCI_COMMAND_FLAGS 0x0E +#define SDHCI_CMD_RESP_NONE 0x00 +#define SDHCI_CMD_RESP_LONG 0x01 +#define SDHCI_CMD_RESP_SHORT 0x02 +#define SDHCI_CMD_RESP_SHORT_BUSY 0x03 +#define SDHCI_CMD_RESP_MASK 0x03 +#define SDHCI_CMD_CRC 0x08 +#define SDHCI_CMD_INDEX 0x10 +#define SDHCI_CMD_DATA 0x20 +#define SDHCI_CMD_TYPE_NORMAL 0x00 +#define SDHCI_CMD_TYPE_SUSPEND 0x40 +#define SDHCI_CMD_TYPE_RESUME 0x80 +#define SDHCI_CMD_TYPE_ABORT 0xc0 +#define SDHCI_CMD_TYPE_MASK 0xc0 -#define SDHCI_COMMAND 0x0F +#define SDHCI_COMMAND 0x0F -#define SDHCI_RESPONSE 0x10 +#define SDHCI_RESPONSE 0x10 -#define SDHCI_BUFFER 0x20 +#define SDHCI_BUFFER 0x20 -#define SDHCI_PRESENT_STATE 0x24 -#define SDHCI_CMD_INHIBIT 0x00000001 -#define SDHCI_DAT_INHIBIT 0x00000002 -#define SDHCI_DAT_ACTIVE 0x00000004 -#define SDHCI_RETUNE_REQUEST 0x00000008 -#define SDHCI_DOING_WRITE 0x00000100 -#define SDHCI_DOING_READ 0x00000200 -#define SDHCI_SPACE_AVAILABLE 0x00000400 -#define SDHCI_DATA_AVAILABLE 0x00000800 -#define SDHCI_CARD_PRESENT 0x00010000 -#define SDHCI_CARD_STABLE 0x00020000 -#define SDHCI_CARD_PIN 0x00040000 -#define SDHCI_WRITE_PROTECT 0x00080000 -#define SDHCI_STATE_DAT_MASK 0x00f00000 -#define SDHCI_STATE_CMD 0x01000000 +#define SDHCI_PRESENT_STATE 0x24 +#define SDHCI_CMD_INHIBIT 0x00000001 +#define SDHCI_DAT_INHIBIT 0x00000002 +#define SDHCI_DAT_ACTIVE 0x00000004 +#define SDHCI_RETUNE_REQUEST 0x00000008 +#define SDHCI_DOING_WRITE 0x00000100 +#define SDHCI_DOING_READ 0x00000200 +#define SDHCI_SPACE_AVAILABLE 
0x00000400 +#define SDHCI_DATA_AVAILABLE 0x00000800 +#define SDHCI_CARD_PRESENT 0x00010000 +#define SDHCI_CARD_STABLE 0x00020000 +#define SDHCI_CARD_PIN 0x00040000 +#define SDHCI_WRITE_PROTECT 0x00080000 +#define SDHCI_STATE_DAT_MASK 0x00f00000 +#define SDHCI_STATE_CMD 0x01000000 -#define SDHCI_HOST_CONTROL 0x28 -#define SDHCI_CTRL_LED 0x01 -#define SDHCI_CTRL_4BITBUS 0x02 -#define SDHCI_CTRL_HISPD 0x04 -#define SDHCI_CTRL_SDMA 0x08 -#define SDHCI_CTRL_ADMA2 0x10 -#define SDHCI_CTRL_ADMA264 0x18 -#define SDHCI_CTRL_DMA_MASK 0x18 -#define SDHCI_CTRL_8BITBUS 0x20 -#define SDHCI_CTRL_CARD_DET 0x40 -#define SDHCI_CTRL_FORCE_CARD 0x80 +#define SDHCI_HOST_CONTROL 0x28 +#define SDHCI_CTRL_LED 0x01 +#define SDHCI_CTRL_4BITBUS 0x02 +#define SDHCI_CTRL_HISPD 0x04 +#define SDHCI_CTRL_SDMA 0x08 +#define SDHCI_CTRL_ADMA2 0x10 +#define SDHCI_CTRL_ADMA264 0x18 +#define SDHCI_CTRL_DMA_MASK 0x18 +#define SDHCI_CTRL_8BITBUS 0x20 +#define SDHCI_CTRL_CARD_DET 0x40 +#define SDHCI_CTRL_FORCE_CARD 0x80 -#define SDHCI_POWER_CONTROL 0x29 -#define SDHCI_POWER_ON 0x01 -#define SDHCI_POWER_180 0x0A -#define SDHCI_POWER_300 0x0C -#define SDHCI_POWER_330 0x0E +#define SDHCI_POWER_CONTROL 0x29 +#define SDHCI_POWER_ON 0x01 +#define SDHCI_POWER_180 0x0A +#define SDHCI_POWER_300 0x0C +#define SDHCI_POWER_330 0x0E -#define SDHCI_BLOCK_GAP_CONTROL 0x2A +#define SDHCI_BLOCK_GAP_CONTROL 0x2A -#define SDHCI_WAKE_UP_CONTROL 0x2B +#define SDHCI_WAKE_UP_CONTROL 0x2B -#define SDHCI_CLOCK_CONTROL 0x2C -#define SDHCI_DIVIDER_MASK 0xff -#define SDHCI_DIVIDER_MASK_LEN 8 -#define SDHCI_DIVIDER_SHIFT 8 -#define SDHCI_DIVIDER_HI_MASK 3 -#define SDHCI_DIVIDER_HI_SHIFT 6 -#define SDHCI_CLOCK_CARD_EN 0x0004 -#define SDHCI_CLOCK_INT_STABLE 0x0002 -#define SDHCI_CLOCK_INT_EN 0x0001 -#define SDHCI_DIVIDERS_MASK \ +#define SDHCI_CLOCK_CONTROL 0x2C +#define SDHCI_DIVIDER_MASK 0xff +#define SDHCI_DIVIDER_MASK_LEN 8 +#define SDHCI_DIVIDER_SHIFT 8 +#define SDHCI_DIVIDER_HI_MASK 3 +#define SDHCI_DIVIDER_HI_SHIFT 6 +#define SDHCI_CLOCK_CARD_EN 0x0004 +#define SDHCI_CLOCK_INT_STABLE 0x0002 +#define SDHCI_CLOCK_INT_EN 0x0001 +#define SDHCI_DIVIDERS_MASK \ ((SDHCI_DIVIDER_MASK << SDHCI_DIVIDER_SHIFT) | \ (SDHCI_DIVIDER_HI_MASK << SDHCI_DIVIDER_HI_SHIFT)) -#define SDHCI_TIMEOUT_CONTROL 0x2E +#define SDHCI_TIMEOUT_CONTROL 0x2E -#define SDHCI_SOFTWARE_RESET 0x2F -#define SDHCI_RESET_ALL 0x01 -#define SDHCI_RESET_CMD 0x02 -#define SDHCI_RESET_DATA 0x04 +#define SDHCI_SOFTWARE_RESET 0x2F +#define SDHCI_RESET_ALL 0x01 +#define SDHCI_RESET_CMD 0x02 +#define SDHCI_RESET_DATA 0x04 -#define SDHCI_INT_STATUS 0x30 -#define SDHCI_INT_ENABLE 0x34 -#define SDHCI_SIGNAL_ENABLE 0x38 -#define SDHCI_INT_RESPONSE 0x00000001 -#define SDHCI_INT_DATA_END 0x00000002 -#define SDHCI_INT_BLOCK_GAP 0x00000004 -#define SDHCI_INT_DMA_END 0x00000008 -#define SDHCI_INT_SPACE_AVAIL 0x00000010 -#define SDHCI_INT_DATA_AVAIL 0x00000020 -#define SDHCI_INT_CARD_INSERT 0x00000040 -#define SDHCI_INT_CARD_REMOVE 0x00000080 -#define SDHCI_INT_CARD_INT 0x00000100 -#define SDHCI_INT_INT_A 0x00000200 -#define SDHCI_INT_INT_B 0x00000400 -#define SDHCI_INT_INT_C 0x00000800 -#define SDHCI_INT_RETUNE 0x00001000 -#define SDHCI_INT_ERROR 0x00008000 -#define SDHCI_INT_TIMEOUT 0x00010000 -#define SDHCI_INT_CRC 0x00020000 -#define SDHCI_INT_END_BIT 0x00040000 -#define SDHCI_INT_INDEX 0x00080000 -#define SDHCI_INT_DATA_TIMEOUT 0x00100000 -#define SDHCI_INT_DATA_CRC 0x00200000 -#define SDHCI_INT_DATA_END_BIT 0x00400000 -#define SDHCI_INT_BUS_POWER 0x00800000 -#define SDHCI_INT_ACMD12ERR 0x01000000 -#define 
SDHCI_INT_ADMAERR 0x02000000 -#define SDHCI_INT_TUNEERR 0x04000000 +#define SDHCI_INT_STATUS 0x30 +#define SDHCI_INT_ENABLE 0x34 +#define SDHCI_SIGNAL_ENABLE 0x38 +#define SDHCI_INT_RESPONSE 0x00000001 +#define SDHCI_INT_DATA_END 0x00000002 +#define SDHCI_INT_BLOCK_GAP 0x00000004 +#define SDHCI_INT_DMA_END 0x00000008 +#define SDHCI_INT_SPACE_AVAIL 0x00000010 +#define SDHCI_INT_DATA_AVAIL 0x00000020 +#define SDHCI_INT_CARD_INSERT 0x00000040 +#define SDHCI_INT_CARD_REMOVE 0x00000080 +#define SDHCI_INT_CARD_INT 0x00000100 +#define SDHCI_INT_INT_A 0x00000200 +#define SDHCI_INT_INT_B 0x00000400 +#define SDHCI_INT_INT_C 0x00000800 +#define SDHCI_INT_RETUNE 0x00001000 +#define SDHCI_INT_ERROR 0x00008000 +#define SDHCI_INT_TIMEOUT 0x00010000 +#define SDHCI_INT_CRC 0x00020000 +#define SDHCI_INT_END_BIT 0x00040000 +#define SDHCI_INT_INDEX 0x00080000 +#define SDHCI_INT_DATA_TIMEOUT 0x00100000 +#define SDHCI_INT_DATA_CRC 0x00200000 +#define SDHCI_INT_DATA_END_BIT 0x00400000 +#define SDHCI_INT_BUS_POWER 0x00800000 +#define SDHCI_INT_ACMD12ERR 0x01000000 +#define SDHCI_INT_ADMAERR 0x02000000 +#define SDHCI_INT_TUNEERR 0x04000000 -#define SDHCI_INT_NORMAL_MASK 0x00007FFF -#define SDHCI_INT_ERROR_MASK 0xFFFF8000 +#define SDHCI_INT_NORMAL_MASK 0x00007FFF +#define SDHCI_INT_ERROR_MASK 0xFFFF8000 -#define SDHCI_INT_CMD_ERROR_MASK (SDHCI_INT_TIMEOUT | \ +#define SDHCI_INT_CMD_ERROR_MASK (SDHCI_INT_TIMEOUT | \ SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX) -#define SDHCI_INT_CMD_MASK (SDHCI_INT_RESPONSE | SDHCI_INT_CMD_ERROR_MASK) +#define SDHCI_INT_CMD_MASK (SDHCI_INT_RESPONSE | SDHCI_INT_CMD_ERROR_MASK) -#define SDHCI_INT_DATA_MASK (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \ +#define SDHCI_INT_DATA_MASK (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \ SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | \ SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \ SDHCI_INT_DATA_END_BIT) -#define SDHCI_ACMD12_ERR 0x3C -#define SDHCI_HOST_CONTROL2 0x3E +#define SDHCI_ACMD12_ERR 0x3C +#define SDHCI_HOST_CONTROL2 0x3E -#define SDHCI_CAPABILITIES 0x40 -#define SDHCI_TIMEOUT_CLK_MASK 0x0000003F -#define SDHCI_TIMEOUT_CLK_SHIFT 0 -#define SDHCI_TIMEOUT_CLK_UNIT 0x00000080 -#define SDHCI_CLOCK_BASE_MASK 0x00003F00 -#define SDHCI_CLOCK_V3_BASE_MASK 0x0000FF00 -#define SDHCI_CLOCK_BASE_SHIFT 8 -#define SDHCI_MAX_BLOCK_MASK 0x00030000 -#define SDHCI_MAX_BLOCK_SHIFT 16 -#define SDHCI_CAN_DO_8BITBUS 0x00040000 -#define SDHCI_CAN_DO_ADMA2 0x00080000 -#define SDHCI_CAN_DO_HISPD 0x00200000 -#define SDHCI_CAN_DO_DMA 0x00400000 -#define SDHCI_CAN_DO_SUSPEND 0x00800000 -#define SDHCI_CAN_VDD_330 0x01000000 -#define SDHCI_CAN_VDD_300 0x02000000 -#define SDHCI_CAN_VDD_180 0x04000000 -#define SDHCI_CAN_DO_64BIT 0x10000000 -#define SDHCI_CAN_ASYNC_INTR 0x20000000 +#define SDHCI_CAPABILITIES 0x40 +#define SDHCI_TIMEOUT_CLK_MASK 0x0000003F +#define SDHCI_TIMEOUT_CLK_SHIFT 0 +#define SDHCI_TIMEOUT_CLK_UNIT 0x00000080 +#define SDHCI_CLOCK_BASE_MASK 0x00003F00 +#define SDHCI_CLOCK_V3_BASE_MASK 0x0000FF00 +#define SDHCI_CLOCK_BASE_SHIFT 8 +#define SDHCI_MAX_BLOCK_MASK 0x00030000 +#define SDHCI_MAX_BLOCK_SHIFT 16 +#define SDHCI_CAN_DO_8BITBUS 0x00040000 +#define SDHCI_CAN_DO_ADMA2 0x00080000 +#define SDHCI_CAN_DO_HISPD 0x00200000 +#define SDHCI_CAN_DO_DMA 0x00400000 +#define SDHCI_CAN_DO_SUSPEND 0x00800000 +#define SDHCI_CAN_VDD_330 0x01000000 +#define SDHCI_CAN_VDD_300 0x02000000 +#define SDHCI_CAN_VDD_180 0x04000000 +#define SDHCI_CAN_DO_64BIT 0x10000000 +#define SDHCI_CAN_ASYNC_INTR 0x20000000 -#define SDHCI_CAPABILITIES2 0x44 -#define 
SDHCI_CAN_SDR50 0x00000001 -#define SDHCI_CAN_SDR104 0x00000002 -#define SDHCI_CAN_DDR50 0x00000004 -#define SDHCI_CAN_DRIVE_TYPE_A 0x00000010 -#define SDHCI_CAN_DRIVE_TYPE_B 0x00000020 -#define SDHCI_CAN_DRIVE_TYPE_C 0x00000040 -#define SDHCI_RETUNE_CNT_MASK 0x00000F00 -#define SDHCI_RETUNE_CNT_SHIFT 8 -#define SDHCI_TUNE_SDR50 0x00002000 -#define SDHCI_RETUNE_MODES_MASK 0x0000C000 -#define SDHCI_RETUNE_MODES_SHIFT 14 -#define SDHCI_CLOCK_MULT_MASK 0x00FF0000 -#define SDHCI_CLOCK_MULT_SHIFT 16 +#define SDHCI_CAPABILITIES2 0x44 +#define SDHCI_CAN_SDR50 0x00000001 +#define SDHCI_CAN_SDR104 0x00000002 +#define SDHCI_CAN_DDR50 0x00000004 +#define SDHCI_CAN_DRIVE_TYPE_A 0x00000010 +#define SDHCI_CAN_DRIVE_TYPE_B 0x00000020 +#define SDHCI_CAN_DRIVE_TYPE_C 0x00000040 +#define SDHCI_RETUNE_CNT_MASK 0x00000F00 +#define SDHCI_RETUNE_CNT_SHIFT 8 +#define SDHCI_TUNE_SDR50 0x00002000 +#define SDHCI_RETUNE_MODES_MASK 0x0000C000 +#define SDHCI_RETUNE_MODES_SHIFT 14 +#define SDHCI_CLOCK_MULT_MASK 0x00FF0000 +#define SDHCI_CLOCK_MULT_SHIFT 16 -#define SDHCI_MAX_CURRENT 0x48 -#define SDHCI_FORCE_AUTO_EVENT 0x50 -#define SDHCI_FORCE_INTR_EVENT 0x52 -#define SDHCI_ADMA_ERR 0x54 -#define SDHCI_ADMA_ADDRESS_LOW 0x58 -#define SDHCI_ADMA_ADDRESS_HI 0x5C -#define SDHCI_PRESET_VALUE 0x60 -#define SDHCI_SHARED_BUS_CTRL 0xE0 +#define SDHCI_MAX_CURRENT 0x48 +#define SDHCI_FORCE_AUTO_EVENT 0x50 +#define SDHCI_FORCE_INTR_EVENT 0x52 +#define SDHCI_ADMA_ERR 0x54 +#define SDHCI_ADMA_ADDRESS_LOW 0x58 +#define SDHCI_ADMA_ADDRESS_HI 0x5C +#define SDHCI_PRESET_VALUE 0x60 +#define SDHCI_SHARED_BUS_CTRL 0xE0 -#define SDHCI_SLOT_INT_STATUS 0xFC +#define SDHCI_SLOT_INT_STATUS 0xFC -#define SDHCI_HOST_VERSION 0xFE -#define SDHCI_VENDOR_VER_MASK 0xFF00 -#define SDHCI_VENDOR_VER_SHIFT 8 -#define SDHCI_SPEC_VER_MASK 0x00FF -#define SDHCI_SPEC_VER_SHIFT 0 +#define SDHCI_HOST_VERSION 0xFE +#define SDHCI_VENDOR_VER_MASK 0xFF00 +#define SDHCI_VENDOR_VER_SHIFT 8 +#define SDHCI_SPEC_VER_MASK 0x00FF +#define SDHCI_SPEC_VER_SHIFT 0 #define SDHCI_SPEC_100 0 #define SDHCI_SPEC_200 1 #define SDHCI_SPEC_300 2 SYSCTL_DECL(_hw_sdhci); struct sdhci_slot { u_int quirks; /* Chip specific quirks */ u_int caps; /* Override SDHCI_CAPABILITIES */ device_t bus; /* Bus device */ device_t dev; /* Slot device */ u_char num; /* Slot number */ u_char opt; /* Slot options */ #define SDHCI_HAVE_DMA 0x01 #define SDHCI_PLATFORM_TRANSFER 0x02 #define SDHCI_NON_REMOVABLE 0x04 u_char version; int timeout; /* Transfer timeout */ uint32_t max_clk; /* Max possible freq */ uint32_t timeout_clk; /* Timeout freq */ bus_dma_tag_t dmatag; bus_dmamap_t dmamap; u_char *dmamem; bus_addr_t paddr; /* DMA buffer address */ struct task card_task; /* Card presence check task */ struct timeout_task card_delayed_task;/* Card insert delayed task */ struct callout card_poll_callout;/* Card present polling callout */ struct callout timeout_callout;/* Card command/data response timeout */ struct mmc_host host; /* Host parameters */ struct mmc_request *req; /* Current request */ struct mmc_command *curcmd; /* Current command of current request */ uint32_t intmask; /* Current interrupt mask */ uint32_t clock; /* Current clock freq. 
*/ size_t offset; /* Data buffer offset */ uint8_t hostctrl; /* Current host control register */ u_char power; /* Current power */ u_char bus_busy; /* Bus busy status */ u_char cmd_done; /* CMD command part done flag */ u_char data_done; /* DAT command part done flag */ u_char flags; /* Request execution flags */ -#define CMD_STARTED 1 -#define STOP_STARTED 2 -#define SDHCI_USE_DMA 4 /* Use DMA for this req. */ -#define PLATFORM_DATA_STARTED 8 /* Data xfer is handled by platform */ +#define CMD_STARTED 1 +#define STOP_STARTED 2 +#define SDHCI_USE_DMA 4 /* Use DMA for this req. */ +#define PLATFORM_DATA_STARTED 8 /* Data xfer is handled by platform */ struct mtx mtx; /* Slot mutex */ }; int sdhci_generic_read_ivar(device_t bus, device_t child, int which, uintptr_t *result); int sdhci_generic_write_ivar(device_t bus, device_t child, int which, uintptr_t value); int sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num); void sdhci_start_slot(struct sdhci_slot *slot); /* performs generic clean-up for platform transfers */ void sdhci_finish_data(struct sdhci_slot *slot); int sdhci_cleanup_slot(struct sdhci_slot *slot); int sdhci_generic_suspend(struct sdhci_slot *slot); int sdhci_generic_resume(struct sdhci_slot *slot); int sdhci_generic_update_ios(device_t brdev, device_t reqdev); int sdhci_generic_request(device_t brdev, device_t reqdev, struct mmc_request *req); int sdhci_generic_get_ro(device_t brdev, device_t reqdev); int sdhci_generic_acquire_host(device_t brdev, device_t reqdev); int sdhci_generic_release_host(device_t brdev, device_t reqdev); void sdhci_generic_intr(struct sdhci_slot *slot); uint32_t sdhci_generic_min_freq(device_t brdev, struct sdhci_slot *slot); bool sdhci_generic_get_card_present(device_t brdev, struct sdhci_slot *slot); void sdhci_handle_card_present(struct sdhci_slot *slot, bool is_present); #endif /* __SDHCI_H__ */ Index: projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_fdt.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_fdt.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_fdt.c (revision 313267) @@ -1,311 +1,313 @@ /*- * Copyright (c) 2012 Thomas Skibo * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* Generic driver to attach sdhci controllers on simplebus. * Derived mainly from sdhci_pci.c */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" -#define MAX_SLOTS 6 +#define MAX_SLOTS 6 struct sdhci_fdt_softc { device_t dev; /* Controller device */ u_int quirks; /* Chip specific quirks */ u_int caps; /* If we override SDHCI_CAPABILITIES */ uint32_t max_clk; /* Max possible freq */ struct resource *irq_res; /* IRQ resource */ - void *intrhand; /* Interrupt handle */ + void *intrhand; /* Interrupt handle */ int num_slots; /* Number of slots on this controller*/ struct sdhci_slot slots[MAX_SLOTS]; struct resource *mem_res[MAX_SLOTS]; /* Memory resource */ }; static uint8_t sdhci_fdt_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + return (bus_read_1(sc->mem_res[slot->num], off)); } static void sdhci_fdt_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off, - uint8_t val) + uint8_t val) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + bus_write_1(sc->mem_res[slot->num], off, val); } static uint16_t sdhci_fdt_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + return (bus_read_2(sc->mem_res[slot->num], off)); } static void sdhci_fdt_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off, - uint16_t val) + uint16_t val) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + bus_write_2(sc->mem_res[slot->num], off, val); } static uint32_t sdhci_fdt_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + return (bus_read_4(sc->mem_res[slot->num], off)); } static void sdhci_fdt_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, - uint32_t val) + uint32_t val) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + bus_write_4(sc->mem_res[slot->num], off, val); } static void sdhci_fdt_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + bus_read_multi_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_fdt_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + bus_write_multi_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_fdt_intr(void *arg) { struct sdhci_fdt_softc *sc = (struct sdhci_fdt_softc *)arg; int i; - for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; - sdhci_generic_intr(slot); - } + for (i = 0; i < sc->num_slots; i++) + sdhci_generic_intr(&sc->slots[i]); } static int sdhci_fdt_probe(device_t dev) { struct sdhci_fdt_softc *sc = device_get_softc(dev); phandle_t node; pcell_t cid; sc->quirks = 0; sc->num_slots = 1; sc->max_clk = 0; if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "sdhci_generic")) { device_set_desc(dev, "generic fdt SDHCI controller"); } else if (ofw_bus_is_compatible(dev, "xlnx,zy7_sdhci")) { sc->quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; device_set_desc(dev, "Zynq-7000 generic fdt SDHCI controller"); } else return (ENXIO); node = ofw_bus_get_node(dev); /* Allow dts to patch quirks, slots, and max-frequency. 
*/ if ((OF_getencprop(node, "quirks", &cid, sizeof(cid))) > 0) sc->quirks = cid; if ((OF_getencprop(node, "num-slots", &cid, sizeof(cid))) > 0) sc->num_slots = cid; if ((OF_getencprop(node, "max-frequency", &cid, sizeof(cid))) > 0) sc->max_clk = cid; return (0); } static int sdhci_fdt_attach(device_t dev) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + struct sdhci_slot *slot; int err, slots, rid, i; sc->dev = dev; /* Allocate IRQ. */ rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_ACTIVE); + RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "Can't allocate IRQ\n"); return (ENOMEM); } /* Scan all slots. */ slots = sc->num_slots; /* number of slots determined in probe(). */ sc->num_slots = 0; for (i = 0; i < slots; i++) { - struct sdhci_slot *slot = &sc->slots[sc->num_slots]; + slot = &sc->slots[sc->num_slots]; /* Allocate memory. */ rid = 0; sc->mem_res[i] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res[i] == NULL) { - device_printf(dev, "Can't allocate memory for " - "slot %d\n", i); + device_printf(dev, + "Can't allocate memory for slot %d\n", i); continue; } slot->quirks = sc->quirks; slot->caps = sc->caps; slot->max_clk = sc->max_clk; if (sdhci_init_slot(dev, slot, i) != 0) continue; sc->num_slots++; } device_printf(dev, "%d slot(s) allocated\n", sc->num_slots); /* Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdhci_fdt_intr, sc, &sc->intrhand); if (err) { device_printf(dev, "Cannot setup IRQ\n"); return (err); } /* Process cards detection. */ - for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; - sdhci_start_slot(slot); - } + for (i = 0; i < sc->num_slots; i++) + sdhci_start_slot(&sc->slots[i]); return (0); } static int sdhci_fdt_detach(device_t dev) { struct sdhci_fdt_softc *sc = device_get_softc(dev); int i; bus_generic_detach(dev); bus_teardown_intr(dev, sc->irq_res, sc->intrhand); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), - sc->irq_res); + sc->irq_res); for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; - - sdhci_cleanup_slot(slot); + sdhci_cleanup_slot(&sc->slots[i]); bus_release_resource(dev, SYS_RES_MEMORY, - rman_get_rid(sc->mem_res[i]), - sc->mem_res[i]); + rman_get_rid(sc->mem_res[i]), sc->mem_res[i]); } return (0); } static device_method_t sdhci_fdt_methods[] = { /* device_if */ - DEVMETHOD(device_probe, sdhci_fdt_probe), - DEVMETHOD(device_attach, sdhci_fdt_attach), - DEVMETHOD(device_detach, sdhci_fdt_detach), + DEVMETHOD(device_probe, sdhci_fdt_probe), + DEVMETHOD(device_attach, sdhci_fdt_attach), + DEVMETHOD(device_detach, sdhci_fdt_detach), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* mmcbr_if */ - DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), - DEVMETHOD(mmcbr_request, sdhci_generic_request), - DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), - DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), - DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), + DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), + DEVMETHOD(mmcbr_request, sdhci_generic_request), + DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), + DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), + DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI registers accessors */ DEVMETHOD(sdhci_read_1, sdhci_fdt_read_1), DEVMETHOD(sdhci_read_2, sdhci_fdt_read_2), 
DEVMETHOD(sdhci_read_4, sdhci_fdt_read_4), DEVMETHOD(sdhci_read_multi_4, sdhci_fdt_read_multi_4), DEVMETHOD(sdhci_write_1, sdhci_fdt_write_1), DEVMETHOD(sdhci_write_2, sdhci_fdt_write_2), DEVMETHOD(sdhci_write_4, sdhci_fdt_write_4), DEVMETHOD(sdhci_write_multi_4, sdhci_fdt_write_multi_4), DEVMETHOD_END }; static driver_t sdhci_fdt_driver = { "sdhci_fdt", sdhci_fdt_methods, sizeof(struct sdhci_fdt_softc), }; static devclass_t sdhci_fdt_devclass; DRIVER_MODULE(sdhci_fdt, simplebus, sdhci_fdt_driver, sdhci_fdt_devclass, NULL, NULL); MODULE_DEPEND(sdhci_fdt, sdhci, 1, 1, 1); DRIVER_MODULE(mmc, sdhci_fdt, mmc_driver, mmc_devclass, NULL, NULL); MODULE_DEPEND(sdhci_fdt, mmc, 1, 1, 1); Index: projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_pci.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_pci.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/dev/sdhci/sdhci_pci.c (revision 313267) @@ -1,500 +1,494 @@ /*- * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sdhci.h" #include "mmcbr_if.h" #include "sdhci_if.h" /* * PCI registers */ -#define PCI_SDHCI_IFPIO 0x00 -#define PCI_SDHCI_IFDMA 0x01 -#define PCI_SDHCI_IFVENDOR 0x02 +#define PCI_SDHCI_IFPIO 0x00 +#define PCI_SDHCI_IFDMA 0x01 +#define PCI_SDHCI_IFVENDOR 0x02 -#define PCI_SLOT_INFO 0x40 /* 8 bits */ -#define PCI_SLOT_INFO_SLOTS(x) (((x >> 4) & 7) + 1) -#define PCI_SLOT_INFO_FIRST_BAR(x) ((x) & 7) +#define PCI_SLOT_INFO 0x40 /* 8 bits */ +#define PCI_SLOT_INFO_SLOTS(x) (((x >> 4) & 7) + 1) +#define PCI_SLOT_INFO_FIRST_BAR(x) ((x) & 7) /* * RICOH specific PCI registers */ #define SDHC_PCI_MODE_KEY 0xf9 #define SDHC_PCI_MODE 0x150 #define SDHC_PCI_MODE_SD20 0x10 #define SDHC_PCI_BASE_FREQ_KEY 0xfc #define SDHC_PCI_BASE_FREQ 0xe1 static const struct sdhci_device { uint32_t model; uint16_t subvendor; const char *desc; u_int quirks; } sdhci_devices[] = { { 0x08221180, 0xffff, "RICOH R5C822 SD", SDHCI_QUIRK_FORCE_DMA }, { 0xe8221180, 0xffff, "RICOH R5CE822 SD", SDHCI_QUIRK_FORCE_DMA | SDHCI_QUIRK_LOWER_FREQUENCY }, { 0xe8231180, 0xffff, "RICOH R5CE823 SD", SDHCI_QUIRK_LOWER_FREQUENCY }, { 0x8034104c, 0xffff, "TI XX21/XX11 SD", SDHCI_QUIRK_FORCE_DMA }, { 0x05501524, 0xffff, "ENE CB712 SD", SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x05511524, 0xffff, "ENE CB712 SD 2", SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x07501524, 0xffff, "ENE CB714 SD", SDHCI_QUIRK_RESET_ON_IOS | SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x07511524, 0xffff, "ENE CB714 SD 2", SDHCI_QUIRK_RESET_ON_IOS | SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x410111ab, 0xffff, "Marvell CaFe SD", SDHCI_QUIRK_INCR_TIMEOUT_CONTROL }, { 0x2381197B, 0xffff, "JMicron JMB38X SD", SDHCI_QUIRK_32BIT_DMA_SIZE | SDHCI_QUIRK_RESET_AFTER_REQUEST }, { 0x16bc14e4, 0xffff, "Broadcom BCM577xx SDXC/MMC Card Reader", SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC }, { 0x0f148086, 0xffff, "Intel Bay Trail eMMC 4.5 Controller", SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE | SDHCI_QUIRK_INTEL_POWER_UP_RESET }, { 0x0f508086, 0xffff, "Intel Bay Trail eMMC 4.5 Controller", SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE | SDHCI_QUIRK_INTEL_POWER_UP_RESET }, { 0x22948086, 0xffff, "Intel Braswell eMMC 4.5.1 Controller", SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE | SDHCI_QUIRK_DATA_TIMEOUT_1MHZ | SDHCI_QUIRK_INTEL_POWER_UP_RESET }, { 0x5acc8086, 0xffff, "Intel Apollo Lake eMMC 5.0 Controller", SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE | SDHCI_QUIRK_INTEL_POWER_UP_RESET }, { 0, 0xffff, NULL, 0 } }; struct sdhci_pci_softc { u_int quirks; /* Chip specific quirks */ struct resource *irq_res; /* IRQ resource */ void *intrhand; /* Interrupt handle */ int num_slots; /* Number of slots on this controller */ struct sdhci_slot slots[6]; struct resource *mem_res[6]; /* Memory resource */ uint8_t cfg_freq; /* Saved frequency */ uint8_t cfg_mode; /* Saved mode */ }; static int sdhci_enable_msi = 1; SYSCTL_INT(_hw_sdhci, OID_AUTO, enable_msi, CTLFLAG_RDTUN, &sdhci_enable_msi, 0, "Enable MSI interrupts"); static uint8_t sdhci_pci_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_1(sc->mem_res[slot->num], off); } static void sdhci_pci_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint8_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); 
bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_1(sc->mem_res[slot->num], off, val); } static uint16_t sdhci_pci_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_2(sc->mem_res[slot->num], off); } static void sdhci_pci_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint16_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_2(sc->mem_res[slot->num], off, val); } static uint32_t sdhci_pci_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_4(sc->mem_res[slot->num], off); } static void sdhci_pci_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_4(sc->mem_res[slot->num], off, val); } static void sdhci_pci_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_read_multi_stream_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_pci_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_write_multi_stream_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_pci_intr(void *arg); static void sdhci_lower_frequency(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); /* * Enable SD2.0 mode. * NB: for RICOH R5CE823, this changes the PCI device ID to 0xe822. */ pci_write_config(dev, SDHC_PCI_MODE_KEY, 0xfc, 1); sc->cfg_mode = pci_read_config(dev, SDHC_PCI_MODE, 1); pci_write_config(dev, SDHC_PCI_MODE, SDHC_PCI_MODE_SD20, 1); pci_write_config(dev, SDHC_PCI_MODE_KEY, 0x00, 1); /* * Some SD/MMC cards don't work with the default base * clock frequency of 200 MHz. Lower it to 50 MHz. */ pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x01, 1); sc->cfg_freq = pci_read_config(dev, SDHC_PCI_BASE_FREQ, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ, 50, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x00, 1); } static void sdhci_restore_frequency(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); /* Restore mode. */ pci_write_config(dev, SDHC_PCI_MODE_KEY, 0xfc, 1); pci_write_config(dev, SDHC_PCI_MODE, sc->cfg_mode, 1); pci_write_config(dev, SDHC_PCI_MODE_KEY, 0x00, 1); /* Restore frequency. 
*/ pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x01, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ, sc->cfg_freq, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x00, 1); } static int sdhci_pci_probe(device_t dev) { uint32_t model; uint16_t subvendor; uint8_t class, subclass; int i, result; model = (uint32_t)pci_get_device(dev) << 16; model |= (uint32_t)pci_get_vendor(dev) & 0x0000ffff; subvendor = pci_get_subvendor(dev); class = pci_get_class(dev); subclass = pci_get_subclass(dev); result = ENXIO; for (i = 0; sdhci_devices[i].model != 0; i++) { if (sdhci_devices[i].model == model && (sdhci_devices[i].subvendor == 0xffff || sdhci_devices[i].subvendor == subvendor)) { device_set_desc(dev, sdhci_devices[i].desc); result = BUS_PROBE_DEFAULT; break; } } if (result == ENXIO && class == PCIC_BASEPERIPH && subclass == PCIS_BASEPERIPH_SDHC) { device_set_desc(dev, "Generic SD HCI"); result = BUS_PROBE_GENERIC; } return (result); } static int sdhci_pci_attach(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); + struct sdhci_slot *slot; uint32_t model; uint16_t subvendor; int bar, err, rid, slots, i; model = (uint32_t)pci_get_device(dev) << 16; model |= (uint32_t)pci_get_vendor(dev) & 0x0000ffff; subvendor = pci_get_subvendor(dev); /* Apply chip specific quirks. */ for (i = 0; sdhci_devices[i].model != 0; i++) { if (sdhci_devices[i].model == model && (sdhci_devices[i].subvendor == 0xffff || sdhci_devices[i].subvendor == subvendor)) { sc->quirks = sdhci_devices[i].quirks; break; } } /* Some controllers need to be bumped into the right mode. */ if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_lower_frequency(dev); /* Read slots info from PCI registers. */ slots = pci_read_config(dev, PCI_SLOT_INFO, 1); bar = PCI_SLOT_INFO_FIRST_BAR(slots); slots = PCI_SLOT_INFO_SLOTS(slots); if (slots > 6 || bar > 5) { device_printf(dev, "Incorrect slots information (%d, %d).\n", slots, bar); return (EINVAL); } /* Allocate IRQ. */ i = 1; rid = 0; if (sdhci_enable_msi != 0 && pci_alloc_msi(dev, &i) == 0) rid = 1; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->irq_res == NULL) { device_printf(dev, "Can't allocate IRQ\n"); pci_release_msi(dev); return (ENOMEM); } /* Scan all slots. */ for (i = 0; i < slots; i++) { - struct sdhci_slot *slot = &sc->slots[sc->num_slots]; + slot = &sc->slots[sc->num_slots]; /* Allocate memory. */ rid = PCIR_BAR(bar + i); sc->mem_res[i] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res[i] == NULL) { device_printf(dev, "Can't allocate memory for slot %d\n", i); continue; } - + slot->quirks = sc->quirks; if (sdhci_init_slot(dev, slot, i) != 0) continue; sc->num_slots++; } device_printf(dev, "%d slot(s) allocated\n", sc->num_slots); /* Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdhci_pci_intr, sc, &sc->intrhand); if (err) device_printf(dev, "Can't setup IRQ\n"); pci_enable_busmaster(dev); /* Process cards detection. 
*/ - for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; + for (i = 0; i < sc->num_slots; i++) + sdhci_start_slot(&sc->slots[i]); - sdhci_start_slot(slot); - } - return (0); } static int sdhci_pci_detach(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i; bus_teardown_intr(dev, sc->irq_res, sc->intrhand); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); pci_release_msi(dev); for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; - - sdhci_cleanup_slot(slot); + sdhci_cleanup_slot(&sc->slots[i]); bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res[i]), sc->mem_res[i]); } if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_restore_frequency(dev); return (0); } static int sdhci_pci_shutdown(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_restore_frequency(dev); return (0); } static int sdhci_pci_suspend(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i, err; err = bus_generic_suspend(dev); if (err) return (err); for (i = 0; i < sc->num_slots; i++) sdhci_generic_suspend(&sc->slots[i]); return (0); } static int sdhci_pci_resume(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i, err; for (i = 0; i < sc->num_slots; i++) sdhci_generic_resume(&sc->slots[i]); err = bus_generic_resume(dev); if (err) return (err); if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_lower_frequency(dev); return (0); } static void sdhci_pci_intr(void *arg) { struct sdhci_pci_softc *sc = (struct sdhci_pci_softc *)arg; int i; - for (i = 0; i < sc->num_slots; i++) { - struct sdhci_slot *slot = &sc->slots[i]; - sdhci_generic_intr(slot); - } + for (i = 0; i < sc->num_slots; i++) + sdhci_generic_intr(&sc->slots[i]); } static device_method_t sdhci_methods[] = { /* device_if */ - DEVMETHOD(device_probe, sdhci_pci_probe), - DEVMETHOD(device_attach, sdhci_pci_attach), - DEVMETHOD(device_detach, sdhci_pci_detach), - DEVMETHOD(device_shutdown, sdhci_pci_shutdown), - DEVMETHOD(device_suspend, sdhci_pci_suspend), - DEVMETHOD(device_resume, sdhci_pci_resume), + DEVMETHOD(device_probe, sdhci_pci_probe), + DEVMETHOD(device_attach, sdhci_pci_attach), + DEVMETHOD(device_detach, sdhci_pci_detach), + DEVMETHOD(device_shutdown, sdhci_pci_shutdown), + DEVMETHOD(device_suspend, sdhci_pci_suspend), + DEVMETHOD(device_resume, sdhci_pci_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* mmcbr_if */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI registers accessors */ DEVMETHOD(sdhci_read_1, sdhci_pci_read_1), DEVMETHOD(sdhci_read_2, sdhci_pci_read_2), DEVMETHOD(sdhci_read_4, sdhci_pci_read_4), DEVMETHOD(sdhci_read_multi_4, sdhci_pci_read_multi_4), DEVMETHOD(sdhci_write_1, sdhci_pci_write_1), DEVMETHOD(sdhci_write_2, sdhci_pci_write_2), DEVMETHOD(sdhci_write_4, sdhci_pci_write_4), DEVMETHOD(sdhci_write_multi_4, sdhci_pci_write_multi_4), DEVMETHOD_END }; static driver_t sdhci_pci_driver = { "sdhci_pci", sdhci_methods, sizeof(struct sdhci_pci_softc), }; static devclass_t sdhci_pci_devclass; DRIVER_MODULE(sdhci_pci, pci, sdhci_pci_driver, sdhci_pci_devclass, NULL, NULL); MODULE_DEPEND(sdhci_pci, sdhci, 
1, 1, 1); DRIVER_MODULE(mmc, sdhci_pci, mmc_driver, mmc_devclass, NULL, NULL); MODULE_DEPEND(sdhci_pci, mmc, 1, 1, 1); Index: projects/netbsd-tests-upstream-01-2017/sys/kern/kern_descrip.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/kern/kern_descrip.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/kern/kern_descrip.c (revision 313267) @@ -1,4129 +1,4129 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities"); MALLOC_DECLARE(M_FADVISE); static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); static void fdgrowtable_exp(struct filedesc *fdp, int nfd); static void fdunused(struct filedesc *fdp, int fd); static void fdused(struct filedesc *fdp, int fd); static int getmaxfd(struct thread *td); /* * Each process has: * * - An array of open file descriptors (fd_ofiles) * - An array of file flags (fd_ofileflags) * - A bitmap recording which descriptors are in use (fd_map) * * A process starts out with NDFILE descriptors. The value of NDFILE has * been selected based the historical limit of 20 open files, and an * assumption that the majority of processes, especially short-lived * processes like shells, will never need more. * * If this initial allocation is exhausted, a larger descriptor table and * map are allocated dynamically, and the pointers in the process's struct * filedesc are updated to point to those. This is repeated every time * the process runs out of file descriptors (provided it hasn't hit its * resource limit). * * Since threads may hold references to individual descriptor table * entries, the tables are never freed. Instead, they are placed on a * linked list and freed only when the struct filedesc is released. */ #define NDFILE 20 #define NDSLOTSIZE sizeof(NDSLOTTYPE) #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT) #define NDSLOT(x) ((x) / NDENTRIES) #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES)) #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES) /* * SLIST entry used to keep track of ofiles which must be reclaimed when * the process exits. */ struct freetable { struct fdescenttbl *ft_table; SLIST_ENTRY(freetable) ft_next; }; /* * Initial allocation: a filedesc structure + the head of SLIST used to * keep track of old ofiles + enough space for NDFILE descriptors. */ struct fdescenttbl0 { int fdt_nfiles; struct filedescent fdt_ofiles[NDFILE]; }; struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; struct fdescenttbl0 fd_dfiles; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; /* * Descriptor management. */ volatile int __exclusive_cache_line openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp); /* * If low >= size, just return low. 
Otherwise find the first zero bit in the * given bitmap, starting at low and not exceeding size - 1. Return size if * not found. */ static int fd_first_free(struct filedesc *fdp, int low, int size) { NDSLOTTYPE *map = fdp->fd_map; NDSLOTTYPE mask; int off, maxoff; if (low >= size) return (low); off = NDSLOT(low); if (low % NDENTRIES) { mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); if ((mask &= ~map[off]) != 0UL) return (off * NDENTRIES + ffsl(mask) - 1); ++off; } for (maxoff = NDSLOTS(size); off < maxoff; ++off) if (map[off] != ~0UL) return (off * NDENTRIES + ffsl(~map[off]) - 1); return (size); } /* * Find the highest non-zero bit in the given bitmap, starting at 0 and * not exceeding size - 1. Return -1 if not found. */ static int fd_last_used(struct filedesc *fdp, int size) { NDSLOTTYPE *map = fdp->fd_map; NDSLOTTYPE mask; int off, minoff; off = NDSLOT(size); if (size % NDENTRIES) { mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); if ((mask &= map[off]) != 0) return (off * NDENTRIES + flsl(mask) - 1); --off; } for (minoff = NDSLOT(0); off >= minoff; --off) if (map[off] != 0) return (off * NDENTRIES + flsl(map[off]) - 1); return (-1); } static int fdisused(struct filedesc *fdp, int fd) { KASSERT(fd >= 0 && fd < fdp->fd_nfiles, ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); } /* * Mark a file descriptor as used. */ static void fdused_init(struct filedesc *fdp, int fd) { KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd)); fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); } static void fdused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); fdused_init(fdp, fd); if (fd > fdp->fd_lastfile) fdp->fd_lastfile = fd; if (fd == fdp->fd_freefile) fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); } /* * Mark a file descriptor as unused. */ static void fdunused(struct filedesc *fdp, int fd) { FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; if (fd == fdp->fd_lastfile) fdp->fd_lastfile = fd_last_used(fdp, fd); } /* * Free a file descriptor. * * Avoid some work if fdp is about to be destroyed. */ static inline void fdefree_last(struct filedescent *fde) { filecaps_free(&fde->fde_caps); } static inline void fdfree(struct filedesc *fdp, int fd) { struct filedescent *fde; fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seq_write_begin(&fde->fde_seq); #endif fdefree_last(fde); fde->fde_file = NULL; fdunused(fdp, fd); #ifdef CAPABILITIES seq_write_end(&fde->fde_seq); #endif } void pwd_ensure_dirs(void) { struct filedesc *fdp; fdp = curproc->p_fd; FILEDESC_XLOCK(fdp); if (fdp->fd_cdir == NULL) { fdp->fd_cdir = rootvnode; vrefact(rootvnode); } if (fdp->fd_rdir == NULL) { fdp->fd_rdir = rootvnode; vrefact(rootvnode); } FILEDESC_XUNLOCK(fdp); } /* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap) { #ifdef RACCT uint64_t lim; #endif td->td_retval[0] = min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc); #ifdef RACCT PROC_LOCK(td->td_proc); lim = racct_get_limit(td->td_proc, RACCT_NOFILE); PROC_UNLOCK(td->td_proc); if (lim < td->td_retval[0]) td->td_retval[0] = lim; #endif return (0); } /* * Duplicate a file descriptor to a particular value. 
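The dup2() entry point backed by kern_dup() further down is most commonly used from userland to redirect a standard descriptor. A minimal userspace sketch, illustrative only and not part of this change (the log file name is arbitrary):

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int fd;

        fd = open("out.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd == -1)
                err(1, "open");
        /* Make descriptor 1 (stdout) refer to the same open file. */
        if (dup2(fd, STDOUT_FILENO) == -1)
                err(1, "dup2");
        close(fd);
        printf("this line ends up in out.log\n");
        return (0);
}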
* * Note: keep in mind that a potential race condition exists when closing * descriptors from a shared descriptor table (via rfork). */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int sys_dup2(struct thread *td, struct dup2_args *uap) { return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int sys_dup(struct thread *td, struct dup_args *uap) { return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int sys_fcntl(struct thread *td, struct fcntl_args *uap) { return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg)); } int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg) { struct flock fl; struct __oflock ofl; intptr_t arg1; int error, newcmd; error = 0; newcmd = cmd; switch (cmd) { case F_OGETLK: case F_OSETLK: case F_OSETLKW: /* * Convert old flock structure to new. */ error = copyin((void *)(intptr_t)arg, &ofl, sizeof(ofl)); fl.l_start = ofl.l_start; fl.l_len = ofl.l_len; fl.l_pid = ofl.l_pid; fl.l_type = ofl.l_type; fl.l_whence = ofl.l_whence; fl.l_sysid = 0; switch (cmd) { case F_OGETLK: newcmd = F_GETLK; break; case F_OSETLK: newcmd = F_SETLK; break; case F_OSETLKW: newcmd = F_SETLKW; break; } arg1 = (intptr_t)&fl; break; case F_GETLK: case F_SETLK: case F_SETLKW: case F_SETLK_REMOTE: error = copyin((void *)(intptr_t)arg, &fl, sizeof(fl)); arg1 = (intptr_t)&fl; break; default: arg1 = arg; break; } if (error) return (error); error = kern_fcntl(td, fd, newcmd, arg1); if (error) return (error); if (cmd == F_OGETLK) { ofl.l_start = fl.l_start; ofl.l_len = fl.l_len; ofl.l_pid = fl.l_pid; ofl.l_type = fl.l_type; ofl.l_whence = fl.l_whence; error = copyout(&ofl, (void *)(intptr_t)arg, sizeof(ofl)); } else if (cmd == F_GETLK) { error = copyout(&fl, (void *)(intptr_t)arg, sizeof(fl)); } return (error); } int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; struct file *fp, *fp2; struct filedescent *fde; struct proc *p; struct vnode *vp; cap_rights_t rights; int error, flg, tmp; uint64_t bsize; off_t foffset; error = 0; flg = F_POSIX; p = td->td_proc; fdp = p->p_fd; AUDIT_ARG_FD(cmd); AUDIT_ARG_CMD(cmd); switch (cmd) { case F_DUPFD: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp); break; case F_DUPFD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_DUP2FD: tmp = arg; error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp); break; case F_DUP2FD_CLOEXEC: tmp = arg; error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp); break; case F_GETFD: error = EBADF; FILEDESC_SLOCK(fdp); fde = fdeget_locked(fdp, fd); if (fde != NULL) { td->td_retval[0] = (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; error = 0; } FILEDESC_SUNLOCK(fdp); break; case F_SETFD: error = EBADF; FILEDESC_XLOCK(fdp); fde = fdeget_locked(fdp, fd); if (fde != NULL) { fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); error = 0; } FILEDESC_XUNLOCK(fdp); break; case F_GETFL: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_GETFL, &fp); if (error != 0) break; td->td_retval[0] = OFLAGS(fp->f_flag); fdrop(fp, td); break; case F_SETFL: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_SETFL, &fp); if (error != 0) break; do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error != 0) { fdrop(fp, td); break; } tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); if (error == 0) { fdrop(fp, td); break; } atomic_clear_int(&fp->f_flag, FNONBLOCK); tmp = 0; (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_GETOWN: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_GETOWN, &fp); if (error != 0) break; error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; fdrop(fp, td); break; case F_SETOWN: error = fget_fcntl(td, fd, cap_rights_init(&rights, CAP_FCNTL), F_SETOWN, &fp); if (error != 0) break; tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); break; case F_SETLK_REMOTE: error = priv_check(td, PRIV_NFS_LOCKD); if (error) return (error); flg = F_REMOTE; goto do_setlk; case F_SETLKW: flg |= F_WAIT; /* FALLTHROUGH F_SETLK */ case F_SETLK: do_setlk: cap_rights_init(&rights, CAP_FLOCK); error = fget_unlocked(fdp, fd, &rights, &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EBADF; fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if (foffset < 0 || (flp->l_start > 0 && foffset > OFF_MAX - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) { error = EBADF; break; } PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) { error = EBADF; break; } PROC_LOCK(p->p_leader); p->p_leader->p_flag |= P_ADVLOCK; PROC_UNLOCK(p->p_leader); error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, flp, flg); break; case F_UNLCK: error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, flg); break; case F_UNLCKSYS: /* * Temporary api for testing remote lock * infrastructure. */ if (flg != F_REMOTE) { error = EINVAL; break; } error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCKSYS, flp, flg); break; default: error = EINVAL; break; } if (error != 0 || flp->l_type == F_UNLCK || flp->l_type == F_UNLCKSYS) { fdrop(fp, td); break; } /* * Check for a race with close. * * The vnode is now advisory locked (or unlocked, but this case * is not really important) as the caller requested. * We had to drop the filedesc lock, so we need to recheck if * the descriptor is still valid, because if it was closed * in the meantime we need to remove advisory lock from the * vnode - close on any descriptor leading to an advisory * locked vnode, removes that lock. * We will return 0 on purpose in that case, as the result of * successful advisory lock might have been externally visible * already. 
This is fine - effectively we pretend to the caller * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ error = fget_unlocked(fdp, fd, &rights, &fp2, NULL); if (error != 0) { fdrop(fp, td); break; } if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); } fdrop(fp, td); fdrop(fp2, td); break; case F_GETLK: error = fget_unlocked(fdp, fd, cap_rights_init(&rights, CAP_FLOCK), &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { error = EBADF; fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { error = EINVAL; fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); if ((flp->l_start > 0 && foffset > OFF_MAX - flp->l_start) || (flp->l_start < 0 && foffset < OFF_MIN - flp->l_start)) { error = EOVERFLOW; fdrop(fp, td); break; } flp->l_start += foffset; } vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); fdrop(fp, td); break; case F_RDAHEAD: arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: error = fget_unlocked(fdp, fd, cap_rights_init(&rights), &fp, NULL); if (error != 0) break; if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); error = EBADF; break; } vp = fp->f_vnode; /* * Exclusive lock synchronizes against f_seqcount reads and * writes in sequential_heuristic(). */ error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) { fdrop(fp, td); break; } if (arg >= 0) { bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; fp->f_seqcount = (arg + bsize - 1) / bsize; atomic_set_int(&fp->f_flag, FRDAHEAD); } else { atomic_clear_int(&fp->f_flag, FRDAHEAD); } VOP_UNLOCK(vp, 0); fdrop(fp, td); break; default: error = EINVAL; break; } return (error); } static int getmaxfd(struct thread *td) { return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc)); } /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ int kern_dup(struct thread *td, u_int mode, int flags, int old, int new) { struct filedesc *fdp; struct filedescent *oldfde, *newfde; struct proc *p; struct file *delfp; int error, maxfd; p = td->td_proc; fdp = p->p_fd; MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0); MPASS(mode < FDDUP_LASTMODE); AUDIT_ARG_FD(old); /* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */ /* * Verify we have a valid descriptor to dup from and possibly to * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should * return EINVAL when the new descriptor is out of bounds. */ if (old < 0) return (EBADF); if (new < 0) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); maxfd = getmaxfd(td); if (new >= maxfd) return (mode == FDDUP_FCNTL ? EINVAL : EBADF); error = EBADF; FILEDESC_XLOCK(fdp); if (fget_locked(fdp, old) == NULL) goto unlock; if ((mode == FDDUP_FIXED || mode == FDDUP_MUSTREPLACE) && old == new) { td->td_retval[0] = new; if (flags & FDDUP_FLAG_CLOEXEC) fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; error = 0; goto unlock; } /* * If the caller specified a file descriptor, make sure the file * table is large enough to hold it, and grab it. Otherwise, just * allocate a new descriptor the usual way. */ switch (mode) { case FDDUP_NORMAL: case FDDUP_FCNTL: if ((error = fdalloc(td, new, &new)) != 0) goto unlock; break; case FDDUP_MUSTREPLACE: /* Target file descriptor must exist. 
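The comment in kern_dup() above notes that fcntl(F_DUPFD) reports an out-of-range target as EINVAL while dup()/dup2() report EBADF. A small userspace sketch that exercises both paths, assuming the soft RLIMIT_NOFILE value fits in an int:

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

int
main(void)
{
        struct rlimit rl;
        int past_limit;

        if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
                err(1, "getrlimit");
        past_limit = (int)rl.rlim_cur;  /* first value outside the fd range */

        /* F_DUPFD with an out-of-range minimum descriptor: EINVAL. */
        if (fcntl(STDIN_FILENO, F_DUPFD, past_limit) == -1)
                printf("F_DUPFD: %s\n", strerror(errno));

        /* dup2() to an out-of-range target descriptor: EBADF. */
        if (dup2(STDIN_FILENO, past_limit) == -1)
                printf("dup2: %s\n", strerror(errno));
        return (0);
}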
*/ if (fget_locked(fdp, new) == NULL) goto unlock; break; case FDDUP_FIXED: if (new >= fdp->fd_nfiles) { /* * The resource limits are here instead of e.g. * fdalloc(), because the file descriptor table may be * shared between processes, so we can't really use * racct_add()/racct_sub(). Instead of counting the * number of actually allocated descriptors, just put * the limit on the size of the file descriptor table. */ #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_set(p, RACCT_NOFILE, new + 1); PROC_UNLOCK(p); if (error != 0) { error = EMFILE; goto unlock; } } #endif fdgrowtable_exp(fdp, new + 1); } if (!fdisused(fdp, new)) fdused(fdp, new); break; default: KASSERT(0, ("%s unsupported mode %d", __func__, mode)); } KASSERT(old != new, ("new fd is same as old")); oldfde = &fdp->fd_ofiles[old]; fhold(oldfde->fde_file); newfde = &fdp->fd_ofiles[new]; delfp = newfde->fde_file; /* * Duplicate the source descriptor. */ #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif filecaps_free(&newfde->fde_caps); memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true); if ((flags & FDDUP_FLAG_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif td->td_retval[0] = new; error = 0; if (delfp != NULL) { (void) closefp(fdp, new, delfp, td, 1); FILEDESC_UNLOCK_ASSERT(fdp); } else { unlock: FILEDESC_XUNLOCK(fdp); } return (error); } /* * If sigio is on the list associated with a process or process group, * disable signalling from the device, remove sigio from the list and * free sigio. */ void funsetown(struct sigio **sigiop) { struct sigio *sigio; if (*sigiop == NULL) return; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { SIGIO_UNLOCK(); return; } *(sigio->sio_myref) = NULL; if ((sigio)->sio_pgid < 0) { struct pgrp *pg = (sigio)->sio_pgrp; PGRP_LOCK(pg); SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, sigio, sio_pgsigio); PGRP_UNLOCK(pg); } else { struct proc *p = (sigio)->sio_proc; PROC_LOCK(p); SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, sigio, sio_pgsigio); PROC_UNLOCK(p); } SIGIO_UNLOCK(); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); } /* * Free a list of sigio structures. * We only need to lock the SIGIO_LOCK because we have made ourselves * inaccessible to callers of fsetown and therefore do not need to lock * the proc or pgrp struct for the list manipulation. */ void funsetownlst(struct sigiolst *sigiolst) { struct proc *p; struct pgrp *pg; struct sigio *sigio; sigio = SLIST_FIRST(sigiolst); if (sigio == NULL) return; p = NULL; pg = NULL; /* * Every entry of the list should belong * to a single proc or pgrp. 
*/ if (sigio->sio_pgid < 0) { pg = sigio->sio_pgrp; PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); } else /* if (sigio->sio_pgid > 0) */ { p = sigio->sio_proc; PROC_LOCK_ASSERT(p, MA_NOTOWNED); } SIGIO_LOCK(); while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { *(sigio->sio_myref) = NULL; if (pg != NULL) { KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list")); KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list")); PGRP_LOCK(pg); SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio); PGRP_UNLOCK(pg); } else /* if (p != NULL) */ { KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list")); KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list")); PROC_LOCK(p); SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio); PROC_UNLOCK(p); } SIGIO_UNLOCK(); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); SIGIO_LOCK(); } SIGIO_UNLOCK(); } /* * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). * * After permission checking, add a sigio structure to the sigio list for * the process or process group. */ int fsetown(pid_t pgid, struct sigio **sigiop) { struct proc *proc; struct pgrp *pgrp; struct sigio *sigio; int ret; if (pgid == 0) { funsetown(sigiop); return (0); } ret = 0; /* Allocate and fill in the new sigio out of locks. */ sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK); sigio->sio_pgid = pgid; sigio->sio_ucred = crhold(curthread->td_ucred); sigio->sio_myref = sigiop; sx_slock(&proctree_lock); if (pgid > 0) { proc = pfind(pgid); if (proc == NULL) { ret = ESRCH; goto fail; } /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ PROC_UNLOCK(proc); if (proc->p_session != curthread->td_proc->p_session) { ret = EPERM; goto fail; } pgrp = NULL; } else /* if (pgid < 0) */ { pgrp = pgfind(-pgid); if (pgrp == NULL) { ret = ESRCH; goto fail; } PGRP_UNLOCK(pgrp); /* * Policy - Don't allow a process to FSETOWN a process * in another session. * * Remove this test to allow maximum flexibility or * restrict FSETOWN to the current process or process * group for maximum safety. */ if (pgrp->pg_session != curthread->td_proc->p_session) { ret = EPERM; goto fail; } proc = NULL; } funsetown(sigiop); if (pgid > 0) { PROC_LOCK(proc); /* * Since funsetownlst() is called without the proctree * locked, we need to check for P_WEXIT. * XXX: is ESRCH correct? */ if ((proc->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(proc); ret = ESRCH; goto fail; } SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); sigio->sio_proc = proc; PROC_UNLOCK(proc); } else { PGRP_LOCK(pgrp); SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); sigio->sio_pgrp = pgrp; PGRP_UNLOCK(pgrp); } sx_sunlock(&proctree_lock); SIGIO_LOCK(); *sigiop = sigio; SIGIO_UNLOCK(); return (0); fail: sx_sunlock(&proctree_lock); crfree(sigio->sio_ucred); free(sigio, M_SIGIO); return (ret); } /* * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). */ pid_t fgetown(sigiop) struct sigio **sigiop; { pid_t pgid; SIGIO_LOCK(); pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; SIGIO_UNLOCK(); return (pgid); } /* * Function drops the filedesc lock on return. 
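fsetown() and fgetown() above are the kernel half of fcntl(F_SETOWN)/F_GETOWN. A hedged userspace sketch of requesting SIGIO delivery, assuming a descriptor type that supports O_ASYNC (a terminal or socket, for example):

#include <err.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void
on_sigio(int sig)
{
        (void)sig;              /* async-signal-safe work only */
}

int
main(void)
{
        int fd = STDIN_FILENO, flags;

        if (signal(SIGIO, on_sigio) == SIG_ERR)
                err(1, "signal");
        /* Deliver SIGIO for this descriptor to our own process. */
        if (fcntl(fd, F_SETOWN, getpid()) == -1)
                err(1, "F_SETOWN");
        if ((flags = fcntl(fd, F_GETFL)) == -1)
                err(1, "F_GETFL");
        if (fcntl(fd, F_SETFL, flags | O_ASYNC) == -1)
                err(1, "F_SETFL");
        pause();                /* woken by SIGIO when input is ready */
        return (0);
}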
*/ static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { int error; FILEDESC_XLOCK_ASSERT(fdp); if (holdleaders) { if (td->td_proc->p_fdtol != NULL) { /* * Ask fdfree() to sleep to ensure that all relevant * process leaders can be traversed in closef(). */ fdp->fd_holdleaderscount++; } else { holdleaders = 0; } } /* * We now hold the fp reference that used to be owned by the * descriptor array. We have to unlock the FILEDESC *AFTER* * knote_fdclose to prevent a race of the fd getting opened, a knote * added, and deleteing a knote for the new fd. */ knote_fdclose(td, fd); /* * We need to notify mqueue if the object is of type mqueue. */ if (fp->f_type == DTYPE_MQUEUE) mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); if (holdleaders) { FILEDESC_XLOCK(fdp); fdp->fd_holdleaderscount--; if (fdp->fd_holdleaderscount == 0 && fdp->fd_holdleaderswakeup != 0) { fdp->fd_holdleaderswakeup = 0; wakeup(&fdp->fd_holdleaderscount); } FILEDESC_XUNLOCK(fdp); } return (error); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int sys_close(struct thread *td, struct close_args *uap) { return (kern_close(td, uap->fd)); } int kern_close(struct thread *td, int fd) { struct filedesc *fdp; struct file *fp; fdp = td->td_proc->p_fd; AUDIT_SYSCLOSE(td, fd); FILEDESC_XLOCK(fdp); if ((fp = fget_locked(fdp, fd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. */ return (closefp(fdp, fd, fp, td, 1)); } /* * Close open file descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct closefrom_args { int lowfd; }; #endif /* ARGSUSED */ int sys_closefrom(struct thread *td, struct closefrom_args *uap) { struct filedesc *fdp; int fd; fdp = td->td_proc->p_fd; AUDIT_ARG_FD(uap->lowfd); /* * Treat negative starting file descriptor values identical to * closefrom(0) which closes all files. */ if (uap->lowfd < 0) uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd <= fdp->fd_lastfile; fd++) { if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); } } FILEDESC_SUNLOCK(fdp); return (0); } #if defined(COMPAT_43) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(struct thread *td, struct ofstat_args *uap) { struct ostat oub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) { cvtstat(&ub, &oub); error = copyout(&oub, uap->sb, sizeof(oub)); } return (error); } #endif /* COMPAT_43 */ /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int sys_fstat(struct thread *td, struct fstat_args *uap) { struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) error = copyout(&ub, uap->sb, sizeof(ub)); return (error); } int kern_fstat(struct thread *td, int fd, struct stat *sbp) { struct file *fp; cap_rights_t rights; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, cap_rights_init(&rights, CAP_FSTAT), &fp); if (error != 0) return (error); AUDIT_ARG_FILE(td->td_proc, fp); error = fo_stat(fp, sbp, td->td_ucred, td); fdrop(fp, td); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT)) ktrstat(sbp); #endif return (error); } /* * Return status information about a file descriptor. 
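kern_fstat() above services fstat(2); for reference, a trivial userspace caller:

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
        struct stat sb;

        if (fstat(STDIN_FILENO, &sb) == -1)
                err(1, "fstat");
        printf("mode %#o, size %jd\n", (unsigned int)(sb.st_mode & 07777),
            (intmax_t)sb.st_size);
        return (0);
}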
*/ #ifndef _SYS_SYSPROTO_H_ struct nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int sys_nfstat(struct thread *td, struct nfstat_args *uap) { struct nstat nub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error == 0) { cvtnstat(&ub, &nub); error = copyout(&nub, uap->sb, sizeof(nub)); } return (error); } /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int sys_fpathconf(struct thread *td, struct fpathconf_args *uap) { struct file *fp; struct vnode *vp; cap_rights_t rights; int error; error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp); if (error != 0) return (error); if (uap->name == _PC_ASYNC_IO) { td->td_retval[0] = _POSIX_ASYNCHRONOUS_IO; goto out; } vp = fp->f_vnode; if (vp != NULL) { vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_PATHCONF(vp, uap->name, td->td_retval); VOP_UNLOCK(vp, 0); } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { if (uap->name != _PC_PIPE_BUF) { error = EINVAL; } else { td->td_retval[0] = PIPE_BUF; error = 0; } } else { error = EOPNOTSUPP; } out: fdrop(fp, td); return (error); } /* * Initialize filecaps structure. */ void filecaps_init(struct filecaps *fcaps) { bzero(fcaps, sizeof(*fcaps)); fcaps->fc_nioctls = -1; } /* * Copy filecaps structure allocating memory for ioctls array if needed. * * The last parameter indicates whether the fdtable is locked. If it is not and * ioctls are encountered, copying fails and the caller must lock the table. * * Note that if the table was not locked, the caller has to check the relevant * sequence counter to determine whether the operation was successful. */ int filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked) { size_t size; *dst = *src; if (src->fc_ioctls == NULL) return (0); if (!locked) return (1); KASSERT(src->fc_nioctls > 0, ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK); bcopy(src->fc_ioctls, dst->fc_ioctls, size); return (0); } /* * Move filecaps structure to the new place and clear the old place. */ void filecaps_move(struct filecaps *src, struct filecaps *dst) { *dst = *src; bzero(src, sizeof(*src)); } /* * Fill the given filecaps structure with full rights. */ static void filecaps_fill(struct filecaps *fcaps) { CAP_ALL(&fcaps->fc_rights); fcaps->fc_ioctls = NULL; fcaps->fc_nioctls = -1; fcaps->fc_fcntls = CAP_FCNTL_ALL; } /* * Free memory allocated within filecaps structure. */ void filecaps_free(struct filecaps *fcaps) { free(fcaps->fc_ioctls, M_FILECAPS); bzero(fcaps, sizeof(*fcaps)); } /* * Validate the given filecaps structure. */ static void filecaps_validate(const struct filecaps *fcaps, const char *func) { KASSERT(cap_rights_is_valid(&fcaps->fc_rights), ("%s: invalid rights", func)); KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, ("%s: invalid fcntls", func)); KASSERT(fcaps->fc_fcntls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL), ("%s: fcntls without CAP_FCNTL", func)); KASSERT(fcaps->fc_ioctls != NULL ? 
fcaps->fc_nioctls > 0 : (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), ("%s: invalid ioctls", func)); KASSERT(fcaps->fc_nioctls == 0 || cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL), ("%s: ioctls without CAP_IOCTL", func)); } static void fdgrowtable_exp(struct filedesc *fdp, int nfd) { int nfd1; FILEDESC_XLOCK_ASSERT(fdp); nfd1 = fdp->fd_nfiles * 2; if (nfd1 < nfd) nfd1 = nfd; fdgrowtable(fdp, nfd1); } /* * Grow the file table to accommodate (at least) nfd descriptors. */ static void fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; struct fdescenttbl *ntable; struct fdescenttbl *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; /* * If lastfile is -1 this struct filedesc was just allocated and we are * growing it to accommodate for the one we are going to copy from. There * is no need to have a lock on this one as it's not visible to anyone. */ if (fdp->fd_lastfile != -1) FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdp->fd_nfiles > 0, ("zero-length file table")); /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_files; omap = fdp->fd_map; /* compute the size of the new table */ nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */ if (nnfiles <= onfiles) /* the table is already large enough */ return; /* * Allocate a new table. We need enough space for the number of * entries, file entries themselves and the struct freetable we will use * when we decommission the table and place it on the freelist. * We place the struct freetable in the middle so we don't have * to worry about padding. */ ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) + nnfiles * sizeof(ntable->fdt_ofiles[0]) + sizeof(struct freetable), M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data */ ntable->fdt_nfiles = nnfiles; memcpy(ntable->fdt_ofiles, otable->fdt_ofiles, onfiles * sizeof(ntable->fdt_ofiles[0])); /* * Allocate a new map only if the old is not large enough. It will * grow at a slower rate than the table as it can map more * entries than the table can hold. */ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) { nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy over the old data and update the pointer */ memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); fdp->fd_map = nmap; } /* * Make sure that ntable is correctly initialized before we replace * fd_files poiner. Otherwise fget_unlocked() may see inconsistent * data. */ atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable); /* * Do not free the old file table, as some threads may still * reference entries within it. Instead, place it on a freelist * which will be processed when the struct filedesc is released. * * Note that if onfiles == NDFILE, we're dealing with the original * static allocation contained within (struct filedesc0 *)fdp, * which must not be freed. */ if (onfiles > NDFILE) { ft = (struct freetable *)&otable->fdt_ofiles[onfiles]; fdp0 = (struct filedesc0 *)fdp; ft->ft_table = otable; SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next); } /* * The map does not have the same possibility of threads still * holding references to it. So always free it as long as it * does not reference the original static allocation. */ if (NDSLOTS(onfiles) > NDSLOTS(NDFILE)) free(omap, M_FILEDESC); } /* * Allocate a file descriptor for the process. 
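fdalloc() below finds a free slot with fd_first_free(), which scans the per-process fd_map bitmap one long at a time and uses ffsl() to locate the first clear bit. A standalone userspace sketch of the same idea, simplified to a fixed-size map and 8-bit bytes:

#include <stdio.h>
#include <strings.h>    /* ffsl() */

#define ENTRIES         ((int)(sizeof(unsigned long) * 8))
#define SLOT(fd)        ((fd) / ENTRIES)
#define BIT(fd)         (1UL << ((fd) % ENTRIES))

static unsigned long map[4];            /* room for 4 * ENTRIES descriptors */

static int
first_free(int low, int size)
{
        unsigned long mask;
        int off;

        if (low >= size)
                return (low);
        for (off = SLOT(low); off <= SLOT(size - 1); off++) {
                mask = ~map[off];
                if (off == SLOT(low))           /* ignore bits below 'low' */
                        mask &= ~0UL << (low % ENTRIES);
                if (mask != 0)
                        return (off * ENTRIES + ffsl((long)mask) - 1);
        }
        return (size);
}

int
main(void)
{
        map[0] = BIT(0) | BIT(1) | BIT(2);      /* stdin/stdout/stderr in use */
        printf("first free descriptor: %d\n", first_free(0, 4 * ENTRIES));
        return (0);
}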
*/ int fdalloc(struct thread *td, int minfd, int *result) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int fd, maxfd, allocfd; #ifdef RACCT int error; #endif FILEDESC_XLOCK_ASSERT(fdp); if (fdp->fd_freefile > minfd) minfd = fdp->fd_freefile; maxfd = getmaxfd(td); /* * Search the bitmap for a free descriptor starting at minfd. * If none is found, grow the file table. */ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); if (fd >= maxfd) return (EMFILE); if (fd >= fdp->fd_nfiles) { allocfd = min(fd * 2, maxfd); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); error = racct_set(p, RACCT_NOFILE, allocfd); PROC_UNLOCK(p); if (error != 0) return (EMFILE); } #endif /* * fd is already equal to first free descriptor >= minfd, so * we only need to grow the table and we are done. */ fdgrowtable_exp(fdp, allocfd); } /* * Perform some sanity checks, then mark the file descriptor as * used and return it to the caller. */ KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles), ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, ("file descriptor isn't free")); fdused(fdp, fd); *result = fd; return (0); } /* * Allocate n file descriptors for the process. */ int fdallocn(struct thread *td, int minfd, int *fds, int n) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int i; FILEDESC_XLOCK_ASSERT(fdp); for (i = 0; i < n; i++) if (fdalloc(td, 0, &fds[i]) != 0) break; if (i < n) { for (i--; i >= 0; i--) fdunused(fdp, fds[i]); return (EMFILE); } return (0); } /* * Create a new open file structure and allocate a file descriptor for the * process that refers to it. We add one reference to the file for the * descriptor table and one reference for resultfp. This is to prevent us * being preempted and the entry in the descriptor table closed after we * release the FILEDESC lock. */ int falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags, struct filecaps *fcaps) { struct file *fp; int error, fd; error = falloc_noinstall(td, &fp); if (error) return (error); /* no reference held on error */ error = finstall(td, fp, &fd, flags, fcaps); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); } if (resultfp != NULL) *resultfp = fp; /* copy out result */ else fdrop(fp, td); /* release local reference */ if (resultfd != NULL) *resultfd = fd; return (0); } /* * Create a new open file structure without allocating a file descriptor. */ int falloc_noinstall(struct thread *td, struct file **resultfp) { struct file *fp; int maxuserfiles = maxfiles - (maxfiles / 20); int openfiles_new; static struct timeval lastfail; static int curfail; KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__)); openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1; if ((openfiles_new >= maxuserfiles && priv_check(td, PRIV_MAXFILES) != 0) || openfiles_new >= maxfiles) { atomic_subtract_int(&openfiles, 1); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("kern.maxfiles limit exceeded by uid %i, (%s) " "please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm); } return (ENFILE); } fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); refcount_init(&fp->f_count, 1); fp->f_cred = crhold(td->td_ucred); fp->f_ops = &badfileops; *resultfp = fp; return (0); } /* * Install a file in a file descriptor table. 
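fdalloc() returns EMFILE once the process reaches the limit computed by getmaxfd(), i.e. the smaller of RLIMIT_NOFILE and maxfilesperproc. A userspace sketch for inspecting the per-process side of that limit:

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

int
main(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
                err(1, "getrlimit");
        printf("RLIMIT_NOFILE soft limit: %ju, _SC_OPEN_MAX: %ld\n",
            (uintmax_t)rl.rlim_cur, sysconf(_SC_OPEN_MAX));
        return (0);
}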
*/ void _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, struct filecaps *fcaps) { struct filedescent *fde; MPASS(fp != NULL); if (fcaps != NULL) filecaps_validate(fcaps, __func__); FILEDESC_XLOCK_ASSERT(fdp); fde = &fdp->fd_ofiles[fd]; #ifdef CAPABILITIES seq_write_begin(&fde->fde_seq); #endif fde->fde_file = fp; fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0; if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); else filecaps_fill(&fde->fde_caps); #ifdef CAPABILITIES seq_write_end(&fde->fde_seq); #endif } int finstall(struct thread *td, struct file *fp, int *fd, int flags, struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; int error; MPASS(fd != NULL); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { FILEDESC_XUNLOCK(fdp); return (error); } fhold(fp); _finstall(fdp, fp, *fd, flags, fcaps); FILEDESC_XUNLOCK(fdp); return (0); } /* * Build a new filedesc structure from another. * Copy the current, root, and jail root vnode references. * * If fdp is not NULL, return with it shared locked. */ struct filedesc * fdinit(struct filedesc *fdp, bool prepfiles) { struct filedesc0 *newfdp0; struct filedesc *newfdp; newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO); newfdp = &newfdp0->fd_fd; /* Create the file descriptor table. */ FILEDESC_LOCK_INIT(newfdp); refcount_init(&newfdp->fd_refcnt, 1); refcount_init(&newfdp->fd_holdcnt, 1); newfdp->fd_cmask = CMASK; newfdp->fd_map = newfdp0->fd_dmap; newfdp->fd_lastfile = -1; newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files->fdt_nfiles = NDFILE; if (fdp == NULL) return (newfdp); if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles) fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); newfdp->fd_cdir = fdp->fd_cdir; if (newfdp->fd_cdir) vrefact(newfdp->fd_cdir); newfdp->fd_rdir = fdp->fd_rdir; if (newfdp->fd_rdir) vrefact(newfdp->fd_rdir); newfdp->fd_jdir = fdp->fd_jdir; if (newfdp->fd_jdir) vrefact(newfdp->fd_jdir); if (!prepfiles) { FILEDESC_SUNLOCK(fdp); } else { while (fdp->fd_lastfile >= newfdp->fd_nfiles) { FILEDESC_SUNLOCK(fdp); fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); } } return (newfdp); } static struct filedesc * fdhold(struct proc *p) { struct filedesc *fdp; PROC_LOCK_ASSERT(p, MA_OWNED); fdp = p->p_fd; if (fdp != NULL) refcount_acquire(&fdp->fd_holdcnt); return (fdp); } static void fddrop(struct filedesc *fdp) { if (fdp->fd_holdcnt > 1) { if (refcount_release(&fdp->fd_holdcnt) == 0) return; } FILEDESC_LOCK_DESTROY(fdp); uma_zfree(filedesc0_zone, fdp); } /* * Share a filedesc structure. */ struct filedesc * fdshare(struct filedesc *fdp) { refcount_acquire(&fdp->fd_refcnt); return (fdp); } /* * Unshare a filedesc structure, if necessary by making a copy */ void fdunshare(struct thread *td) { struct filedesc *tmp; struct proc *p = td->td_proc; if (p->p_fd->fd_refcnt == 1) return; tmp = fdcopy(p->p_fd); fdescfree(td); p->p_fd = tmp; } void fdinstall_remapped(struct thread *td, struct filedesc *fdp) { fdescfree(td); td->td_proc->p_fd = fdp; } /* * Copy a filedesc structure. A NULL pointer in returns a NULL reference, * this is to ease callers, not catch errors. */ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; int i; MPASS(fdp != NULL); newfdp = fdinit(fdp, true); /* copy all passable descriptors (i.e. 
not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { ofde = &fdp->fd_ofiles[i]; if (ofde->fde_file == NULL || (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) { if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; continue; } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fhold(nfde->fde_file); fdused_init(newfdp, i); newfdp->fd_lastfile = i; } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); return (newfdp); } /* * Copies a filedesc structure, while remapping all file descriptors * stored inside using a translation table. * * File descriptors are copied over to the new file descriptor table, * regardless of whether the close-on-exec flag is set. */ int fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds, struct filedesc **ret) { struct filedesc *newfdp; struct filedescent *nfde, *ofde; int error, i; MPASS(fdp != NULL); newfdp = fdinit(fdp, true); if (nfds > fdp->fd_lastfile + 1) { /* New table cannot be larger than the old one. */ error = E2BIG; goto bad; } /* Copy all passable descriptors (i.e. not kqueue). */ newfdp->fd_freefile = nfds; for (i = 0; i < nfds; ++i) { if (fds[i] < 0 || fds[i] > fdp->fd_lastfile) { /* File descriptor out of bounds. */ error = EBADF; goto bad; } ofde = &fdp->fd_ofiles[fds[i]]; if (ofde->fde_file == NULL) { /* Unused file descriptor. */ error = EBADF; goto bad; } if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) { /* File descriptor cannot be passed. */ error = EINVAL; goto bad; } nfde = &newfdp->fd_ofiles[i]; *nfde = *ofde; filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fhold(nfde->fde_file); fdused_init(newfdp, i); newfdp->fd_lastfile = i; } newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); *ret = newfdp; return (0); bad: FILEDESC_SUNLOCK(fdp); fdescfree_remapped(newfdp); return (error); } /* * Clear POSIX style locks. This is only used when fdp looses a reference (i.e. * one of processes using it exits) and the table used to be shared. */ static void fdclearlocks(struct thread *td) { struct filedesc *fdp; struct filedesc_to_leader *fdtol; struct flock lf; struct file *fp; struct proc *p; struct vnode *vp; int i; p = td->td_proc; fdp = p->p_fd; fdtol = p->p_fdtol; MPASS(fdtol != NULL); FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, ("filedesc_to_refcount botch: fdl_refcount=%d", fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdrop(fp, td); } } retry: if (fdtol->fdl_refcount == 1) { if (fdp->fd_holdleaderscount > 0 && (p->p_leader->p_flag & P_ADVLOCK) != 0) { /* * close() or kern_dup() has cleared a reference * in a shared file descriptor table. */ fdp->fd_holdleaderswakeup = 1; sx_sleep(&fdp->fd_holdleaderscount, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } if (fdtol->fdl_holdcount > 0) { /* * Ensure that fdtol->fdl_leader remains * valid in closef(). 
*/ fdtol->fdl_wakeup = 1; sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0); goto retry; } } fdtol->fdl_refcount--; if (fdtol->fdl_refcount == 0 && fdtol->fdl_holdcount == 0) { fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; fdtol->fdl_prev->fdl_next = fdtol->fdl_next; } else fdtol = NULL; p->p_fdtol = NULL; FILEDESC_XUNLOCK(fdp); if (fdtol != NULL) free(fdtol, M_FILEDESC_TO_LEADER); } /* * Release a filedesc structure. */ static void fdescfree_fds(struct thread *td, struct filedesc *fdp, bool needclose) { struct filedesc0 *fdp0; struct freetable *ft, *tft; struct filedescent *fde; struct file *fp; int i; for (i = 0; i <= fdp->fd_lastfile; i++) { fde = &fdp->fd_ofiles[i]; fp = fde->fde_file; if (fp != NULL) { fdefree_last(fde); if (needclose) (void) closef(fp, td); else fdrop(fp, td); } } if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) free(fdp->fd_map, M_FILEDESC); if (fdp->fd_nfiles > NDFILE) free(fdp->fd_files, M_FILEDESC); fdp0 = (struct filedesc0 *)fdp; SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft) free(ft->ft_table, M_FILEDESC); fddrop(fdp); } void fdescfree(struct thread *td) { struct proc *p; struct filedesc *fdp; struct vnode *cdir, *jdir, *rdir; p = td->td_proc; fdp = p->p_fd; MPASS(fdp != NULL); #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_set(p, RACCT_NOFILE, 0); PROC_UNLOCK(p); } #endif if (p->p_fdtol != NULL) fdclearlocks(td); PROC_LOCK(p); p->p_fd = NULL; PROC_UNLOCK(p); if (refcount_release(&fdp->fd_refcnt) == 0) return; FILEDESC_XLOCK(fdp); cdir = fdp->fd_cdir; fdp->fd_cdir = NULL; rdir = fdp->fd_rdir; fdp->fd_rdir = NULL; jdir = fdp->fd_jdir; fdp->fd_jdir = NULL; FILEDESC_XUNLOCK(fdp); if (cdir != NULL) vrele(cdir); if (rdir != NULL) vrele(rdir); if (jdir != NULL) vrele(jdir); fdescfree_fds(td, fdp, 1); } void fdescfree_remapped(struct filedesc *fdp) { if (fdp->fd_cdir != NULL) vrele(fdp->fd_cdir); if (fdp->fd_rdir != NULL) vrele(fdp->fd_rdir); if (fdp->fd_jdir != NULL) vrele(fdp->fd_jdir); fdescfree_fds(curthread, fdp, 0); } /* * For setugid programs, we don't want to people to use that setugidness * to generate error messages which write to a file which otherwise would * otherwise be off-limits to the process. We check for filesystems where * the vnode can change out from under us after execve (like [lin]procfs). * * Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is * sufficient. We also don't check for setugidness since we know we are. */ static bool is_unsafe(struct file *fp) { struct vnode *vp; if (fp->f_type != DTYPE_VNODE) return (false); vp = fp->f_vnode; return ((vp->v_vflag & VV_PROCDEP) != 0); } /* * Make this setguid thing safe, if at all possible. */ void fdsetugidsafety(struct thread *td) { struct filedesc *fdp; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); for (i = 0; i <= 2; i++) { fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL && is_unsafe(fp)) { FILEDESC_XLOCK(fdp); knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. */ fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } } } /* * If a specific file object occupies a specific file descriptor, close the * file descriptor entry and drop a reference on the file object. This is a * convenience function to handle a subsequent error in a function that calls * falloc() that handles the race that another thread might have closed the * file descriptor out from under the thread creating the file object. 
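fdcloseexec() above closes every descriptor marked UF_EXCLOSE when the process execs. From userland the flag is set either atomically at open time with O_CLOEXEC or later via fcntl(F_SETFD); a small illustrative sketch:

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int fd, flags;

        /* O_CLOEXEC marks the new descriptor close-on-exec atomically. */
        fd = open("/dev/null", O_RDWR | O_CLOEXEC);
        if (fd == -1)
                err(1, "open");
        if ((flags = fcntl(fd, F_GETFD)) == -1)
                err(1, "F_GETFD");
        printf("FD_CLOEXEC is %s\n", (flags & FD_CLOEXEC) ? "set" : "clear");
        /* fdcloseexec() would close this descriptor across execve(). */
        close(fd);
        return (0);
}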
*/ void fdclose(struct thread *td, struct file *fp, int idx) { struct filedesc *fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[idx].fde_file == fp) { fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else FILEDESC_XUNLOCK(fdp); } /* * Close any files on exec? */ void fdcloseexec(struct thread *td) { struct filedesc *fdp; struct filedescent *fde; struct file *fp; int i; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); for (i = 0; i <= fdp->fd_lastfile; i++) { fde = &fdp->fd_ofiles[i]; fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || (fde->fde_flags & UF_EXCLOSE))) { FILEDESC_XLOCK(fdp); fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); FILEDESC_UNLOCK_ASSERT(fdp); } } } /* * It is unsafe for set[ug]id processes to be started with file * descriptors 0..2 closed, as these descriptors are given implicit * significance in the Standard C library. fdcheckstd() will create a * descriptor referencing /dev/null for each of stdin, stdout, and * stderr that is not already open. */ int fdcheckstd(struct thread *td) { struct filedesc *fdp; register_t save; int i, error, devnull; fdp = td->td_proc->p_fd; KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); MPASS(fdp->fd_nfiles >= 3); devnull = -1; for (i = 0; i <= 2; i++) { if (fdp->fd_ofiles[i].fde_file != NULL) continue; save = td->td_retval[0]; if (devnull != -1) { error = kern_dup(td, FDDUP_FIXED, 0, devnull, i); } else { error = kern_openat(td, AT_FDCWD, "/dev/null", UIO_SYSSPACE, O_RDWR, 0); if (error == 0) { devnull = td->td_retval[0]; KASSERT(devnull == i, ("we didn't get our fd")); } } td->td_retval[0] = save; if (error != 0) return (error); } return (0); } /* * Internal form of close. Decrement reference count on file structure. * Note: td may be NULL when closing a file that was being passed in a * message. * * XXXRW: Giant is not required for the caller, but often will be held; this * makes it moderately likely the Giant will be recursed in the VFS case. */ int closef(struct file *fp, struct thread *td) { struct vnode *vp; struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor, and the thread pointer * will be NULL. Callers should be careful only to pass a * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. */ if (fp->f_type == DTYPE_VNODE && td != NULL) { vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, F_UNLCK, &lf, F_POSIX); } fdtol = td->td_proc->p_fdtol; if (fdtol != NULL) { /* * Handle special case where file descriptor table is * shared between multiple process leaders. 
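The long comment above describes the POSIX rule that closef() has to honour: closing any descriptor for a file drops all of the process's record locks on it. A userspace sketch of taking a whole-file write lock and tripping over that rule (the lock file name is arbitrary):

#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
        struct flock fl;
        int fd, fd2;

        fd = open("lockfile", O_RDWR | O_CREAT, 0644);
        if (fd == -1)
                err(1, "open");

        /* Whole-file write lock: SEEK_SET, start 0, len 0. */
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0;
        if (fcntl(fd, F_SETLK, &fl) == -1)
                err(1, "F_SETLK");

        /*
         * The trap described above: closing any descriptor for the file
         * releases all of this process's record locks on it, including
         * the one just taken through fd.
         */
        fd2 = open("lockfile", O_RDONLY);
        if (fd2 != -1)
                close(fd2);     /* the F_WRLCK acquired via fd is now gone */
        close(fd);
        return (0);
}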
*/ fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); for (fdtol = fdtol->fdl_next; fdtol != td->td_proc->p_fdtol; fdtol = fdtol->fdl_next) { if ((fdtol->fdl_leader->p_flag & P_ADVLOCK) == 0) continue; fdtol->fdl_holdcount++; FILEDESC_XUNLOCK(fdp); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); FILEDESC_XLOCK(fdp); fdtol->fdl_holdcount--; if (fdtol->fdl_holdcount == 0 && fdtol->fdl_wakeup != 0) { fdtol->fdl_wakeup = 0; wakeup(fdtol); } } FILEDESC_XUNLOCK(fdp); } } return (fdrop(fp, td)); } /* * Initialize the file pointer with the specified properties. * * The ops are set with release semantics to be certain that the flags, type, * and data are visible when ops is. This is to prevent ops methods from being * called with bad data. */ void finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) { fp->f_data = data; fp->f_flag = flag; fp->f_type = type; atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedescent *fde; int error; FILEDESC_LOCK_ASSERT(fdp); fde = fdeget_locked(fdp, fd); if (fde == NULL) { error = EBADF; goto out; } #ifdef CAPABILITIES error = cap_check(cap_rights_fde(fde), needrightsp); if (error != 0) goto out; #endif if (havecapsp != NULL) filecaps_copy(&fde->fde_caps, havecapsp, true); *fpp = fde->fde_file; error = 0; out: return (error); } int fget_cap(struct thread *td, int fd, cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp) { struct filedesc *fdp = td->td_proc->p_fd; int error; #ifndef CAPABILITIES error = fget_unlocked(fdp, fd, needrightsp, fpp, NULL); if (error == 0 && havecapsp != NULL) filecaps_fill(havecapsp); #else struct file *fp; seq_t seq; for (;;) { error = fget_unlocked(fdp, fd, needrightsp, &fp, &seq); if (error != 0) return (error); if (havecapsp != NULL) { if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecapsp, false)) { fdrop(fp, td); goto get_locked; } } if (!fd_modified(fdp, fd, seq)) break; fdrop(fp, td); } *fpp = fp; return (0); get_locked: FILEDESC_SLOCK(fdp); error = fget_cap_locked(fdp, fd, needrightsp, fpp, havecapsp); if (error == 0) fhold(*fpp); FILEDESC_SUNLOCK(fdp); #endif return (error); } int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, struct file **fpp, seq_t *seqp) { #ifdef CAPABILITIES struct filedescent *fde; #endif struct fdescenttbl *fdt; struct file *fp; u_int count; #ifdef CAPABILITIES seq_t seq; cap_rights_t haverights; int error; #endif fdt = fdp->fd_files; if ((u_int)fd >= fdt->fdt_nfiles) return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have * to use a cmpset loop rather than an atomic_add. The descriptor * must be re-verified once we acquire a reference to be certain * that the identity is still correct and we did not lose a race * due to preemption. 
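The loop that follows acquires a reference only while f_count is non-zero, and the switch from atomic_cmpset to atomic_fcmpset lets a failed attempt hand back the freshly observed count instead of forcing a manual re-read. A self-contained C11 sketch of the same conditional-acquire pattern (illustrative, not the kernel primitive):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Take a reference only while the count is non-zero.  On failure,
 * compare_exchange_weak writes the current value back into 'count',
 * so the loop does not have to re-read it by hand (the property the
 * atomic_fcmpset_acq_int() change relies on).
 */
static bool
try_hold(_Atomic unsigned int *refs)
{
        unsigned int count;

        count = atomic_load_explicit(refs, memory_order_relaxed);
        do {
                if (count == 0)
                        return (false); /* object already being torn down */
        } while (!atomic_compare_exchange_weak_explicit(refs, &count,
            count + 1, memory_order_acquire, memory_order_relaxed));
        return (true);
}

int
main(void)
{
        _Atomic unsigned int refs = 1;

        printf("acquired: %d, count now %u\n", try_hold(&refs) ? 1 : 0,
            atomic_load_explicit(&refs, memory_order_relaxed));
        return (0);
}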
*/ for (;;) { #ifdef CAPABILITIES seq = seq_read(fd_seq(fdt, fd)); fde = &fdt->fdt_ofiles[fd]; haverights = *cap_rights_fde(fde); fp = fde->fde_file; if (!seq_consistent(fd_seq(fdt, fd), seq)) continue; #else fp = fdt->fdt_ofiles[fd].fde_file; #endif if (fp == NULL) return (EBADF); #ifdef CAPABILITIES error = cap_check(&haverights, needrightsp); if (error != 0) return (error); #endif - retry: count = fp->f_count; + retry: if (count == 0) { /* * Force a reload. Other thread could reallocate the * table before this fd was closed, so it possible that * there is a stale fp pointer in cached version. */ fdt = *(struct fdescenttbl * volatile *)&(fdp->fd_files); continue; } /* * Use an acquire barrier to force re-reading of fdt so it is * refreshed for verification. */ - if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) == 0) + if (atomic_fcmpset_acq_int(&fp->f_count, &count, count + 1) == 0) goto retry; fdt = fdp->fd_files; #ifdef CAPABILITIES if (seq_consistent_nomb(fd_seq(fdt, fd), seq)) #else if (fp == fdt->fdt_ofiles[fd].fde_file) #endif break; fdrop(fp, curthread); } *fpp = fp; if (seqp != NULL) { #ifdef CAPABILITIES *seqp = seq; #endif } return (0); } /* * Extract the file pointer associated with the specified descriptor for the * current user process. * * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * * File's rights will be checked against the capability rights mask. * * If an error occurred the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). */ static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, cap_rights_t *needrightsp, seq_t *seqp) { struct filedesc *fdp; struct file *fp; int error; *fpp = NULL; fdp = td->td_proc->p_fd; error = fget_unlocked(fdp, fd, needrightsp, &fp, seqp); if (error != 0) return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); } /* * FREAD and FWRITE failure return EBADF as per POSIX. */ error = 0; switch (flags) { case FREAD: case FWRITE: if ((fp->f_flag & flags) == 0) error = EBADF; break; case FEXEC: if ((fp->f_flag & (FREAD | FEXEC)) == 0 || ((fp->f_flag & FWRITE) != 0)) error = EBADF; break; case 0: break; default: KASSERT(0, ("wrong flags")); } if (error != 0) { fdrop(fp, td); return (error); } *fpp = fp; return (0); } int fget(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, 0, rightsp, NULL)); } int fget_mmap(struct thread *td, int fd, cap_rights_t *rightsp, u_char *maxprotp, struct file **fpp) { int error; #ifndef CAPABILITIES error = _fget(td, fd, fpp, 0, rightsp, NULL); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #else struct filedesc *fdp = td->td_proc->p_fd; seq_t seq; MPASS(cap_rights_is_set(rightsp, CAP_MMAP)); for (;;) { error = _fget(td, fd, fpp, 0, rightsp, &seq); if (error != 0) return (error); /* * If requested, convert capability rights to access flags. 
*/ if (maxprotp != NULL) *maxprotp = cap_rights_to_vmprot(cap_rights(fdp, fd)); if (!fd_modified(fdp, fd, seq)) break; fdrop(*fpp, td); } #endif return (error); } int fget_read(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FREAD, rightsp, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { return (_fget(td, fd, fpp, FWRITE, rightsp, NULL)); } int fget_fcntl(struct thread *td, int fd, cap_rights_t *rightsp, int needfcntl, struct file **fpp) { struct filedesc *fdp = td->td_proc->p_fd; #ifndef CAPABILITIES return (fget_unlocked(fdp, fd, rightsp, fpp, NULL)); #else int error; seq_t seq; MPASS(cap_rights_is_set(rightsp, CAP_FCNTL)); for (;;) { error = fget_unlocked(fdp, fd, rightsp, fpp, &seq); if (error != 0) return (error); error = cap_fcntl_check(fdp, fd, needfcntl); if (!fd_modified(fdp, fd, seq)) break; fdrop(*fpp, td); } if (error != 0) { fdrop(*fpp, td); *fpp = NULL; } return (error); #endif } /* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. * * XXX: what about the unused flags ? */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t *needrightsp, struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; error = _fget(td, fd, &fp, flags, needrightsp, NULL); if (error != 0) return (error); if (fp->f_vnode == NULL) { error = EINVAL; } else { *vpp = fp->f_vnode; vrefact(*vpp); } fdrop(fp, td); return (error); } int fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, 0, rightsp, vpp)); } int fgetvp_rights(struct thread *td, int fd, cap_rights_t *needrightsp, struct filecaps *havecaps, struct vnode **vpp) { struct filedesc *fdp; struct filecaps caps; struct file *fp; int error; fdp = td->td_proc->p_fd; error = fget_cap_locked(fdp, fd, needrightsp, &fp, &caps); if (error != 0) return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto out; } if (fp->f_vnode == NULL) { error = EINVAL; goto out; } *havecaps = caps; *vpp = fp->f_vnode; vrefact(*vpp); return (0); out: filecaps_free(&caps); return (error); } int fgetvp_read(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FREAD, rightsp, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FEXEC, rightsp, vpp)); } #ifdef notyet int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp) { return (_fgetvp(td, fd, FWRITE, rightsp, vpp)); } #endif /* * Handle the last reference to a file being closed. */ int _fdrop(struct file *fp, struct thread *td) { int error; if (fp->f_count != 0) panic("fdrop: count %d", fp->f_count); error = fo_close(fp, td); atomic_subtract_int(&openfiles, 1); crfree(fp->f_cred); free(fp->f_advice, M_FADVISE); uma_zfree(file_zone, fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on the entire file * (l_whence = SEEK_SET, l_start = 0, l_len = 0). 
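The function defined below, sys_flock(), is the kernel side of flock(2); the whole-file record lock described in the comment above is built on the caller's behalf and is invisible to userland. A small illustrative program exercising the non-blocking lock and explicit unlock paths (hypothetical file name, minimal error handling):

#include <err.h>
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	fd = open("/tmp/flock.demo", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");

	/* Non-blocking exclusive lock; fails with EWOULDBLOCK if another
	 * process already holds the lock. */
	if (flock(fd, LOCK_EX | LOCK_NB) == -1)
		err(1, "flock(LOCK_EX)");

	/* ... critical section ... */

	/* Explicit unlock; closing the last descriptor referring to the open
	 * file would also release it. */
	if (flock(fd, LOCK_UN) == -1)
		err(1, "flock(LOCK_UN)");
	close(fd);
	return (0);
}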
*/ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int sys_flock(struct thread *td, struct flock_args *uap) { struct file *fp; struct vnode *vp; struct flock lf; cap_rights_t rights; int error; error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FLOCK), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); return (EOPNOTSUPP); } vp = fp->f_vnode; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; atomic_clear_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done2; } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else { error = EBADF; goto done2; } atomic_set_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: fdrop(fp, td); return (error); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) { struct filedescent *newfde, *oldfde; struct file *fp; int error, indx; KASSERT(openerror == ENODEV || openerror == ENXIO, ("unexpected error %d in %s", openerror, __func__)); /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ FILEDESC_XLOCK(fdp); if ((fp = fget_locked(fdp, dfd)) == NULL) { FILEDESC_XUNLOCK(fdp); return (EBADF); } error = fdalloc(td, 0, &indx); if (error != 0) { FILEDESC_XUNLOCK(fdp); return (error); } /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor (indx) and return. * * For ENXIO steal away the file structure from (dfd) and store it in * (indx). (dfd) is effectively closed by this operation. */ switch (openerror) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { fdunused(fdp, indx); FILEDESC_XUNLOCK(fdp); return (EACCES); } fhold(fp); newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps, true); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. */ newfde = &fdp->fd_ofiles[indx]; oldfde = &fdp->fd_ofiles[dfd]; #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif memcpy(newfde, oldfde, fde_change_size); oldfde->fde_file = NULL; fdunused(fdp, dfd); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); #endif break; } FILEDESC_XUNLOCK(fdp); *indxp = indx; return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, "Allow a process to chroot(2) if it has a directory open"); /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. 
*/ static int chroot_refuse_vdir_fds(struct filedesc *fdp) { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp); for (fd = 0; fd <= fdp->fd_lastfile; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking priv_check() and mac_vnode_check_chroot() to * authorize this operation. */ int pwd_chroot(struct thread *td, struct vnode *vp) { struct filedesc *fdp; struct vnode *oldvp; int error; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error != 0) { FILEDESC_XUNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; vrefact(vp); fdp->fd_rdir = vp; if (fdp->fd_jdir == NULL) { vrefact(vp); fdp->fd_jdir = vp; } FILEDESC_XUNLOCK(fdp); vrele(oldvp); return (0); } void pwd_chdir(struct thread *td, struct vnode *vp) { struct filedesc *fdp; struct vnode *oldvp; fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); VNASSERT(vp->v_usecount > 0, vp, ("chdir to a vnode with zero usecount")); oldvp = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_XUNLOCK(fdp); vrele(oldvp); } /* * Scan all active processes and prisons to see if any of them have a current * or root directory of `olddp'. If so, replace them with the new mount point. */ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { struct filedesc *fdp; struct prison *pr; struct proc *p; int nrele; if (vrefcnt(olddp) == 1) return; nrele = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; FILEDESC_XLOCK(fdp); if (fdp->fd_cdir == olddp) { vrefact(newdp); fdp->fd_cdir = newdp; nrele++; } if (fdp->fd_rdir == olddp) { vrefact(newdp); fdp->fd_rdir = newdp; nrele++; } if (fdp->fd_jdir == olddp) { vrefact(newdp); fdp->fd_jdir = newdp; nrele++; } FILEDESC_XUNLOCK(fdp); fddrop(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { vrefact(newdp); rootvnode = newdp; nrele++; } mtx_lock(&prison0.pr_mtx); if (prison0.pr_root == olddp) { vrefact(newdp); prison0.pr_root = newdp; nrele++; } mtx_unlock(&prison0.pr_mtx); sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { mtx_lock(&pr->pr_mtx); if (pr->pr_root == olddp) { vrefact(newdp); pr->pr_root = newdp; nrele++; } mtx_unlock(&pr->pr_mtx); } sx_sunlock(&allprison_lock); while (nrele--) vrele(olddp); } struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) { struct filedesc_to_leader *fdtol; fdtol = malloc(sizeof(struct filedesc_to_leader), M_FILEDESC_TO_LEADER, M_WAITOK); fdtol->fdl_refcount = 1; fdtol->fdl_holdcount = 0; fdtol->fdl_wakeup = 0; fdtol->fdl_leader = leader; if (old != NULL) { FILEDESC_XLOCK(fdp); fdtol->fdl_next = old->fdl_next; fdtol->fdl_prev = old; old->fdl_next = fdtol; fdtol->fdl_next->fdl_prev = fdtol; FILEDESC_XUNLOCK(fdp); } else { fdtol->fdl_next = fdtol; fdtol->fdl_prev = fdtol; } return (fdtol); } static int sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS) { struct filedesc *fdp; int i, count, slots; if (*(int *)arg1 != 0) return (EINVAL); fdp = curproc->p_fd; count = 0; FILEDESC_SLOCK(fdp); slots = NDSLOTS(fdp->fd_lastfile + 1); for (i = 0; i < slots; i++) count += bitcountl(fdp->fd_map[i]); FILEDESC_SUNLOCK(fdp); return (SYSCTL_OUT(req, &count, 
sizeof(count))); } static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds, CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds, "Number of open file descriptors"); /* * Get file structures globally. */ static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) { struct xfile xf; struct filedesc *fdp; struct file *fp; struct proc *p; int error, n; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); if (req->oldptr == NULL) { n = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; /* overestimates sparse tables. */ if (fdp->fd_lastfile > 0) n += fdp->fd_lastfile; fddrop(fdp); } sx_sunlock(&allproc_lock); return (SYSCTL_OUT(req, 0, n * sizeof(xf))); } error = 0; bzero(&xf, sizeof(xf)); xf.xf_size = sizeof(xf); sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } if (p_cansee(req->td, p) != 0) { PROC_UNLOCK(p); continue; } xf.xf_pid = p->p_pid; xf.xf_uid = p->p_ucred->cr_uid; fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) { if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; xf.xf_data = fp->f_data; xf.xf_vnode = fp->f_vnode; xf.xf_type = fp->f_type; xf.xf_count = fp->f_count; xf.xf_msgcount = 0; xf.xf_offset = foffset_get(fp); xf.xf_flag = fp->f_flag; error = SYSCTL_OUT(req, &xf, sizeof(xf)); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); if (error) break; } sx_sunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE, 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); #ifdef KINFO_FILE_SIZE CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); #endif static int xlate_fflags(int fflags) { static const struct { int fflag; int kf_fflag; } fflags_table[] = { { FAPPEND, KF_FLAG_APPEND }, { FASYNC, KF_FLAG_ASYNC }, { FFSYNC, KF_FLAG_FSYNC }, { FHASLOCK, KF_FLAG_HASLOCK }, { FNONBLOCK, KF_FLAG_NONBLOCK }, { FREAD, KF_FLAG_READ }, { FWRITE, KF_FLAG_WRITE }, { O_CREAT, KF_FLAG_CREAT }, { O_DIRECT, KF_FLAG_DIRECT }, { O_EXCL, KF_FLAG_EXCL }, { O_EXEC, KF_FLAG_EXEC }, { O_EXLOCK, KF_FLAG_EXLOCK }, { O_NOFOLLOW, KF_FLAG_NOFOLLOW }, { O_SHLOCK, KF_FLAG_SHLOCK }, { O_TRUNC, KF_FLAG_TRUNC } }; unsigned int i; int kflags; kflags = 0; for (i = 0; i < nitems(fflags_table); i++) if (fflags & fflags_table[i].fflag) kflags |= fflags_table[i].kf_fflag; return (kflags); } /* Trim unused data from kf_path by truncating the structure size. */ static void pack_kinfo(struct kinfo_file *kif) { kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + strlen(kif->kf_path) + 1; kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); } static void export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp, struct kinfo_file *kif, struct filedesc *fdp, int flags) { int error; bzero(kif, sizeof(*kif)); /* Set a default type to allow for empty fill_kinfo() methods. */ kif->kf_type = KF_TYPE_UNKNOWN; kif->kf_flags = xlate_fflags(fp->f_flag); if (rightsp != NULL) kif->kf_cap_rights = *rightsp; else cap_rights_init(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = fp->f_count; kif->kf_offset = foffset_get(fp); /* * This may drop the filedesc lock, so the 'fp' cannot be * accessed after this call. 
*/ error = fo_fill_kinfo(fp, kif, fdp); if (error == 0) kif->kf_status |= KF_ATTR_VALID; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); } static void export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags, struct kinfo_file *kif, int flags) { int error; bzero(kif, sizeof(*kif)); kif->kf_type = KF_TYPE_VNODE; error = vn_fill_kinfo_vnode(vp, kif); if (error == 0) kif->kf_status |= KF_ATTR_VALID; kif->kf_flags = xlate_fflags(fflags); cap_rights_init(&kif->kf_cap_rights); kif->kf_fd = fd; kif->kf_ref_count = -1; kif->kf_offset = -1; if ((flags & KERN_FILEDESC_PACK_KINFO) != 0) pack_kinfo(kif); else kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t)); vrele(vp); } struct export_fd_buf { struct filedesc *fdp; struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; int flags; }; static int export_kinfo_to_sb(struct export_fd_buf *efbuf) { struct kinfo_file *kif; kif = &efbuf->kif; if (efbuf->remainder != -1) { if (efbuf->remainder < kif->kf_structsize) { /* Terminate export. */ efbuf->remainder = 0; return (0); } efbuf->remainder -= kif->kf_structsize; } return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 0 : ENOMEM); } static int export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (0); export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp, efbuf->flags); FILEDESC_SUNLOCK(efbuf->fdp); error = export_kinfo_to_sb(efbuf); FILEDESC_SLOCK(efbuf->fdp); return (error); } static int export_vnode_to_sb(struct vnode *vp, int fd, int fflags, struct export_fd_buf *efbuf) { int error; if (efbuf->remainder == 0) return (0); if (efbuf->fdp != NULL) FILEDESC_SUNLOCK(efbuf->fdp); export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags); error = export_kinfo_to_sb(efbuf); if (efbuf->fdp != NULL) FILEDESC_SLOCK(efbuf->fdp); return (error); } /* * Store a process file descriptor information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. */ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { struct file *fp; struct filedesc *fdp; struct export_fd_buf *efbuf; struct vnode *cttyvp, *textvp, *tracevp; int error, i; cap_rights_t rights; PROC_LOCK_ASSERT(p, MA_OWNED); /* ktrace vnode */ tracevp = p->p_tracevp; if (tracevp != NULL) vrefact(tracevp); /* text vnode */ textvp = p->p_textvp; if (textvp != NULL) vrefact(textvp); /* Controlling tty. 
*/ cttyvp = NULL; if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) { cttyvp = p->p_pgrp->pg_session->s_ttyvp; if (cttyvp != NULL) vrefact(cttyvp); } fdp = fdhold(p); PROC_UNLOCK(p); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; efbuf->flags = flags; if (tracevp != NULL) export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE, efbuf); if (textvp != NULL) export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD, efbuf); if (cttyvp != NULL) export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE, efbuf); error = 0; if (fdp == NULL) goto fail; efbuf->fdp = fdp; FILEDESC_SLOCK(fdp); /* working directory */ if (fdp->fd_cdir != NULL) { vrefact(fdp->fd_cdir); export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } /* root directory */ if (fdp->fd_rdir != NULL) { vrefact(fdp->fd_rdir); export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); } /* jail directory */ if (fdp->fd_jdir != NULL) { vrefact(fdp->fd_jdir); export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); } for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; #ifdef CAPABILITIES rights = *cap_rights(fdp, i); #else /* !CAPABILITIES */ cap_rights_init(&rights); #endif /* * Create sysctl entry. It is OK to drop the filedesc * lock inside of export_file_to_sb() as we will * re-validate and re-evaluate its properties when the * loop continues. */ error = export_file_to_sb(fp, i, &rights, efbuf); if (error != 0 || efbuf->remainder == 0) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); fail: free(efbuf, M_TEMP); return (error); } #define FILEDESC_SBUF_SIZE (sizeof(struct kinfo_file) * 5) /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_filedesc_out(p, &sb, maxlen, KERN_FILEDESC_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? 
error : error2); } #ifdef KINFO_OFILE_SIZE CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE); #endif #ifdef COMPAT_FREEBSD7 static void kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif) { okif->kf_structsize = sizeof(*okif); okif->kf_type = kif->kf_type; okif->kf_fd = kif->kf_fd; okif->kf_ref_count = kif->kf_ref_count; okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE | KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK | KF_FLAG_DIRECT | KF_FLAG_HASLOCK); okif->kf_offset = kif->kf_offset; okif->kf_vnode_type = kif->kf_vnode_type; okif->kf_sock_domain = kif->kf_sock_domain; okif->kf_sock_type = kif->kf_sock_type; okif->kf_sock_protocol = kif->kf_sock_protocol; strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path)); okif->kf_sa_local = kif->kf_sa_local; okif->kf_sa_peer = kif->kf_sa_peer; } static int export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif, struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req) { int error; vrefact(vp); FILEDESC_SUNLOCK(fdp); export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); FILEDESC_SLOCK(fdp); return (error); } /* * Get per-process file descriptors for use by procstat(1), et al. */ static int sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) { struct kinfo_ofile *okif; struct kinfo_file *kif; struct filedesc *fdp; int error, i, *name; struct file *fp; struct proc *p; name = (int *)arg1; error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) return (error); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) return (ENOENT); kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); FILEDESC_SLOCK(fdp); if (fdp->fd_cdir != NULL) export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, okif, fdp, req); if (fdp->fd_rdir != NULL) export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, okif, fdp, req); if (fdp->fd_jdir != NULL) export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, okif, fdp, req); for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; export_file_to_kinfo(fp, i, NULL, kif, fdp, KERN_FILEDESC_PACK_KINFO); FILEDESC_SUNLOCK(fdp); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); FILEDESC_SLOCK(fdp); if (error) break; } FILEDESC_SUNLOCK(fdp); fddrop(fdp); free(kif, M_TEMP); free(okif, M_TEMP); return (0); } static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc, "Process ofiledesc entries"); #endif /* COMPAT_FREEBSD7 */ int vntype_to_kinfo(int vtype) { struct { int vtype; int kf_vtype; } vtypes_table[] = { { VBAD, KF_VTYPE_VBAD }, { VBLK, KF_VTYPE_VBLK }, { VCHR, KF_VTYPE_VCHR }, { VDIR, KF_VTYPE_VDIR }, { VFIFO, KF_VTYPE_VFIFO }, { VLNK, KF_VTYPE_VLNK }, { VNON, KF_VTYPE_VNON }, { VREG, KF_VTYPE_VREG }, { VSOCK, KF_VTYPE_VSOCK } }; unsigned int i; /* * Perform vtype translation. */ for (i = 0; i < nitems(vtypes_table); i++) if (vtypes_table[i].vtype == vtype) return (vtypes_table[i].kf_vtype); return (KF_VTYPE_UNKNOWN); } static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc, "Process filedesc entries"); /* * Store a process current working directory information to sbuf. * * Takes a locked proc as argument, and returns with the proc unlocked. 
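The kern.proc.filedesc handler above is what procstat(1) and libprocstat ultimately consume. Reading the same data directly amounts to issuing the {CTL_KERN, KERN_PROC, KERN_PROC_FILEDESC, pid} MIB and walking the variable-length kinfo_file records by kf_structsize; a rough FreeBSD-specific sketch for the calling process (minimal error handling, field names as in sys/user.h):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_FILEDESC, getpid() };
	size_t len = 0;
	char *buf, *p;

	/* First call sizes the buffer, second call fills it. */
	if (sysctl(mib, 4, NULL, &len, NULL, 0) == -1)
		return (1);
	len = len * 4 / 3;		/* slack in case the table grew */
	if ((buf = malloc(len)) == NULL)
		return (1);
	if (sysctl(mib, 4, buf, &len, NULL, 0) == -1)
		return (1);

	/* Records are variable-sized; kf_structsize gives the stride. */
	for (p = buf; p < buf + len;) {
		struct kinfo_file *kif = (struct kinfo_file *)(void *)p;

		if (kif->kf_structsize == 0)
			break;
		printf("fd %3d type %d path %s\n",
		    kif->kf_fd, kif->kf_type, kif->kf_path);
		p += kif->kf_structsize;
	}
	free(buf);
	return (0);
}

Using the caller's own pid sidesteps the debugging-permission check the handler performs when inspecting other processes.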
*/ int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { struct filedesc *fdp; struct export_fd_buf *efbuf; int error; PROC_LOCK_ASSERT(p, MA_OWNED); fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) return (EINVAL); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = fdp; efbuf->sb = sb; efbuf->remainder = maxlen; FILEDESC_SLOCK(fdp); if (fdp->fd_cdir == NULL) error = EINVAL; else { vrefact(fdp->fd_cdir); error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } FILEDESC_SUNLOCK(fdp); fddrop(fdp); free(efbuf, M_TEMP); return (error); } /* * Get per-process current working directory. */ static int sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct proc *p; ssize_t maxlen; int error, error2, *name; name = (int *)arg1; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p); if (error != 0) { sbuf_delete(&sb); return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; error = kern_proc_cwd_out(p, &sb, maxlen); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_cwd, "Process current working directory"); #ifdef DDB /* * For the purposes of debugging, generate a human-readable string for the * file type. */ static const char * file_type_to_name(short type) { switch (type) { case 0: return ("zero"); case DTYPE_VNODE: return ("vnod"); case DTYPE_SOCKET: return ("sock"); case DTYPE_PIPE: return ("pipe"); case DTYPE_FIFO: return ("fifo"); case DTYPE_KQUEUE: return ("kque"); case DTYPE_CRYPTO: return ("crpt"); case DTYPE_MQUEUE: return ("mque"); case DTYPE_SHM: return ("shm"); case DTYPE_SEM: return ("ksem"); default: return ("unkn"); } } /* * For the purposes of debugging, identify a process (if any, perhaps one of * many) that references the passed file in its file descriptor array. Return * NULL if none. */ static struct proc * file_to_first_proc(struct file *fp) { struct filedesc *fdp; struct proc *p; int n; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; fdp = p->p_fd; if (fdp == NULL) continue; for (n = 0; n <= fdp->fd_lastfile; n++) { if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } return (NULL); } static void db_print_file(struct file *fp, int header) { struct proc *p; if (header) db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", "File", "Type", "Data", "Flag", "GCFl", "Count", "MCount", "Vnode", "FPID", "FCmd"); p = file_to_first_proc(fp); db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 0, fp->f_count, 0, fp->f_vnode, p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); } DB_SHOW_COMMAND(file, db_show_file) { struct file *fp; if (!have_addr) { db_printf("usage: show file \n"); return; } fp = (struct file *)addr; db_print_file(fp, 1); } DB_SHOW_COMMAND(files, db_show_files) { struct filedesc *fdp; struct file *fp; struct proc *p; int header; int n; header = 1; FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n <= fdp->fd_lastfile; ++n) { if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; } } } #endif SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, "Maximum files allowed open per process"); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "Maximum number of files"); SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); /* ARGSUSED*/ static void filelistinit(void *dummy) { file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); /*-------------------------------------------------------------------*/ static int badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EBADF); } static int badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } static int badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return (0); } static int badfo_kqfilter(struct file *fp, struct knote *kn) { return (EBADF); } static int badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_close(struct file *fp, struct thread *td) { return (0); } static int badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EBADF); } static int badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EBADF); } static int badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { return (0); } struct fileops badfileops = { .fo_read = badfo_readwrite, .fo_write = badfo_readwrite, .fo_truncate = badfo_truncate, .fo_ioctl = badfo_ioctl, .fo_poll = badfo_poll, .fo_kqfilter = badfo_kqfilter, .fo_stat = badfo_stat, .fo_close = badfo_close, .fo_chmod = badfo_chmod, .fo_chown = badfo_chown, .fo_sendfile = badfo_sendfile, .fo_fill_kinfo = badfo_fill_kinfo, }; int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EOPNOTSUPP); } int invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { return (ENOTTY); } int invfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { return 
(poll_no_poll(events)); } int invfo_kqfilter(struct file *fp, struct knote *kn) { return (EINVAL); } int invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { return (EINVAL); } int invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { return (EINVAL); } /*-------------------------------------------------------------------*/ /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. * * XXX: we could give this one a cloning event handler if necessary. */ /* ARGSUSED */ static int fdopen(struct cdev *dev, int mode, int type, struct thread *td) { /* * XXX Kludge: set curthread->td_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ td->td_dupfd = dev2unit(dev); return (ENODEV); } static struct cdevsw fildesc_cdevsw = { .d_version = D_VERSION, .d_open = fdopen, .d_name = "FD", }; static void fildesc_drvinit(void *unused) { struct cdev *dev; dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/0"); make_dev_alias(dev, "stdin"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/1"); make_dev_alias(dev, "stdout"); dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL, UID_ROOT, GID_WHEEL, 0666, "fd/2"); make_dev_alias(dev, "stderr"); } SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL); Index: projects/netbsd-tests-upstream-01-2017/sys/kern/subr_witness.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/kern/subr_witness.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/kern/subr_witness.c (revision 313267) @@ -1,3025 +1,3024 @@ /*- * Copyright (c) 2008 Isilon Systems, Inc. * Copyright (c) 2008 Ilya Maykov * Copyright (c) 1998 Berkeley Software Design, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Implementation of the `witness' lock verifier. Originally implemented for * mutexes in BSD/OS. Extended to handle generic lock objects and lock * classes in FreeBSD. */ /* * Main Entry: witness * Pronunciation: 'wit-n&s * Function: noun * Etymology: Middle English witnesse, from Old English witnes knowledge, * testimony, witness, from 2wit * Date: before 12th century * 1 : attestation of a fact or event : TESTIMONY * 2 : one that gives evidence; specifically : one who testifies in * a cause or before a judicial tribunal * 3 : one asked to be present at a transaction so as to be able to * testify to its having taken place * 4 : one who has personal knowledge of something * 5 a : something serving as evidence or proof : SIGN * b : public affirmation by word or example of usually * religious faith or conviction * 6 capitalized : a member of the Jehovah's Witnesses */ /* * Special rules concerning Giant and lock orders: * * 1) Giant must be acquired before any other mutexes. Stated another way, * no other mutex may be held when Giant is acquired. * * 2) Giant must be released when blocking on a sleepable lock. * * This rule is less obvious, but is a result of Giant providing the same * semantics as spl(). Basically, when a thread sleeps, it must release * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule * 2). * * 3) Giant may be acquired before or after sleepable locks. * * This rule is also not quite as obvious. Giant may be acquired after * a sleepable lock because it is a non-sleepable lock and non-sleepable * locks may always be acquired while holding a sleepable lock. The second * case, Giant before a sleepable lock, follows from rule 2) above. Suppose * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to * execute. Thus, acquiring Giant both before and after a sleepable lock * will not result in a lock order reversal. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_stack.h" #include "opt_witness.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #if !defined(DDB) && !defined(STACK) #error "DDB or STACK options are required for WITNESS" #endif /* Note that these traces do not work with KTR_ALQ. */ #if 0 #define KTR_WITNESS KTR_SUBSYS #else #define KTR_WITNESS 0 #endif #define LI_RECURSEMASK 0x0000ffff /* Recursion depth of lock instance. */ #define LI_EXCLUSIVE 0x00010000 /* Exclusive lock instance. */ #define LI_NORELEASE 0x00020000 /* Lock not allowed to be released. 
*/ /* Define this to check for blessed mutexes */ #undef BLESSING #ifndef WITNESS_COUNT #define WITNESS_COUNT 1536 #endif #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ #define WITNESS_PENDLIST (1024 + MAXCPU) /* Allocate 256 KB of stack data space */ #define WITNESS_LO_DATA_COUNT 2048 /* Prime, gives load factor of ~2 at full load */ #define WITNESS_LO_HASH_SIZE 1021 /* * XXX: This is somewhat bogus, as we assume here that at most 2048 threads * will hold LOCK_NCHILDREN locks. We handle failure ok, and we should * probably be safe for the most part, but it's still a SWAG. */ #define LOCK_NCHILDREN 5 #define LOCK_CHILDCOUNT 2048 #define MAX_W_NAME 64 #define FULLGRAPH_SBUF_SIZE 512 /* * These flags go in the witness relationship matrix and describe the * relationship between any two struct witness objects. */ #define WITNESS_UNRELATED 0x00 /* No lock order relation. */ #define WITNESS_PARENT 0x01 /* Parent, aka direct ancestor. */ #define WITNESS_ANCESTOR 0x02 /* Direct or indirect ancestor. */ #define WITNESS_CHILD 0x04 /* Child, aka direct descendant. */ #define WITNESS_DESCENDANT 0x08 /* Direct or indirect descendant. */ #define WITNESS_ANCESTOR_MASK (WITNESS_PARENT | WITNESS_ANCESTOR) #define WITNESS_DESCENDANT_MASK (WITNESS_CHILD | WITNESS_DESCENDANT) #define WITNESS_RELATED_MASK \ (WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK) #define WITNESS_REVERSAL 0x10 /* A lock order reversal has been * observed. */ #define WITNESS_RESERVED1 0x20 /* Unused flag, reserved. */ #define WITNESS_RESERVED2 0x40 /* Unused flag, reserved. */ #define WITNESS_LOCK_ORDER_KNOWN 0x80 /* This lock order is known. */ /* Descendant to ancestor flags */ #define WITNESS_DTOA(x) (((x) & WITNESS_RELATED_MASK) >> 2) /* Ancestor to descendant flags */ #define WITNESS_ATOD(x) (((x) & WITNESS_RELATED_MASK) << 2) #define WITNESS_INDEX_ASSERT(i) \ MPASS((i) > 0 && (i) <= w_max_used_index && (i) < witness_count) static MALLOC_DEFINE(M_WITNESS, "Witness", "Witness"); /* * Lock instances. A lock instance is the data associated with a lock while * it is held by witness. For example, a lock instance will hold the * recursion count of a lock. Lock instances are held in lists. Spin locks * are held in a per-cpu list while sleep locks are held in per-thread list. */ struct lock_instance { struct lock_object *li_lock; const char *li_file; int li_line; u_int li_flags; }; /* * A simple list type used to build the list of locks held by a thread * or CPU. We can't simply embed the list in struct lock_object since a * lock may be held by more than one thread if it is a shared lock. Locks * are added to the head of the list, so we fill up each list entry from * "the back" logically. To ease some of the arithmetic, we actually fill * in each list entry the normal way (children[0] then children[1], etc.) but * when we traverse the list we read children[count-1] as the first entry * down to children[0] as the final entry. */ struct lock_list_entry { struct lock_list_entry *ll_next; struct lock_instance ll_children[LOCK_NCHILDREN]; u_int ll_count; }; /* * The main witness structure. One of these per named lock type in the system * (for example, "vnode interlock"). */ struct witness { char w_name[MAX_W_NAME]; uint32_t w_index; /* Index in the relationship matrix */ struct lock_class *w_class; STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ struct witness *w_hash_next; /* Linked list in hash buckets. 
*/ const char *w_file; /* File where last acquired */ uint32_t w_line; /* Line where last acquired */ uint32_t w_refcount; uint16_t w_num_ancestors; /* direct/indirect * ancestor count */ uint16_t w_num_descendants; /* direct/indirect * descendant count */ int16_t w_ddb_level; unsigned w_displayed:1; unsigned w_reversed:1; }; STAILQ_HEAD(witness_list, witness); /* * The witness hash table. Keys are witness names (const char *), elements are * witness objects (struct witness *). */ struct witness_hash { struct witness *wh_array[WITNESS_HASH_SIZE]; uint32_t wh_size; uint32_t wh_count; }; /* * Key type for the lock order data hash table. */ struct witness_lock_order_key { uint16_t from; uint16_t to; }; struct witness_lock_order_data { struct stack wlod_stack; struct witness_lock_order_key wlod_key; struct witness_lock_order_data *wlod_next; }; /* * The witness lock order data hash table. Keys are witness index tuples * (struct witness_lock_order_key), elements are lock order data objects * (struct witness_lock_order_data). */ struct witness_lock_order_hash { struct witness_lock_order_data *wloh_array[WITNESS_LO_HASH_SIZE]; u_int wloh_size; u_int wloh_count; }; #ifdef BLESSING struct witness_blessed { const char *b_lock1; const char *b_lock2; }; #endif struct witness_pendhelp { const char *wh_type; struct lock_object *wh_lock; }; struct witness_order_list_entry { const char *w_name; struct lock_class *w_class; }; /* * Returns 0 if one of the locks is a spin lock and the other is not. * Returns 1 otherwise. */ static __inline int witness_lock_type_equal(struct witness *w1, struct witness *w2) { return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) == (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); } static __inline int witness_lock_order_key_equal(const struct witness_lock_order_key *a, const struct witness_lock_order_key *b) { return (a->from == b->from && a->to == b->to); } static int _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname); static void adopt(struct witness *parent, struct witness *child); #ifdef BLESSING static int blessed(struct witness *, struct witness *); #endif static void depart(struct witness *w); static struct witness *enroll(const char *description, struct lock_class *lock_class); static struct lock_instance *find_instance(struct lock_list_entry *list, const struct lock_object *lock); static int isitmychild(struct witness *parent, struct witness *child); static int isitmydescendant(struct witness *parent, struct witness *child); static void itismychild(struct witness *parent, struct witness *child); static int sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS); static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); static int sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS); static int sysctl_debug_witness_channel(SYSCTL_HANDLER_ARGS); static void witness_add_fullgraph(struct sbuf *sb, struct witness *parent); #ifdef DDB static void witness_ddb_compute_levels(void); static void witness_ddb_display(int(*)(const char *fmt, ...)); static void witness_ddb_display_descendants(int(*)(const char *fmt, ...), struct witness *, int indent); static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...), struct witness_list *list); static void witness_ddb_level_descendants(struct witness *parent, int l); static void witness_ddb_list(struct thread *td); #endif static void witness_debugger(int cond, const char *msg); static void witness_free(struct witness *m); static struct witness *witness_get(void); static uint32_t 
witness_hash_djb2(const uint8_t *key, uint32_t size); static struct witness *witness_hash_get(const char *key); static void witness_hash_put(struct witness *w); static void witness_init_hash_tables(void); static void witness_increment_graph_generation(void); static void witness_lock_list_free(struct lock_list_entry *lle); static struct lock_list_entry *witness_lock_list_get(void); static int witness_lock_order_add(struct witness *parent, struct witness *child); static int witness_lock_order_check(struct witness *parent, struct witness *child); static struct witness_lock_order_data *witness_lock_order_get( struct witness *parent, struct witness *child); static void witness_list_lock(struct lock_instance *instance, int (*prnt)(const char *fmt, ...)); static int witness_output(const char *fmt, ...) __printflike(1, 2); static int witness_voutput(const char *fmt, va_list ap) __printflike(1, 0); static void witness_setflag(struct lock_object *lock, int flag, int set); static SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, NULL, "Witness Locking"); /* * If set to 0, lock order checking is disabled. If set to -1, * witness is completely disabled. Otherwise witness performs full * lock order checking for all locks. At runtime, lock order checking * may be toggled. However, witness cannot be reenabled once it is * completely disabled. */ static int witness_watch = 1; SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RWTUN | CTLTYPE_INT, NULL, 0, sysctl_debug_witness_watch, "I", "witness is watching lock operations"); #ifdef KDB /* * When KDB is enabled and witness_kdb is 1, it will cause the system * to drop into kdebug() when: * - a lock hierarchy violation occurs * - locks are held when going to sleep. */ #ifdef WITNESS_KDB int witness_kdb = 1; #else int witness_kdb = 0; #endif SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RWTUN, &witness_kdb, 0, ""); #endif /* KDB */ #if defined(DDB) || defined(KDB) /* * When DDB or KDB is enabled and witness_trace is 1, it will cause the system * to print a stack trace: * - a lock hierarchy violation occurs * - locks are held when going to sleep. */ int witness_trace = 1; SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RWTUN, &witness_trace, 0, ""); #endif /* DDB || KDB */ #ifdef WITNESS_SKIPSPIN int witness_skipspin = 1; #else int witness_skipspin = 0; #endif SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0, ""); int badstack_sbuf_size; int witness_count = WITNESS_COUNT; SYSCTL_INT(_debug_witness, OID_AUTO, witness_count, CTLFLAG_RDTUN, &witness_count, 0, ""); /* * Output channel for witness messages. By default we print to the console. */ enum witness_channel { WITNESS_CONSOLE, WITNESS_LOG, WITNESS_NONE, }; static enum witness_channel witness_channel = WITNESS_CONSOLE; SYSCTL_PROC(_debug_witness, OID_AUTO, output_channel, CTLTYPE_STRING | CTLFLAG_RWTUN, NULL, 0, sysctl_debug_witness_channel, "A", "Output channel for warnings"); /* * Call this to print out the relations between locks. */ SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_debug_witness_fullgraph, "A", "Show locks relation graphs"); /* * Call this to print out the witness faulty stacks. 
*/ SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_debug_witness_badstacks, "A", "Show bad witness stacks"); static struct mtx w_mtx; /* w_list */ static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); /* w_typelist */ static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); /* lock list */ static struct lock_list_entry *w_lock_list_free = NULL; static struct witness_pendhelp pending_locks[WITNESS_PENDLIST]; static u_int pending_cnt; static int w_free_cnt, w_spin_cnt, w_sleep_cnt; SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0, ""); static struct witness *w_data; static uint8_t **w_rmatrix; static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; static struct witness_hash w_hash; /* The witness hash table. */ /* The lock order data hash */ static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT]; static struct witness_lock_order_data *w_lofree = NULL; static struct witness_lock_order_hash w_lohash; static int w_max_used_index = 0; static unsigned int w_generation = 0; static const char w_notrunning[] = "Witness not running\n"; static const char w_stillcold[] = "Witness is still cold\n"; static struct witness_order_list_entry order_lists[] = { /* * sx locks */ { "proctree", &lock_class_sx }, { "allproc", &lock_class_sx }, { "allprison", &lock_class_sx }, { NULL, NULL }, /* * Various mutexes */ { "Giant", &lock_class_mtx_sleep }, { "pipe mutex", &lock_class_mtx_sleep }, { "sigio lock", &lock_class_mtx_sleep }, { "process group", &lock_class_mtx_sleep }, { "process lock", &lock_class_mtx_sleep }, { "session", &lock_class_mtx_sleep }, { "uidinfo hash", &lock_class_rw }, #ifdef HWPMC_HOOKS { "pmc-sleep", &lock_class_mtx_sleep }, #endif { "time lock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * umtx */ { "umtx lock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * Sockets */ { "accept", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { "so_rcv", &lock_class_mtx_sleep }, { "sellck", &lock_class_mtx_sleep }, { NULL, NULL }, /* * Routing */ { "so_rcv", &lock_class_mtx_sleep }, { "radix node head", &lock_class_rw }, { "rtentry", &lock_class_mtx_sleep }, { "ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, /* * IPv4 multicast: * protocol locks before interface locks, after UDP locks. */ { "udpinp", &lock_class_rw }, { "in_multi_mtx", &lock_class_mtx_sleep }, { "igmp_mtx", &lock_class_mtx_sleep }, { "if_addr_lock", &lock_class_rw }, { NULL, NULL }, /* * IPv6 multicast: * protocol locks before interface locks, after UDP locks. 
*/ { "udpinp", &lock_class_rw }, { "in6_multi_mtx", &lock_class_mtx_sleep }, { "mld_mtx", &lock_class_mtx_sleep }, { "if_addr_lock", &lock_class_rw }, { NULL, NULL }, /* * UNIX Domain Sockets */ { "unp_link_rwlock", &lock_class_rw }, { "unp_list_lock", &lock_class_mtx_sleep }, { "unp", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * UDP/IP */ { "udp", &lock_class_rw }, { "udpinp", &lock_class_rw }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * TCP/IP */ { "tcp", &lock_class_rw }, { "tcpinp", &lock_class_rw }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * BPF */ { "bpf global lock", &lock_class_mtx_sleep }, { "bpf interface lock", &lock_class_rw }, { "bpf cdev lock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * NFS server */ { "nfsd_mtx", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * IEEE 802.11 */ { "802.11 com lock", &lock_class_mtx_sleep}, { NULL, NULL }, /* * Network drivers */ { "network driver", &lock_class_mtx_sleep}, { NULL, NULL }, /* * Netgraph */ { "ng_node", &lock_class_mtx_sleep }, { "ng_worklist", &lock_class_mtx_sleep }, { NULL, NULL }, /* * CDEV */ { "vm map (system)", &lock_class_mtx_sleep }, { "vm pagequeue", &lock_class_mtx_sleep }, { "vnode interlock", &lock_class_mtx_sleep }, { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, /* * VM */ { "vm map (user)", &lock_class_sx }, { "vm object", &lock_class_rw }, { "vm page", &lock_class_mtx_sleep }, { "vm pagequeue", &lock_class_mtx_sleep }, { "pmap pv global", &lock_class_rw }, { "pmap", &lock_class_mtx_sleep }, { "pmap pv list", &lock_class_rw }, { "vm page free queue", &lock_class_mtx_sleep }, { NULL, NULL }, /* * kqueue/VFS interaction */ { "kqueue", &lock_class_mtx_sleep }, { "struct mount mtx", &lock_class_mtx_sleep }, { "vnode interlock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * VFS namecache */ { "ncvn", &lock_class_mtx_sleep }, { "ncbuc", &lock_class_rw }, { "vnode interlock", &lock_class_mtx_sleep }, { "ncneg", &lock_class_mtx_sleep }, { NULL, NULL }, /* * ZFS locking */ { "dn->dn_mtx", &lock_class_sx }, { "dr->dt.di.dr_mtx", &lock_class_sx }, { "db->db_mtx", &lock_class_sx }, { NULL, NULL }, /* * spin locks */ #ifdef SMP { "ap boot", &lock_class_mtx_spin }, #endif { "rm.mutex_mtx", &lock_class_mtx_spin }, { "sio", &lock_class_mtx_spin }, #ifdef __i386__ { "cy", &lock_class_mtx_spin }, #endif #ifdef __sparc64__ { "pcib_mtx", &lock_class_mtx_spin }, { "rtc_mtx", &lock_class_mtx_spin }, #endif { "scc_hwmtx", &lock_class_mtx_spin }, { "uart_hwmtx", &lock_class_mtx_spin }, { "fast_taskqueue", &lock_class_mtx_spin }, { "intr table", &lock_class_mtx_spin }, #ifdef HWPMC_HOOKS { "pmc-per-proc", &lock_class_mtx_spin }, #endif { "process slock", &lock_class_mtx_spin }, { "syscons video lock", &lock_class_mtx_spin }, { "sleepq chain", &lock_class_mtx_spin }, { "rm_spinlock", &lock_class_mtx_spin }, { "turnstile chain", &lock_class_mtx_spin }, { "turnstile lock", &lock_class_mtx_spin }, { "sched lock", &lock_class_mtx_spin }, { "td_contested", &lock_class_mtx_spin }, { "callout", &lock_class_mtx_spin }, { "entropy harvest mutex", &lock_class_mtx_spin }, #ifdef SMP { "smp rendezvous", &lock_class_mtx_spin }, #endif #ifdef __powerpc__ { "tlb0", &lock_class_mtx_spin }, #endif /* * leaf locks */ { "intrcnt", &lock_class_mtx_spin }, { "icu", &lock_class_mtx_spin }, #if defined(SMP) && defined(__sparc64__) { "ipi", &lock_class_mtx_spin }, #endif #ifdef __i386__ { "allpmaps", &lock_class_mtx_spin }, { "descriptor tables", 
&lock_class_mtx_spin }, #endif { "clk", &lock_class_mtx_spin }, { "cpuset", &lock_class_mtx_spin }, { "mprof lock", &lock_class_mtx_spin }, { "zombie lock", &lock_class_mtx_spin }, { "ALD Queue", &lock_class_mtx_spin }, #if defined(__i386__) || defined(__amd64__) { "pcicfg", &lock_class_mtx_spin }, { "NDIS thread lock", &lock_class_mtx_spin }, #endif { "tw_osl_io_lock", &lock_class_mtx_spin }, { "tw_osl_q_lock", &lock_class_mtx_spin }, { "tw_cl_io_lock", &lock_class_mtx_spin }, { "tw_cl_intr_lock", &lock_class_mtx_spin }, { "tw_cl_gen_lock", &lock_class_mtx_spin }, #ifdef HWPMC_HOOKS { "pmc-leaf", &lock_class_mtx_spin }, #endif { "blocked lock", &lock_class_mtx_spin }, { NULL, NULL }, { NULL, NULL } }; #ifdef BLESSING /* * Pairs of locks which have been blessed * Don't complain about order problems with blessed locks */ static struct witness_blessed blessed_list[] = { }; #endif /* * This global is set to 0 once it becomes safe to use the witness code. */ static int witness_cold = 1; /* * This global is set to 1 once the static lock orders have been enrolled * so that a warning can be issued for any spin locks enrolled later. */ static int witness_spin_warn = 0; /* Trim useless garbage from filenames. */ static const char * fixup_filename(const char *file) { if (file == NULL) return (NULL); while (strncmp(file, "../", 3) == 0) file += 3; return (file); } /* * The WITNESS-enabled diagnostic code. Note that the witness code does * assume that the early boot is single-threaded at least until after this * routine is completed. */ static void witness_initialize(void *dummy __unused) { struct lock_object *lock; struct witness_order_list_entry *order; struct witness *w, *w1; int i; w_data = malloc(sizeof (struct witness) * witness_count, M_WITNESS, M_WAITOK | M_ZERO); w_rmatrix = malloc(sizeof(*w_rmatrix) * (witness_count + 1), M_WITNESS, M_WAITOK | M_ZERO); for (i = 0; i < witness_count + 1; i++) { w_rmatrix[i] = malloc(sizeof(*w_rmatrix[i]) * (witness_count + 1), M_WITNESS, M_WAITOK | M_ZERO); } badstack_sbuf_size = witness_count * 256; /* * We have to release Giant before initializing its witness * structure so that WITNESS doesn't get confused. */ mtx_unlock(&Giant); mtx_assert(&Giant, MA_NOTOWNED); CTR1(KTR_WITNESS, "%s: initializing witness", __func__); mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS | MTX_NOPROFILE); for (i = witness_count - 1; i >= 0; i--) { w = &w_data[i]; memset(w, 0, sizeof(*w)); w_data[i].w_index = i; /* Witness index never changes. */ witness_free(w); } KASSERT(STAILQ_FIRST(&w_free)->w_index == 0, ("%s: Invalid list of free witness objects", __func__)); /* Witness with index 0 is not used to aid in debugging. */ STAILQ_REMOVE_HEAD(&w_free, w_list); w_free_cnt--; for (i = 0; i < witness_count; i++) { memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) * (witness_count + 1)); } for (i = 0; i < LOCK_CHILDCOUNT; i++) witness_lock_list_free(&w_locklistdata[i]); witness_init_hash_tables(); /* First add in all the specified order lists. */ for (order = order_lists; order->w_name != NULL; order++) { w = enroll(order->w_name, order->w_class); if (w == NULL) continue; w->w_file = "order list"; for (order++; order->w_name != NULL; order++) { w1 = enroll(order->w_name, order->w_class); if (w1 == NULL) continue; w1->w_file = "order list"; itismychild(w, w1); w = w1; } } witness_spin_warn = 1; /* Iterate through all locks and add them to witness. 
*/ for (i = 0; pending_locks[i].wh_lock != NULL; i++) { lock = pending_locks[i].wh_lock; KASSERT(lock->lo_flags & LO_WITNESS, ("%s: lock %s is on pending list but not LO_WITNESS", __func__, lock->lo_name)); lock->lo_witness = enroll(pending_locks[i].wh_type, LOCK_CLASS(lock)); } /* Mark the witness code as being ready for use. */ witness_cold = 0; mtx_lock(&Giant); } SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL); void witness_init(struct lock_object *lock, const char *type) { struct lock_class *class; /* Various sanity checks. */ class = LOCK_CLASS(lock); if ((lock->lo_flags & LO_RECURSABLE) != 0 && (class->lc_flags & LC_RECURSABLE) == 0) kassert_panic("%s: lock (%s) %s can not be recursable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_SLEEPABLE) != 0 && (class->lc_flags & LC_SLEEPABLE) == 0) kassert_panic("%s: lock (%s) %s can not be sleepable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_UPGRADABLE) != 0 && (class->lc_flags & LC_UPGRADABLE) == 0) kassert_panic("%s: lock (%s) %s can not be upgradable", __func__, class->lc_name, lock->lo_name); /* * If we shouldn't watch this lock, then just clear lo_witness. * Otherwise, if witness_cold is set, then it is too early to * enroll this lock, so defer it to witness_initialize() by adding * it to the pending_locks list. If it is not too early, then enroll * the lock now. */ if (witness_watch < 1 || panicstr != NULL || (lock->lo_flags & LO_WITNESS) == 0) lock->lo_witness = NULL; else if (witness_cold) { pending_locks[pending_cnt].wh_lock = lock; pending_locks[pending_cnt++].wh_type = type; if (pending_cnt > WITNESS_PENDLIST) panic("%s: pending locks list is too small, " "increase WITNESS_PENDLIST\n", __func__); } else lock->lo_witness = enroll(type, class); } void witness_destroy(struct lock_object *lock) { struct lock_class *class; struct witness *w; class = LOCK_CLASS(lock); if (witness_cold) panic("lock (%s) %s destroyed while witness_cold", class->lc_name, lock->lo_name); /* XXX: need to verify that no one holds the lock */ if ((lock->lo_flags & LO_WITNESS) == 0 || lock->lo_witness == NULL) return; w = lock->lo_witness; mtx_lock_spin(&w_mtx); MPASS(w->w_refcount > 0); w->w_refcount--; if (w->w_refcount == 0) depart(w); mtx_unlock_spin(&w_mtx); } #ifdef DDB static void witness_ddb_compute_levels(void) { struct witness *w; /* * First clear all levels. */ STAILQ_FOREACH(w, &w_all, w_list) w->w_ddb_level = -1; /* * Look for locks with no parents and level all their descendants. */ STAILQ_FOREACH(w, &w_all, w_list) { /* If the witness has ancestors (is not a root), skip it. 
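 * Roots get level 0; witness_ddb_level_descendants() then follows the WITNESS_PARENT edges and assigns increasing levels to their descendants.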
*/ if (w->w_num_ancestors > 0) continue; witness_ddb_level_descendants(w, 0); } } static void witness_ddb_level_descendants(struct witness *w, int l) { int i; if (w->w_ddb_level >= l) return; w->w_ddb_level = l; l++; for (i = 1; i <= w_max_used_index; i++) { if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) witness_ddb_level_descendants(&w_data[i], l); } } static void witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...), struct witness *w, int indent) { int i; for (i = 0; i < indent; i++) prnt(" "); prnt("%s (type: %s, depth: %d, active refs: %d)", w->w_name, w->w_class->lc_name, w->w_ddb_level, w->w_refcount); if (w->w_displayed) { prnt(" -- (already displayed)\n"); return; } w->w_displayed = 1; if (w->w_file != NULL && w->w_line != 0) prnt(" -- last acquired @ %s:%d\n", fixup_filename(w->w_file), w->w_line); else prnt(" -- never acquired\n"); indent++; WITNESS_INDEX_ASSERT(w->w_index); for (i = 1; i <= w_max_used_index; i++) { if (db_pager_quit) return; if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) witness_ddb_display_descendants(prnt, &w_data[i], indent); } } static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...), struct witness_list *list) { struct witness *w; STAILQ_FOREACH(w, list, w_typelist) { if (w->w_file == NULL || w->w_ddb_level > 0) continue; /* This lock has no ancestors - display its descendants. */ witness_ddb_display_descendants(prnt, w, 0); if (db_pager_quit) return; } } static void witness_ddb_display(int(*prnt)(const char *fmt, ...)) { struct witness *w; KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); witness_ddb_compute_levels(); /* Clear all the displayed flags. */ STAILQ_FOREACH(w, &w_all, w_list) w->w_displayed = 0; /* * First, handle sleep locks which have been acquired at least * once. */ prnt("Sleep locks:\n"); witness_ddb_display_list(prnt, &w_sleep); if (db_pager_quit) return; /* * Now do spin locks which have been acquired at least once. */ prnt("\nSpin locks:\n"); witness_ddb_display_list(prnt, &w_spin); if (db_pager_quit) return; /* * Finally, any locks which have not been acquired yet. */ prnt("\nLocks which were never acquired:\n"); STAILQ_FOREACH(w, &w_all, w_list) { if (w->w_file != NULL || w->w_refcount == 0) continue; prnt("%s (type: %s, depth: %d)\n", w->w_name, w->w_class->lc_name, w->w_ddb_level); if (db_pager_quit) return; } } #endif /* DDB */ int witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) { if (witness_watch == -1 || panicstr != NULL) return (0); /* Require locks that witness knows about. */ if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || lock2->lo_witness == NULL) return (EINVAL); mtx_assert(&w_mtx, MA_NOTOWNED); mtx_lock_spin(&w_mtx); /* * If we already have either an explicit or implied lock order that * is the other way around, then return an error. */ if (witness_watch && isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { mtx_unlock_spin(&w_mtx); return (EDOOFUS); } /* Try to add the new order.
*/ CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, lock2->lo_witness->w_name, lock1->lo_witness->w_name); itismychild(lock1->lo_witness, lock2->lo_witness); mtx_unlock_spin(&w_mtx); return (0); } void witness_checkorder(struct lock_object *lock, int flags, const char *file, int line, struct lock_object *interlock) { struct lock_list_entry *lock_list, *lle; struct lock_instance *lock1, *lock2, *plock; struct lock_class *class, *iclass; struct witness *w, *w1; struct thread *td; int i, j; if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL || panicstr != NULL) return; w = lock->lo_witness; class = LOCK_CLASS(lock); td = curthread; if (class->lc_flags & LC_SLEEPLOCK) { /* * Since spin locks include a critical section, this check * implicitly enforces a lock order of all sleep locks before * all spin locks. */ if (td->td_critnest != 0 && !kdb_active) kassert_panic("acquiring blockable sleep lock with " "spinlock or critical section held (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); /* * If this is the first lock acquired then just return as * no order checking is needed. */ lock_list = td->td_sleeplocks; if (lock_list == NULL || lock_list->ll_count == 0) return; } else { /* * If this is the first lock, just return as no order * checking is needed. Avoid problems with thread * migration pinning the thread while checking if * spinlocks are held. If at least one spinlock is held * the thread is in a safe path and it is allowed to * unpin it. */ sched_pin(); lock_list = PCPU_GET(spinlocks); if (lock_list == NULL || lock_list->ll_count == 0) { sched_unpin(); return; } sched_unpin(); } /* * Check to see if we are recursing on a lock we already own. If * so, make sure that we don't mismatch exclusive and shared lock * acquires. */ lock1 = find_instance(lock_list, lock); if (lock1 != NULL) { if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && (flags & LOP_EXCLUSIVE) == 0) { witness_output("shared lock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); witness_output("while exclusively locked from %s:%d\n", fixup_filename(lock1->li_file), lock1->li_line); kassert_panic("excl->share"); } if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && (flags & LOP_EXCLUSIVE) != 0) { witness_output("exclusive lock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); witness_output("while share locked from %s:%d\n", fixup_filename(lock1->li_file), lock1->li_line); kassert_panic("share->excl"); } return; } /* Warn if the interlock is not locked exactly once. */ if (interlock != NULL) { iclass = LOCK_CLASS(interlock); lock1 = find_instance(lock_list, interlock); if (lock1 == NULL) kassert_panic("interlock (%s) %s not locked @ %s:%d", iclass->lc_name, interlock->lo_name, fixup_filename(file), line); else if ((lock1->li_flags & LI_RECURSEMASK) != 0) kassert_panic("interlock (%s) %s recursed @ %s:%d", iclass->lc_name, interlock->lo_name, fixup_filename(file), line); } /* * Find the previously acquired lock, but ignore interlocks. */ plock = &lock_list->ll_children[lock_list->ll_count - 1]; if (interlock != NULL && plock->li_lock == interlock) { if (lock_list->ll_count > 1) plock = &lock_list->ll_children[lock_list->ll_count - 2]; else { lle = lock_list->ll_next; /* * The interlock is the only lock we hold, so * simply return. */ if (lle == NULL) return; plock = &lle->ll_children[lle->ll_count - 1]; } } /* * Try to perform most checks without a lock. 
If this succeeds we * can skip acquiring the lock and return success. Otherwise we redo * the check with the lock held to handle races with concurrent updates. */ w1 = plock->li_lock->lo_witness; if (witness_lock_order_check(w1, w)) return; mtx_lock_spin(&w_mtx); if (witness_lock_order_check(w1, w)) { mtx_unlock_spin(&w_mtx); return; } witness_lock_order_add(w1, w); /* * Check for duplicate locks of the same type. Note that we only * have to check for this on the last lock we just acquired. Any * other cases will be caught as lock order violations. */ if (w1 == w) { i = w->w_index; if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) && !(w_rmatrix[i][i] & WITNESS_REVERSAL)) { w_rmatrix[i][i] |= WITNESS_REVERSAL; w->w_reversed = 1; mtx_unlock_spin(&w_mtx); witness_output( "acquiring duplicate lock of same type: \"%s\"\n", w->w_name); witness_output(" 1st %s @ %s:%d\n", plock->li_lock->lo_name, fixup_filename(plock->li_file), plock->li_line); witness_output(" 2nd %s @ %s:%d\n", lock->lo_name, fixup_filename(file), line); witness_debugger(1, __func__); } else mtx_unlock_spin(&w_mtx); return; } mtx_assert(&w_mtx, MA_OWNED); /* * If we know that the lock we are acquiring comes after * the lock we most recently acquired in the lock order tree, * then there is no need for any further checks. */ if (isitmychild(w1, w)) goto out; for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) { for (i = lle->ll_count - 1; i >= 0; i--, j++) { MPASS(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN); lock1 = &lle->ll_children[i]; /* * Ignore the interlock. */ if (interlock == lock1->li_lock) continue; /* * If this lock doesn't undergo witness checking, * then skip it. */ w1 = lock1->li_lock->lo_witness; if (w1 == NULL) { KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, ("lock missing witness structure")); continue; } /* * If we are locking Giant and this is a sleepable * lock, then skip it. */ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && lock == &Giant.lock_object) continue; /* * If we are locking a sleepable lock and this lock * is Giant, then skip it. */ if ((lock->lo_flags & LO_SLEEPABLE) != 0 && lock1->li_lock == &Giant.lock_object) continue; /* * If we are locking a sleepable lock and this lock * isn't sleepable, we want to treat it as a lock * order violation to enforce a general lock order of * sleepable locks before non-sleepable locks. */ if (((lock->lo_flags & LO_SLEEPABLE) != 0 && (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) goto reversal; /* * If we are locking Giant and this is a non-sleepable * lock, then treat it as a reversal. */ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && lock == &Giant.lock_object) goto reversal; /* * Check the lock order hierarchy for a reversal. */ if (!isitmydescendant(w, w1)) continue; reversal: /* * We have a lock order violation, check to see if it * is allowed or has already been yelled about. */ #ifdef BLESSING /* * If the lock order is blessed, just bail. We don't * look for other lock order violations though, which * may be a bug. */ if (blessed(w, w1)) goto out; #endif /* Bail if this violation is known */ if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL) goto out; /* Record this as a violation */ w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL; w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL; w->w_reversed = w1->w_reversed = 1; witness_increment_graph_generation(); mtx_unlock_spin(&w_mtx); #ifdef WITNESS_NO_VNODE /* * There are known LORs between VNODE locks. They are * not an indication of a bug.
VNODE locks are flagged * as such (LO_IS_VNODE) and we don't yell if the LOR * is between 2 VNODE locks. */ if ((lock->lo_flags & LO_IS_VNODE) != 0 && (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0) return; #endif /* * Ok, yell about it. */ if (((lock->lo_flags & LO_SLEEPABLE) != 0 && (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) witness_output( "lock order reversal: (sleepable after non-sleepable)\n"); else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && lock == &Giant.lock_object) witness_output( "lock order reversal: (Giant after non-sleepable)\n"); else witness_output("lock order reversal:\n"); /* * Try to locate an earlier lock with * witness w in our list. */ do { lock2 = &lle->ll_children[i]; MPASS(lock2->li_lock != NULL); if (lock2->li_lock->lo_witness == w) break; if (i == 0 && lle->ll_next != NULL) { lle = lle->ll_next; i = lle->ll_count - 1; MPASS(i >= 0 && i < LOCK_NCHILDREN); } else i--; } while (i >= 0); if (i < 0) { witness_output(" 1st %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, w1->w_name, fixup_filename(lock1->li_file), lock1->li_line); witness_output(" 2nd %p %s (%s) @ %s:%d\n", lock, lock->lo_name, w->w_name, fixup_filename(file), line); } else { witness_output(" 1st %p %s (%s) @ %s:%d\n", lock2->li_lock, lock2->li_lock->lo_name, lock2->li_lock->lo_witness->w_name, fixup_filename(lock2->li_file), lock2->li_line); witness_output(" 2nd %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, w1->w_name, fixup_filename(lock1->li_file), lock1->li_line); witness_output(" 3rd %p %s (%s) @ %s:%d\n", lock, lock->lo_name, w->w_name, fixup_filename(file), line); } witness_debugger(1, __func__); return; } } /* * If requested, build a new lock order. However, don't build a new * relationship between a sleepable lock and Giant if it is in the * wrong direction. The correct lock order is that sleepable locks * always come before Giant. */ if (flags & LOP_NEWORDER && !(plock->li_lock == &Giant.lock_object && (lock->lo_flags & LO_SLEEPABLE) != 0)) { CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, w->w_name, plock->li_lock->lo_witness->w_name); itismychild(plock->li_lock->lo_witness, w); } out: mtx_unlock_spin(&w_mtx); } void witness_lock(struct lock_object *lock, int flags, const char *file, int line) { struct lock_list_entry **lock_list, *lle; struct lock_instance *instance; struct witness *w; struct thread *td; if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL || panicstr != NULL) return; w = lock->lo_witness; td = curthread; /* Determine lock list for this lock. */ if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; else lock_list = PCPU_PTR(spinlocks); /* Check to see if we are recursing on a lock we already own. */ instance = find_instance(*lock_list, lock); if (instance != NULL) { instance->li_flags++; CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, td->td_proc->p_pid, lock->lo_name, instance->li_flags & LI_RECURSEMASK); instance->li_file = file; instance->li_line = line; return; } /* Update per-witness last file and line acquire. */ w->w_file = file; w->w_line = line; /* Find the next open lock instance in the list and fill it. 
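 * If the current lock_list_entry is missing or already holds LOCK_NCHILDREN instances, a fresh entry is obtained from witness_lock_list_get() and pushed onto the front of the list.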
*/ lle = *lock_list; if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { lle = witness_lock_list_get(); if (lle == NULL) return; lle->ll_next = *lock_list; CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, td->td_proc->p_pid, lle); *lock_list = lle; } instance = &lle->ll_children[lle->ll_count++]; instance->li_lock = lock; instance->li_line = line; instance->li_file = file; if ((flags & LOP_EXCLUSIVE) != 0) instance->li_flags = LI_EXCLUSIVE; else instance->li_flags = 0; CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); } void witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) { struct lock_instance *instance; struct lock_class *class; KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (witness_watch) { if ((lock->lo_flags & LO_UPGRADABLE) == 0) kassert_panic( "upgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) kassert_panic( "upgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); } instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) { kassert_panic("upgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); return; } if (witness_watch) { if ((instance->li_flags & LI_EXCLUSIVE) != 0) kassert_panic( "upgrade of exclusive lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((instance->li_flags & LI_RECURSEMASK) != 0) kassert_panic( "upgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, fixup_filename(file), line); } instance->li_flags |= LI_EXCLUSIVE; } void witness_downgrade(struct lock_object *lock, int flags, const char *file, int line) { struct lock_instance *instance; struct lock_class *class; KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (witness_watch) { if ((lock->lo_flags & LO_UPGRADABLE) == 0) kassert_panic( "downgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) kassert_panic( "downgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); } instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) { kassert_panic("downgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); return; } if (witness_watch) { if ((instance->li_flags & LI_EXCLUSIVE) == 0) kassert_panic( "downgrade of shared lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((instance->li_flags & LI_RECURSEMASK) != 0) kassert_panic( "downgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, fixup_filename(file), line); } instance->li_flags &= ~LI_EXCLUSIVE; } void witness_unlock(struct lock_object *lock, int flags, const char *file, int line) { struct lock_list_entry **lock_list, *lle; struct lock_instance *instance; struct lock_class *class; struct thread *td; register_t s; int i, j; if (witness_cold || lock->lo_witness == NULL || panicstr != NULL) return; td = 
curthread; class = LOCK_CLASS(lock); /* Find lock instance associated with this lock. */ if (class->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; else lock_list = PCPU_PTR(spinlocks); lle = *lock_list; for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) for (i = 0; i < (*lock_list)->ll_count; i++) { instance = &(*lock_list)->ll_children[i]; if (instance->li_lock == lock) goto found; } /* * When WITNESS is disabled via witness_watch we can still end up with * locks registered in the td_sleeplocks queue. Those queues have to * be flushed, so just search for any locks that are still registered * and remove them. */ if (witness_watch > 0) { kassert_panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); return; } else { return; } found: /* First, check for shared/exclusive mismatches. */ if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 && (flags & LOP_EXCLUSIVE) == 0) { witness_output("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); witness_output("while exclusively locked from %s:%d\n", fixup_filename(instance->li_file), instance->li_line); kassert_panic("excl->ushare"); } if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 && (flags & LOP_EXCLUSIVE) != 0) { witness_output("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); witness_output("while share locked from %s:%d\n", fixup_filename(instance->li_file), instance->li_line); kassert_panic("share->uexcl"); } /* If we are recursed, unrecurse. */ if ((instance->li_flags & LI_RECURSEMASK) > 0) { CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, td->td_proc->p_pid, instance->li_lock->lo_name, instance->li_flags); instance->li_flags--; return; } /* The lock is now being dropped, check for NORELEASE flag */ if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) { witness_output("forbidden unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); kassert_panic("lock marked norelease"); } /* Otherwise, remove this item from the list. */ s = intr_disable(); CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, td->td_proc->p_pid, instance->li_lock->lo_name, (*lock_list)->ll_count - 1); for (j = i; j < (*lock_list)->ll_count - 1; j++) (*lock_list)->ll_children[j] = (*lock_list)->ll_children[j + 1]; (*lock_list)->ll_count--; intr_restore(s); /* * In order to reduce contention on w_mtx, we always want to keep a * head object on the lists so that frequent allocation from the * free witness pool (and the subsequent locking) is avoided. * To keep the code simple, a completely emptied head object also * means that there are no further objects on the list, so the list * ownership needs to be handed over to another object if the current * head needs to be freed.
*/ if ((*lock_list)->ll_count == 0) { if (*lock_list == lle) { if (lle->ll_next == NULL) return; } else lle = *lock_list; *lock_list = lle->ll_next; CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, td->td_proc->p_pid, lle); witness_lock_list_free(lle); } } void witness_thread_exit(struct thread *td) { struct lock_list_entry *lle; int i, n; lle = td->td_sleeplocks; if (lle == NULL || panicstr != NULL) return; if (lle->ll_count != 0) { for (n = 0; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { if (n == 0) witness_output( "Thread %p exiting with the following locks held:\n", td); n++; witness_list_lock(&lle->ll_children[i], witness_output); } kassert_panic( "Thread %p cannot exit while holding sleeplocks\n", td); } witness_lock_list_free(lle); } /* * Warn if any locks other than 'lock' are held. Flags can be passed in to * exempt Giant and sleepable locks from the checks as well. If any * non-exempt locks are held, then a supplied message is printed to the * output channel along with a list of the offending locks. If indicated in the * flags then a failure results in a panic as well. */ int witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) { struct lock_list_entry *lock_list, *lle; struct lock_instance *lock1; struct thread *td; va_list ap; int i, n; if (witness_cold || witness_watch < 1 || panicstr != NULL) return (0); n = 0; td = curthread; for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { lock1 = &lle->ll_children[i]; if (lock1->li_lock == lock) continue; if (flags & WARN_GIANTOK && lock1->li_lock == &Giant.lock_object) continue; if (flags & WARN_SLEEPOK && (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) continue; if (n == 0) { va_start(ap, fmt); - witness_voutput(fmt, ap); + vprintf(fmt, ap); va_end(ap); - witness_output( - " with the following %slocks held:\n", + printf(" with the following %slocks held:\n", (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : ""); } n++; - witness_list_lock(lock1, witness_output); + witness_list_lock(lock1, printf); } /* * Pin the thread in order to avoid problems with thread migration. * Once all the checks on spinlock ownership have passed, the thread * is on a safe path and can be unpinned. */ sched_pin(); lock_list = PCPU_GET(spinlocks); if (lock_list != NULL && lock_list->ll_count != 0) { sched_unpin(); /* * We should only have one spinlock and, since the exemption * flags cannot apply to this lock class, just check whether * the first spinlock is the one curthread should hold. */ lock1 = &lock_list->ll_children[lock_list->ll_count - 1]; if (lock_list->ll_count == 1 && lock_list->ll_next == NULL && lock1->li_lock == lock && n == 0) return (0); va_start(ap, fmt); - witness_voutput(fmt, ap); + vprintf(fmt, ap); va_end(ap); - witness_output(" with the following %slocks held:\n", + printf(" with the following %slocks held:\n", (flags & WARN_SLEEPOK) != 0 ?
"non-sleepable " : ""); - n += witness_list_locks(&lock_list, witness_output); + n += witness_list_locks(&lock_list, printf); } else sched_unpin(); if (flags & WARN_PANIC && n) kassert_panic("%s", __func__); else witness_debugger(n, __func__); return (n); } const char * witness_file(struct lock_object *lock) { struct witness *w; if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL) return ("?"); w = lock->lo_witness; return (w->w_file); } int witness_line(struct lock_object *lock) { struct witness *w; if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL) return (0); w = lock->lo_witness; return (w->w_line); } static struct witness * enroll(const char *description, struct lock_class *lock_class) { struct witness *w; struct witness_list *typelist; MPASS(description != NULL); if (witness_watch == -1 || panicstr != NULL) return (NULL); if ((lock_class->lc_flags & LC_SPINLOCK)) { if (witness_skipspin) return (NULL); else typelist = &w_spin; } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) { typelist = &w_sleep; } else { kassert_panic("lock class %s is not sleep or spin", lock_class->lc_name); return (NULL); } mtx_lock_spin(&w_mtx); w = witness_hash_get(description); if (w) goto found; if ((w = witness_get()) == NULL) return (NULL); MPASS(strlen(description) < MAX_W_NAME); strcpy(w->w_name, description); w->w_class = lock_class; w->w_refcount = 1; STAILQ_INSERT_HEAD(&w_all, w, w_list); if (lock_class->lc_flags & LC_SPINLOCK) { STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); w_spin_cnt++; } else if (lock_class->lc_flags & LC_SLEEPLOCK) { STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); w_sleep_cnt++; } /* Insert new witness into the hash */ witness_hash_put(w); witness_increment_graph_generation(); mtx_unlock_spin(&w_mtx); return (w); found: w->w_refcount++; mtx_unlock_spin(&w_mtx); if (lock_class != w->w_class) kassert_panic( "lock (%s) %s does not match earlier (%s) lock", description, lock_class->lc_name, w->w_class->lc_name); return (w); } static void depart(struct witness *w) { struct witness_list *list; MPASS(w->w_refcount == 0); if (w->w_class->lc_flags & LC_SLEEPLOCK) { list = &w_sleep; w_sleep_cnt--; } else { list = &w_spin; w_spin_cnt--; } /* * Set file to NULL as it may point into a loadable module. */ w->w_file = NULL; w->w_line = 0; witness_increment_graph_generation(); } static void adopt(struct witness *parent, struct witness *child) { int pi, ci, i, j; if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); /* If the relationship is already known, there's no work to be done. */ if (isitmychild(parent, child)) return; /* When the structure of the graph changes, bump up the generation. */ witness_increment_graph_generation(); /* * The hard part ... create the direct relationship, then propagate all * indirect relationships. */ pi = parent->w_index; ci = child->w_index; WITNESS_INDEX_ASSERT(pi); WITNESS_INDEX_ASSERT(ci); MPASS(pi != ci); w_rmatrix[pi][ci] |= WITNESS_PARENT; w_rmatrix[ci][pi] |= WITNESS_CHILD; /* * If parent was not already an ancestor of child, * then we increment the descendant and ancestor counters. */ if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { parent->w_num_descendants++; child->w_num_ancestors++; } /* * Find each ancestor of 'pi'. Note that 'pi' itself is counted as * an ancestor of 'pi' during this loop. */ for (i = 1; i <= w_max_used_index; i++) { if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && (i != pi)) continue; /* Find each descendant of 'i' and mark it as a descendant. 
*/ for (j = 1; j <= w_max_used_index; j++) { /* * Skip children that are already marked as * descendants of 'i'. */ if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) continue; /* * We are only interested in descendants of 'ci'. Note * that 'ci' itself is counted as a descendant of 'ci'. */ if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 && (j != ci)) continue; w_rmatrix[i][j] |= WITNESS_ANCESTOR; w_rmatrix[j][i] |= WITNESS_DESCENDANT; w_data[i].w_num_descendants++; w_data[j].w_num_ancestors++; /* * Make sure we aren't marking a node as both an * ancestor and descendant. We should have caught * this as a lock order reversal earlier. */ if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) && (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) { printf("witness rmatrix paradox! [%d][%d]=%d " "both ancestor and descendant\n", i, j, w_rmatrix[i][j]); kdb_backtrace(); printf("Witness disabled.\n"); witness_watch = -1; } if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) && (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) { printf("witness rmatrix paradox! [%d][%d]=%d " "both ancestor and descendant\n", j, i, w_rmatrix[j][i]); kdb_backtrace(); printf("Witness disabled.\n"); witness_watch = -1; } } } } static void itismychild(struct witness *parent, struct witness *child) { int unlocked; MPASS(child != NULL && parent != NULL); if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); if (!witness_lock_type_equal(parent, child)) { if (witness_cold == 0) { unlocked = 1; mtx_unlock_spin(&w_mtx); } else { unlocked = 0; } kassert_panic( "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " "the same lock type", __func__, parent->w_name, parent->w_class->lc_name, child->w_name, child->w_class->lc_name); if (unlocked) mtx_lock_spin(&w_mtx); } adopt(parent, child); } /* * Generic code for the isitmy*() functions. The rmask parameter is the * expected relationship of w1 to w2. */ static int _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname) { unsigned char r1, r2; int i1, i2; i1 = w1->w_index; i2 = w2->w_index; WITNESS_INDEX_ASSERT(i1); WITNESS_INDEX_ASSERT(i2); r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK; r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK; /* The flags on one better be the inverse of the flags on the other */ if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) || (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) { /* Don't squawk if we're potentially racing with an update. */ if (!mtx_owned(&w_mtx)) return (0); printf("%s: rmatrix mismatch between %s (index %d) and %s " "(index %d): w_rmatrix[%d][%d] == %hhx but " "w_rmatrix[%d][%d] == %hhx\n", fname, w1->w_name, i1, w2->w_name, i2, i1, i2, r1, i2, i1, r2); kdb_backtrace(); printf("Witness disabled.\n"); witness_watch = -1; } return (r1 & rmask); } /* * Checks if @child is a direct child of @parent. */ static int isitmychild(struct witness *parent, struct witness *child) { return (_isitmyx(parent, child, WITNESS_PARENT, __func__)); } /* * Checks if @descendant is a direct or indirect descendant of @ancestor.
*/ static int isitmydescendant(struct witness *ancestor, struct witness *descendant) { return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK, __func__)); } #ifdef BLESSING static int blessed(struct witness *w1, struct witness *w2) { int i; struct witness_blessed *b; for (i = 0; i < nitems(blessed_list); i++) { b = &blessed_list[i]; if (strcmp(w1->w_name, b->b_lock1) == 0) { if (strcmp(w2->w_name, b->b_lock2) == 0) return (1); continue; } if (strcmp(w1->w_name, b->b_lock2) == 0) if (strcmp(w2->w_name, b->b_lock1) == 0) return (1); } return (0); } #endif static struct witness * witness_get(void) { struct witness *w; int index; if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); if (witness_watch == -1) { mtx_unlock_spin(&w_mtx); return (NULL); } if (STAILQ_EMPTY(&w_free)) { witness_watch = -1; mtx_unlock_spin(&w_mtx); printf("WITNESS: unable to allocate a new witness object\n"); return (NULL); } w = STAILQ_FIRST(&w_free); STAILQ_REMOVE_HEAD(&w_free, w_list); w_free_cnt--; index = w->w_index; MPASS(index > 0 && index == w_max_used_index+1 && index < witness_count); bzero(w, sizeof(*w)); w->w_index = index; if (index > w_max_used_index) w_max_used_index = index; return (w); } static void witness_free(struct witness *w) { STAILQ_INSERT_HEAD(&w_free, w, w_list); w_free_cnt++; } static struct lock_list_entry * witness_lock_list_get(void) { struct lock_list_entry *lle; if (witness_watch == -1) return (NULL); mtx_lock_spin(&w_mtx); lle = w_lock_list_free; if (lle == NULL) { witness_watch = -1; mtx_unlock_spin(&w_mtx); printf("%s: witness exhausted\n", __func__); return (NULL); } w_lock_list_free = lle->ll_next; mtx_unlock_spin(&w_mtx); bzero(lle, sizeof(*lle)); return (lle); } static void witness_lock_list_free(struct lock_list_entry *lle) { mtx_lock_spin(&w_mtx); lle->ll_next = w_lock_list_free; w_lock_list_free = lle; mtx_unlock_spin(&w_mtx); } static struct lock_instance * find_instance(struct lock_list_entry *list, const struct lock_object *lock) { struct lock_list_entry *lle; struct lock_instance *instance; int i; for (lle = list; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { instance = &lle->ll_children[i]; if (instance->li_lock == lock) return (instance); } return (NULL); } static void witness_list_lock(struct lock_instance *instance, int (*prnt)(const char *fmt, ...)) { struct lock_object *lock; lock = instance->li_lock; prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); if (lock->lo_witness->w_name != lock->lo_name) prnt(" (%s)", lock->lo_witness->w_name); prnt(" r = %d (%p) locked @ %s:%d\n", instance->li_flags & LI_RECURSEMASK, lock, fixup_filename(instance->li_file), instance->li_line); } static int witness_output(const char *fmt, ...) 
{ va_list ap; int ret; va_start(ap, fmt); ret = witness_voutput(fmt, ap); va_end(ap); return (ret); } static int witness_voutput(const char *fmt, va_list ap) { int ret; ret = 0; switch (witness_channel) { case WITNESS_CONSOLE: ret = vprintf(fmt, ap); break; case WITNESS_LOG: vlog(LOG_NOTICE, fmt, ap); break; case WITNESS_NONE: break; } return (ret); } #ifdef DDB static int witness_thread_has_locks(struct thread *td) { if (td->td_sleeplocks == NULL) return (0); return (td->td_sleeplocks->ll_count != 0); } static int witness_proc_has_locks(struct proc *p) { struct thread *td; FOREACH_THREAD_IN_PROC(p, td) { if (witness_thread_has_locks(td)) return (1); } return (0); } #endif int witness_list_locks(struct lock_list_entry **lock_list, int (*prnt)(const char *fmt, ...)) { struct lock_list_entry *lle; int i, nheld; nheld = 0; for (lle = *lock_list; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { witness_list_lock(&lle->ll_children[i], prnt); nheld++; } return (nheld); } /* * This is a bit risky at best. We call this function when we have timed * out acquiring a spin lock, and we assume that the other CPU is stuck * with this lock held. So, we go groveling around in the other CPU's * per-cpu data to try to find the lock instance for this spin lock to * see when it was last acquired. */ void witness_display_spinlock(struct lock_object *lock, struct thread *owner, int (*prnt)(const char *fmt, ...)) { struct lock_instance *instance; struct pcpu *pc; if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) return; pc = pcpu_find(owner->td_oncpu); instance = find_instance(pc->pc_spinlocks, lock); if (instance != NULL) witness_list_lock(instance, prnt); } void witness_save(struct lock_object *lock, const char **filep, int *linep) { struct lock_list_entry *lock_list; struct lock_instance *instance; struct lock_class *class; /* * This function is used independently in locking code to deal with * Giant, SCHEDULER_STOPPED() check can be removed here after Giant * is gone. */ if (SCHEDULER_STOPPED()) return; KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (class->lc_flags & LC_SLEEPLOCK) lock_list = curthread->td_sleeplocks; else { if (witness_skipspin) return; lock_list = PCPU_GET(spinlocks); } instance = find_instance(lock_list, lock); if (instance == NULL) { kassert_panic("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); return; } *filep = instance->li_file; *linep = instance->li_line; } void witness_restore(struct lock_object *lock, const char *file, int line) { struct lock_list_entry *lock_list; struct lock_instance *instance; struct lock_class *class; /* * This function is used independently in locking code to deal with * Giant, SCHEDULER_STOPPED() check can be removed here after Giant * is gone. 
*/ if (SCHEDULER_STOPPED()) return; KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (class->lc_flags & LC_SLEEPLOCK) lock_list = curthread->td_sleeplocks; else { if (witness_skipspin) return; lock_list = PCPU_GET(spinlocks); } instance = find_instance(lock_list, lock); if (instance == NULL) kassert_panic("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); lock->lo_witness->w_file = file; lock->lo_witness->w_line = line; if (instance == NULL) return; instance->li_file = file; instance->li_line = line; } void witness_assert(const struct lock_object *lock, int flags, const char *file, int line) { #ifdef INVARIANT_SUPPORT struct lock_instance *instance; struct lock_class *class; if (lock->lo_witness == NULL || witness_watch < 1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if ((class->lc_flags & LC_SLEEPLOCK) != 0) instance = find_instance(curthread->td_sleeplocks, lock); else if ((class->lc_flags & LC_SPINLOCK) != 0) instance = find_instance(PCPU_GET(spinlocks), lock); else { kassert_panic("Lock (%s) %s is not sleep or spin!", class->lc_name, lock->lo_name); return; } switch (flags) { case LA_UNLOCKED: if (instance != NULL) kassert_panic("Lock (%s) %s locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; case LA_LOCKED: case LA_LOCKED | LA_RECURSED: case LA_LOCKED | LA_NOTRECURSED: case LA_SLOCKED: case LA_SLOCKED | LA_RECURSED: case LA_SLOCKED | LA_NOTRECURSED: case LA_XLOCKED: case LA_XLOCKED | LA_RECURSED: case LA_XLOCKED | LA_NOTRECURSED: if (instance == NULL) { kassert_panic("Lock (%s) %s not locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; } if ((flags & LA_XLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) == 0) kassert_panic( "Lock (%s) %s not exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_SLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) != 0) kassert_panic( "Lock (%s) %s exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_RECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) == 0) kassert_panic("Lock (%s) %s not recursed @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_NOTRECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) != 0) kassert_panic("Lock (%s) %s recursed @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; default: kassert_panic("Invalid lock assertion at %s:%d.", fixup_filename(file), line); } #endif /* INVARIANT_SUPPORT */ } static void witness_setflag(struct lock_object *lock, int flag, int set) { struct lock_list_entry *lock_list; struct lock_instance *instance; struct lock_class *class; if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (class->lc_flags & LC_SLEEPLOCK) lock_list = curthread->td_sleeplocks; else { if (witness_skipspin) return; lock_list = PCPU_GET(spinlocks); } instance = find_instance(lock_list, lock); if (instance == NULL) { kassert_panic("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); return; } if (set) instance->li_flags |= flag; else instance->li_flags &= ~flag; } void witness_norelease(struct lock_object *lock) { witness_setflag(lock, LI_NORELEASE, 1); } void witness_releaseok(struct lock_object *lock) { witness_setflag(lock, LI_NORELEASE, 0); } #ifdef DDB static 
void witness_ddb_list(struct thread *td) { KASSERT(witness_cold == 0, ("%s: witness_cold", __func__)); KASSERT(kdb_active, ("%s: not in the debugger", __func__)); if (witness_watch < 1) return; witness_list_locks(&td->td_sleeplocks, db_printf); /* * We only handle spinlocks if td == curthread. This is somewhat broken * if td is currently executing on some other CPU and holds spin locks * as we won't display those locks. If we had a MI way of getting * the per-cpu data for a given cpu then we could use * td->td_oncpu to get the list of spinlocks for this thread * and "fix" this. * * That still wouldn't really fix this unless we locked the scheduler * lock or stopped the other CPU to make sure it wasn't changing the * list out from under us. It is probably best to just not try to * handle threads on other CPU's for now. */ if (td == curthread && PCPU_GET(spinlocks) != NULL) witness_list_locks(PCPU_PTR(spinlocks), db_printf); } DB_SHOW_COMMAND(locks, db_witness_list) { struct thread *td; if (have_addr) td = db_lookup_thread(addr, true); else td = kdb_thread; witness_ddb_list(td); } DB_SHOW_ALL_COMMAND(locks, db_witness_list_all) { struct thread *td; struct proc *p; /* * It would be nice to list only threads and processes that actually * held sleep locks, but that information is currently not exported * by WITNESS. */ FOREACH_PROC_IN_SYSTEM(p) { if (!witness_proc_has_locks(p)) continue; FOREACH_THREAD_IN_PROC(p, td) { if (!witness_thread_has_locks(td)) continue; db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid, p->p_comm, td, td->td_tid); witness_ddb_list(td); if (db_pager_quit) return; } } } DB_SHOW_ALIAS(alllocks, db_witness_list_all) DB_SHOW_COMMAND(witness, db_witness_display) { witness_ddb_display(db_printf); } #endif static int sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS) { struct witness_lock_order_data *data1, *data2, *tmp_data1, *tmp_data2; struct witness *tmp_w1, *tmp_w2, *w1, *w2; struct sbuf *sb; u_int w_rmatrix1, w_rmatrix2; int error, generation, i, j; tmp_data1 = NULL; tmp_data2 = NULL; tmp_w1 = NULL; tmp_w2 = NULL; if (witness_watch < 1) { error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning)); return (error); } if (witness_cold) { error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold)); return (error); } error = 0; sb = sbuf_new(NULL, NULL, badstack_sbuf_size, SBUF_AUTOEXTEND); if (sb == NULL) return (ENOMEM); /* Allocate and init temporary storage space. */ tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO); tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, M_WAITOK | M_ZERO); tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP, M_WAITOK | M_ZERO); stack_zero(&tmp_data1->wlod_stack); stack_zero(&tmp_data2->wlod_stack); restart: mtx_lock_spin(&w_mtx); generation = w_generation; mtx_unlock_spin(&w_mtx); sbuf_printf(sb, "Number of known direct relationships is %d\n", w_lohash.wloh_count); for (i = 1; i < w_max_used_index; i++) { mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); /* The graph has changed, try again. */ req->oldidx = 0; sbuf_clear(sb); goto restart; } w1 = &w_data[i]; if (w1->w_reversed == 0) { mtx_unlock_spin(&w_mtx); continue; } /* Copy w1 locally so we can release the spin lock. 
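 * Working on the copy keeps the sbuf formatting below out from under w_mtx; if w_generation changes while the lock is dropped, the scan is restarted from the beginning.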
*/ *tmp_w1 = *w1; mtx_unlock_spin(&w_mtx); if (tmp_w1->w_reversed == 0) continue; for (j = 1; j < w_max_used_index; j++) { if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j) continue; mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); /* The graph has changed, try again. */ req->oldidx = 0; sbuf_clear(sb); goto restart; } w2 = &w_data[j]; data1 = witness_lock_order_get(w1, w2); data2 = witness_lock_order_get(w2, w1); /* * Copy information locally so we can release the * spin lock. */ *tmp_w2 = *w2; w_rmatrix1 = (unsigned int)w_rmatrix[i][j]; w_rmatrix2 = (unsigned int)w_rmatrix[j][i]; if (data1) { stack_zero(&tmp_data1->wlod_stack); stack_copy(&data1->wlod_stack, &tmp_data1->wlod_stack); } if (data2 && data2 != data1) { stack_zero(&tmp_data2->wlod_stack); stack_copy(&data2->wlod_stack, &tmp_data2->wlod_stack); } mtx_unlock_spin(&w_mtx); sbuf_printf(sb, "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); if (data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w1->w_name, tmp_w1->w_class->lc_name, tmp_w2->w_name, tmp_w2->w_class->lc_name); stack_sbuf_print(sb, &tmp_data1->wlod_stack); sbuf_printf(sb, "\n"); } if (data2 && data2 != data1) { sbuf_printf(sb, "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n", tmp_w2->w_name, tmp_w2->w_class->lc_name, tmp_w1->w_name, tmp_w1->w_class->lc_name); stack_sbuf_print(sb, &tmp_data2->wlod_stack); sbuf_printf(sb, "\n"); } } } mtx_lock_spin(&w_mtx); if (generation != w_generation) { mtx_unlock_spin(&w_mtx); /* * The graph changed while we were printing stack data, * try again. */ req->oldidx = 0; sbuf_clear(sb); goto restart; } mtx_unlock_spin(&w_mtx); /* Free temporary storage space. */ free(tmp_data1, M_TEMP); free(tmp_data2, M_TEMP); free(tmp_w1, M_TEMP); free(tmp_w2, M_TEMP); sbuf_finish(sb); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return (error); } static int sysctl_debug_witness_channel(SYSCTL_HANDLER_ARGS) { static const struct { enum witness_channel channel; const char *name; } channels[] = { { WITNESS_CONSOLE, "console" }, { WITNESS_LOG, "log" }, { WITNESS_NONE, "none" }, }; char buf[16]; u_int i; int error; buf[0] = '\0'; for (i = 0; i < nitems(channels); i++) if (witness_channel == channels[i].channel) { snprintf(buf, sizeof(buf), "%s", channels[i].name); break; } error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); error = EINVAL; for (i = 0; i < nitems(channels); i++) if (strcmp(channels[i].name, buf) == 0) { witness_channel = channels[i].channel; error = 0; break; } return (error); } static int sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS) { struct witness *w; struct sbuf *sb; int error; if (witness_watch < 1) { error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning)); return (error); } if (witness_cold) { error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold)); return (error); } error = 0; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = sbuf_new_for_sysctl(NULL, NULL, FULLGRAPH_SBUF_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "\n"); mtx_lock_spin(&w_mtx); STAILQ_FOREACH(w, &w_all, w_list) w->w_displayed = 0; STAILQ_FOREACH(w, &w_all, w_list) witness_add_fullgraph(sb, w); mtx_unlock_spin(&w_mtx); /* * Close the sbuf and return to userland. 
*/ error = sbuf_finish(sb); sbuf_delete(sb); return (error); } static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) { int error, value; value = witness_watch; error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (value > 1 || value < -1 || (witness_watch == -1 && value != witness_watch)) return (EINVAL); witness_watch = value; return (0); } static void witness_add_fullgraph(struct sbuf *sb, struct witness *w) { int i; if (w->w_displayed != 0 || (w->w_file == NULL && w->w_line == 0)) return; w->w_displayed = 1; WITNESS_INDEX_ASSERT(w->w_index); for (i = 1; i <= w_max_used_index; i++) { if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) { sbuf_printf(sb, "\"%s\",\"%s\"\n", w->w_name, w_data[i].w_name); witness_add_fullgraph(sb, &w_data[i]); } } } /* * A simple hash function. Takes a key pointer and a key size. If size == 0, * interprets the key as a string and reads until the null * terminator. Otherwise, reads the first size bytes. Returns an unsigned 32-bit * hash value computed from the key. */ static uint32_t witness_hash_djb2(const uint8_t *key, uint32_t size) { unsigned int hash = 5381; int i; /* hash = hash * 33 + key[i] */ if (size) for (i = 0; i < size; i++) hash = ((hash << 5) + hash) + (unsigned int)key[i]; else for (i = 0; key[i] != 0; i++) hash = ((hash << 5) + hash) + (unsigned int)key[i]; return (hash); } /* * Initializes the two witness hash tables. Called exactly once from * witness_initialize(). */ static void witness_init_hash_tables(void) { int i; MPASS(witness_cold); /* Initialize the hash tables. */ for (i = 0; i < WITNESS_HASH_SIZE; i++) w_hash.wh_array[i] = NULL; w_hash.wh_size = WITNESS_HASH_SIZE; w_hash.wh_count = 0; /* Initialize the lock order data hash. */ w_lofree = NULL; for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) { memset(&w_lodata[i], 0, sizeof(w_lodata[i])); w_lodata[i].wlod_next = w_lofree; w_lofree = &w_lodata[i]; } w_lohash.wloh_size = WITNESS_LO_HASH_SIZE; w_lohash.wloh_count = 0; for (i = 0; i < WITNESS_LO_HASH_SIZE; i++) w_lohash.wloh_array[i] = NULL; } static struct witness * witness_hash_get(const char *key) { struct witness *w; uint32_t hash; MPASS(key != NULL); if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); hash = witness_hash_djb2(key, 0) % w_hash.wh_size; w = w_hash.wh_array[hash]; while (w != NULL) { if (strcmp(w->w_name, key) == 0) goto out; w = w->w_hash_next; } out: return (w); } static void witness_hash_put(struct witness *w) { uint32_t hash; MPASS(w != NULL); MPASS(w->w_name != NULL); if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); KASSERT(witness_hash_get(w->w_name) == NULL, ("%s: trying to add a hash entry that already exists!", __func__)); KASSERT(w->w_hash_next == NULL, ("%s: w->w_hash_next != NULL", __func__)); hash = witness_hash_djb2(w->w_name, 0) % w_hash.wh_size; w->w_hash_next = w_hash.wh_array[hash]; w_hash.wh_array[hash] = w; w_hash.wh_count++; } static struct witness_lock_order_data * witness_lock_order_get(struct witness *parent, struct witness *child) { struct witness_lock_order_data *data = NULL; struct witness_lock_order_key key; unsigned int hash; MPASS(parent != NULL && child != NULL); key.from = parent->w_index; key.to = child->w_index; WITNESS_INDEX_ASSERT(key.from); WITNESS_INDEX_ASSERT(key.to); if ((w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN) == 0) goto out; hash = witness_hash_djb2((const char*)&key, sizeof(key)) % w_lohash.wloh_size; data = w_lohash.wloh_array[hash]; while (data != NULL) { if 
(witness_lock_order_key_equal(&data->wlod_key, &key)) break; data = data->wlod_next; } out: return (data); } /* * Verify that parent and child have a known relationship, are not the same, * and child is actually a child of parent. This is done without w_mtx * to avoid contention in the common case. */ static int witness_lock_order_check(struct witness *parent, struct witness *child) { if (parent != child && w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN && isitmychild(parent, child)) return (1); return (0); } static int witness_lock_order_add(struct witness *parent, struct witness *child) { struct witness_lock_order_data *data = NULL; struct witness_lock_order_key key; unsigned int hash; MPASS(parent != NULL && child != NULL); key.from = parent->w_index; key.to = child->w_index; WITNESS_INDEX_ASSERT(key.from); WITNESS_INDEX_ASSERT(key.to); if (w_rmatrix[parent->w_index][child->w_index] & WITNESS_LOCK_ORDER_KNOWN) return (1); hash = witness_hash_djb2((const char*)&key, sizeof(key)) % w_lohash.wloh_size; w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN; data = w_lofree; if (data == NULL) return (0); w_lofree = data->wlod_next; data->wlod_next = w_lohash.wloh_array[hash]; data->wlod_key = key; w_lohash.wloh_array[hash] = data; w_lohash.wloh_count++; stack_zero(&data->wlod_stack); stack_save(&data->wlod_stack); return (1); } /* Call this whenever the structure of the witness graph changes. */ static void witness_increment_graph_generation(void) { if (witness_cold == 0) mtx_assert(&w_mtx, MA_OWNED); w_generation++; } static int witness_output_drain(void *arg __unused, const char *data, int len) { witness_output("%.*s", len, data); return (len); } static void witness_debugger(int cond, const char *msg) { char buf[32]; struct sbuf sb; struct stack st; if (!cond) return; if (witness_trace) { sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); sbuf_set_drain(&sb, witness_output_drain, NULL); stack_zero(&st); stack_save(&st); witness_output("stack backtrace:\n"); stack_sbuf_print_ddb(&sb, &st); sbuf_finish(&sb); } #ifdef KDB if (witness_kdb) kdb_enter(KDB_WHY_WITNESS, msg); #endif } Index: projects/netbsd-tests-upstream-01-2017/sys/net/iflib.c =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/net/iflib.c (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/net/iflib.c (revision 313267) @@ -1,5246 +1,5244 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif /* * enable accounting of every mbuf as it comes in to and goes out of iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. 
*/ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; struct iflib_ctx *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_pause_frames; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_ntxds[8]; uint16_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } iflib_txsd_array_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_MAX_RX_SEGS 32 
#define IFLIB_RX_COPY_THRESH 63 #define IFLIB_MAX_RX_REFRESH 32 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* this should really scale with ring size - 32 is a fairly arbitrary value for this */ #define TX_BATCH_SIZE 16 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x01 #define IFC_QFLUSH 0x02 #define IFC_MULTISEG 0x04 #define IFC_DMAR 0x08 #define IFC_SC_ALLOCATED 0x10 #define IFC_INIT_DONE 0x20 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { uint16_t ift_in_use; uint16_t ift_cidx; uint16_t ift_cidx_processed; uint16_t ift_pidx; uint8_t ift_gen; uint8_t ift_db_pending; uint8_t ift_db_pending_queued; uint8_t ift_npending; uint8_t ift_br_offset; /* implicit pad */ uint64_t ift_processed; uint64_t ift_cleaned; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring **ift_br; struct grouptask ift_task; uint16_t ift_size; uint16_t ift_id; struct callout ift_timer; struct callout ift_db_check; iflib_txsd_array_t ift_sds; uint8_t ift_nbr; uint8_t ift_qstatus; uint8_t ift_active; uint8_t ift_closed; int ift_watchdog_time; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { uint16_t ifl_cidx; uint16_t ifl_pidx; uint16_t ifl_credits; uint8_t ifl_gen; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ /* constant */ uint16_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline int get_inuse(int size, int cidx, int pidx, int gen) { int used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. 
*/ uint16_t ifr_size; uint16_t ifr_cq_cidx; uint16_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 static int enable_msix = 1; #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) #define TXDB_LOCK_INIT(txq) mtx_init(&(txq)->ift_db_mtx, (txq)->ift_db_mtx_name, NULL, MTX_DEF) #define TXDB_TRYLOCK(txq) mtx_trylock(&(txq)->ift_db_mtx) #define TXDB_LOCK(txq) mtx_lock(&(txq)->ift_db_mtx) #define TXDB_UNLOCK(txq) mtx_unlock(&(txq)->ift_db_mtx) #define TXDB_LOCK_DESTROY(txq) mtx_destroy(&(txq)->ift_db_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; 
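/*
 * These counters (and the ones above) exist only when IFLIB_DEBUG_COUNTERS
 * is non-zero, which by default means kernels built with INVARIANTS.  They
 * can be read from userland through the sysctls registered below, e.g.
 *
 *	sysctl net.iflib.txq_drain_flushing net.iflib.tx_seen
 *
 * and are zeroed as a group by iflib_debug_reset().
 */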
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_txq_drain_encapfail = iflib_encap_load_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = iflib_rx_mbuf_null = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int 
iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } IFDI_INIT(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? CTX_UNLOCK(ctx); return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. 
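 *
 * A worked example of the index walk below, with illustrative numbers only:
 * given nkr_num_slots = 512 (lim = 511), nr_hwcur = 508 and rhead = 4, the
 * first loop encapsulates slots 508, 509, 510, 511, 0, 1, 2 and 3, wrapping
 * through nm_next(), then sets nr_hwcur = 4 and flushes the NIC ring up to
 * (but not including) the corresponding nic_i.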
*/ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; pi.ipi_ndescs = 0; bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. 
* Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); bzero(&ri, sizeof(ri)); ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; uint16_t slot_flags = kring->nkr_slot_flags; for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX); for (n = 0; avail > 0; n++, avail--) { error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (error) ring->slot[nm_i].len = 0; else ring->slot[nm_i].len = ri.iri_len - crclen; ring->slot[nm_i].flags = slot_flags; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. 
* As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; caddr_t vaddr; void *addr = PNMB(na, slot, &paddr); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; vaddr = addr; if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); slot->flags &= ~NS_BUF_CHANGED; } /* * XXX we should be batching this operation - TODO */ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ nic_i = nm_prev(nic_i, lim); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); } return 0; ring_reset: return netmap_ring_reinit(kring); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == 0) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. 
* netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; bus_dmamap_t *map; int nrxd; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == 0) return; map = rxq->ifr_fl[0].ifl_sds.ifsd_map; nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; for (int i = 0; i < nrxd; i++, map++) { int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); uint64_t paddr; void *addr; caddr_t vaddr; vaddr = addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); /* Update descriptor and the cached value */ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size); } /* preserve queue */ if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); } else ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, 
mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc; struct resource *res; void *tag; device_t dev = ctx->ifc_dev; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. 
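 *
 * Two tags are created below: one sized for ordinary frames
 * (sctx->isc_tx_maxsize spread over at most scctx->isc_tx_nsegments
 * segments) and a second, larger one for TSO bursts
 * (scctx->isc_tx_tso_size_max over at most isc_tx_tso_segments_max
 * segments of isc_tx_tso_segsize_max bytes each).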
*/ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); goto fail; } if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); 
bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* Reset indices */ txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; 
bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; n = count; MPASS(n > 0); MPASS(fl->ifl_credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ if ((cl = sd_cl[idx]) == NULL) { if ((cl = sd_cl[idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #ifdef notyet if ((sd_flags[pidx] & RX_SW_DESC_MAP_CREATED) == 0) { int err; if ((err = bus_dmamap_create(fl->ifl_ifdi->idi_tag, 0, &sd_map[idx]))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(fl->ifl_zone, cl); n = 0; goto done; } sd_flags[idx] |= RX_SW_DESC_MAP_CREATED; } #endif #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { /* * !zone_pack ? 
*/ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } sd_flags[idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[idx] == NULL); sd_cl[idx] = cl; sd_m[idx] = m; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; fl->ifl_credits++; i++; MPASS(fl->ifl_credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx, fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size); i = 0; pidx = idx; } fl->ifl_pidx = idx; } done: DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. 
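 *
 *  The cluster size is chosen from isc_max_frame_size in iflib_fl_setup()
 *  below; for example a 9000-byte MTU (a maximum frame of roughly 9018
 *  bytes) falls into the <= 9216 bucket and gets MJUM9BYTES clusters,
 *  while a standard 1500-byte MTU stays on plain MCLBYTES clusters.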
* **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); /* XXX destroy maps first */ free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } /* * MI independent logic * */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
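**
** If a queue is HUNG and no pause frames were seen (which would otherwise
** explain the stall), the code below clears IFF_DRV_RUNNING, invokes the
** driver's IFDI_WATCHDOG_RESET() and reinitializes via iflib_init_locked();
** otherwise the timer simply rearms itself every hz/2 ticks while the
** interface remains running.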
*/ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && (ctx->ifc_pause_frames == 0)) goto hung; if (TXQ_AVAIL(txq) <= 2*scctx->isc_tx_nsegments || ifmp_ring_is_stalled(txq->ift_br[0])) GROUPTASK_ENQUEUE(&txq->ift_task); ctx->ifc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, 0, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_pause_frames = 0; iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); callout_stop(&txq->ift_db_check); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { iflib_netmap_rxq_init(ctx, rxq); } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(100000); IFDI_STOP(ctx); DELAY(100000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. 
*/ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br[0]); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static void rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int *cltype, int unload, iflib_fl_t *pfl, int *pcidx) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; iflib_dma_info_t di; int next; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; if (cltype) fl->ifl_cl_dequeued++; #endif prefetch_pkts(fl, cidx); if (fl->ifl_sds.ifsd_map != NULL) { next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_flags[next]); bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, map); } if (__predict_false(++fl->ifl_cidx == fl->ifl_size)) { fl->ifl_cidx = 0; fl->ifl_gen = 0; } /* YES ick */ if (cltype) *cltype = fl->ifl_cltype; *pfl = fl; *pcidx = cidx; } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) { int i, padlen , flags, cltype; struct mbuf *m, *mh, *mt, *sd_m; iflib_fl_t fl; int cidx; caddr_t cl, sd_cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE, &fl, &cidx); sd_m = fl->ifl_sds.ifsd_m[cidx]; sd_cl = fl->ifl_sds.ifsd_cl[cidx]; MPASS(sd_cl != NULL); MPASS(sd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(sd_m, M_NOWAIT, MT_DATA, 0); m_free(sd_m); fl->ifl_sds.ifsd_m[cidx] = NULL; continue; } m = sd_m; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = 
mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } fl->ifl_sds.ifsd_m[cidx] = NULL; cl = fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_sds.ifsd_cl[cidx] = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct mbuf *m; iflib_fl_t fl; caddr_t sd_cl; int cidx; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE, &fl, &cidx); m = fl->ifl_sds.ifsd_m[cidx]; fl->ifl_sds.ifsd_m[cidx] = NULL; sd_cl = fl->ifl_sds.ifsd_cl[cidx]; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); memcpy(m->m_data, sd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } static bool iflib_rxeof(iflib_rxq_t rxq, int budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; uint16_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &budget)) { return (FALSE); } mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ bzero(&ri, sizeof(ri)); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ctx->ifc_ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); /* in lieu of handling correctly - make sure it isn't being unhandled */ MPASS(err == 0); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? 
*/ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); ifp = ctx->ifc_ifp; lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; #endif DBG_COUNTER_INC(rx_if_input); ifp->if_input(ifp, m); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(size) (size >> 5) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) static __inline void iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring) { uint32_t dbval; if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(txq->ift_size)) { /* the lock will only ever be contended in the !min_latency case */ if (!TXDB_TRYLOCK(txq)) return; dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; TXDB_UNLOCK(txq); } } static void iflib_txd_deferred_db_check(void * arg) { iflib_txq_t txq = arg; /* simple non-zero boolean so use bitwise OR */ if ((txq->ift_db_pending | txq->ift_npending) && txq->ift_db_pending >= txq->ift_db_pending_queued) iflib_txd_db_check(txq->ift_ctx, txq, TRUE); txq->ift_db_pending_queued = 0; if (ifmp_ring_is_stalled(txq->ift_br[0])) iflib_txq_check_drain(txq, 4); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); n = *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
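 *
 * For example, on an 802.1Q tagged frame evl_encap_proto reads
 * ETHERTYPE_VLAN, so the real ethertype is taken from evl_proto and the
 * header length becomes ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN (14 + 4
 * bytes); an untagged frame simply uses ETHER_HDR_LEN.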
*/ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if (pi->ipi_csum_flags & CSUM_IP) ip->ip_sum = 0; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). 
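 * The flag in question is CSUM_TCP_IPV6.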
* So, set it here because the rest of the flow requires it. */ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, mask, err, maxsegsz, ntxd, count; struct mbuf *m, *tmp, **ifsd_m, **mp; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; i = 0; next = pidx; mask = (txq->ift_size-1); m = *m0; do { mp = &ifsd_m[next]; *mp = m; m = m->m_next; if (__predict_false((*mp)->m_len == 0)) { m_free(*mp); *mp = NULL; } else next = (pidx + i) & (ntxd-1); } while (m != NULL); } else { int buflen, sgsize, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; maxsegsz = sctx->isc_tx_maxsize; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; if (i >= max_segs) goto err; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, 
next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); map = txq->ift_sds.ifsd_map[pidx]; next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } m_head = *m_headp; bzero(&pi, sizeof(pi)); pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. 
* cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) /* if there are more than TXQ_MIN_OCCUPANCY packets pending we consider deferring * doorbell writes * * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic */ #define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2) static inline int iflib_txq_min_occupancy(iflib_txq_t txq) { if_ctx_t ctx; ctx = txq->ift_ctx; return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) + MAX_TX_DESC(ctx)); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; while (n--) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? 
If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_free(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; if (txq->ift_active == FALSE) txq->ift_active = TRUE; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset) { return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (r->size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br[0], budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail, err, in_use_prev, desc_used; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); callout_stop(&txq->ift_db_check); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) { mp = _ring_peek_one(r, cidx, i); MPASS(mp != NULL && *mp != NULL); in_use_prev = txq->ift_in_use; if ((err = iflib_encap(txq, mp)) == ENOBUFS) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ break; } consumed++; if (err) { DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } 
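/* encap succeeded: account for the packet below and let iflib_txd_db_check() decide whether to ring the doorbell yet */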
pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; if (m->m_flags & M_MCAST) mcast_sent++; txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); iflib_txd_db_check(ctx, txq, FALSE); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) break; if (desc_used >= TXQ_MAX_DB_CONSUMED(txq->ift_size)) break; } if ((iflib_min_tx_latency || iflib_txq_min_occupancy(txq)) && txq->ift_db_pending) iflib_txd_db_check(ctx, txq, TRUE); else if ((txq->ift_db_pending || TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2) && (callout_pending(&txq->ift_db_check) == 0)) { txq->ift_db_pending_queued = txq->ift_db_pending; callout_reset_on(&txq->ift_db_check, 1, iflib_txd_deferred_db_check, txq, txq->ift_db_check.c_cpu); } if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); callout_stop(&txq->ift_db_check); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i); m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br[0]; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = 
ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE); if (err) { GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); m_freem(m); } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br[0]) || ifmp_ring_is_stalled(txq->ift_br[0]))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** 
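** This is the SIOCSIFADDR (interface address) path.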
Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; - - break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 
iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { 
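/* clamp the requested RX descriptor count to the driver-advertised maximum */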
device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable); if_setcapenable(ifp, scctx->isc_capenable); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; if(sctx->isc_flags & IFLIB_HAS_TXCQ) main_txq = 1; else main_txq = 0; if(sctx->isc_flags & IFLIB_HAS_RXCQ) main_rxq = 1; else main_rxq = 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. 
(Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i; struct taskqgroup *tqg; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); callout_drain(&txq->ift_db_check); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < 
NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); 
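/* the driver must have registered the full set of tx/rx methods before attach can proceed */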
MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? 
sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; int nbuf_rings = 1; /* XXX determine dynamically */ KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } if (!(brscp = malloc(sizeof(void *) * nbuf_rings * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to buf_ring_sc * memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; txq->ift_db_check.c_cpu = cpu; txq->ift_nbr = nbuf_rings; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); callout_init_mtx(&txq->ift_db_check, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); TXDB_LOCK_INIT(txq); txq->ift_br = brscp + i*nbuf_rings; for (j = 0; j < nbuf_rings; j++) { err = ifmp_ring_alloc(&txq->ift_br[j], 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list 
memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { rxq->ifr_fl[j].ifl_rxq = rxq; rxq->ifr_fl[j].ifl_id = j; rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. 
*/ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } static int find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) { int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, cpus); } cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); return (cpuid-1); } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; cpuset_t cpus; gtask_fn_t *fn; int tqrid, err, cpuid; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? 
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { cpuid = find_nth(ctx, &cpus, qid); taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rid, name); } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask->gt_taskqueue != NULL); 
#endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = scctx->isc_ntxqsets; iflib_num_rx_queues = scctx->isc_nrxqsets; device_printf(dev, "msix_init qsets capped at %d\n", iflib_num_tx_queues); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (enable_msix == 0) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. 
*/ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * https://www.youtube.com/watch?v=nnwWKkNau4I * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) { #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); } else { device_printf(dev, "Unable to fetch CPU list\n"); /* Figure out a reasonable auto config value */ queues = min(queuemsgs, mp_ncpus); } #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = 
((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; uint16_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else 
qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br[0]->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br[0]->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br[0]->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br[0]->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br[0]->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br[0]->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br[0]->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = 
ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } Index: projects/netbsd-tests-upstream-01-2017/sys/riscv/include/atomic.h =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/riscv/include/atomic.h (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/riscv/include/atomic.h (revision 313267) @@ -1,461 +1,562 @@ /*- * Copyright (c) 2015 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #define fence() __asm __volatile("fence" ::: "memory"); #define mb() fence() #define rmb() fence() #define wmb() fence() #define ATOMIC_ACQ_REL(NAME, WIDTH) \ static __inline void \ atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ atomic_##NAME##_##WIDTH(p, v); \ fence(); \ } \ \ static __inline void \ atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ fence(); \ atomic_##NAME##_##WIDTH(p, v); \ } static __inline void atomic_add_32(volatile uint32_t *p, uint32_t val) { __asm __volatile("amoadd.w zero, %1, %0" : "+A" (*p) : "r" (val) : "memory"); } static __inline void atomic_subtract_32(volatile uint32_t *p, uint32_t val) { __asm __volatile("amoadd.w zero, %1, %0" : "+A" (*p) : "r" (-val) : "memory"); } static __inline void atomic_set_32(volatile uint32_t *p, uint32_t val) { __asm __volatile("amoor.w zero, %1, %0" : "+A" (*p) : "r" (val) : "memory"); } static __inline void atomic_clear_32(volatile uint32_t *p, uint32_t val) { __asm __volatile("amoand.w zero, %1, %0" : "+A" (*p) : "r" (~val) : "memory"); } static __inline int atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { uint32_t tmp; int res; res = 0; __asm __volatile( "0:" "li %1, 1\n" /* Preset to fail */ "lr.w %0, %2\n" "bne %0, %z3, 1f\n" "sc.w %1, %z4, %2\n" "bnez %1, 0b\n" "1:" : "=&r" (tmp), "=&r" (res), "+A" (*p) : "rJ" (cmpval), "rJ" (newval) : "memory"); return (!res); } +static __inline int +atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval) +{ + uint32_t tmp; + int res; + + res = 0; + + __asm __volatile( + "0:" + "li %1, 1\n" /* Preset to fail */ + "lr.w %0, %2\n" /* Load old value */ + "bne %0, %z4, 1f\n" /* Compare */ + "sc.w %1, %z5, %2\n" /* Try to store new value */ + "j 2f\n" + "1:" + "sw %0, %3\n" /* Save old value */ + "2:" + : "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval) + : "rJ" (*cmpval), "rJ" (newval) + : "memory"); + + return (!res); +} + static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t val) { uint32_t ret; __asm __volatile("amoadd.w %0, %2, %1" : "=&r" (ret), "+A" (*p) : "r" (val) : "memory"); return (ret); } static __inline uint32_t atomic_readandclear_32(volatile uint32_t *p) { uint32_t ret; uint32_t val; val = 0; __asm __volatile("amoswap.w %0, %2, %1" : "=&r"(ret), "+A" (*p) : "r" (val) : "memory"); return (ret); } #define atomic_add_int atomic_add_32 #define atomic_clear_int atomic_clear_32 #define atomic_cmpset_int atomic_cmpset_32 +#define atomic_fcmpset_int atomic_fcmpset_32 #define atomic_fetchadd_int atomic_fetchadd_32 #define atomic_readandclear_int atomic_readandclear_32 #define atomic_set_int atomic_set_32 #define atomic_subtract_int atomic_subtract_32 ATOMIC_ACQ_REL(set, 32) ATOMIC_ACQ_REL(clear, 32) ATOMIC_ACQ_REL(add, 32) ATOMIC_ACQ_REL(subtract, 32) static __inline int atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { int res; res = 
atomic_cmpset_32(p, cmpval, newval); fence(); return (res); } static __inline int atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { fence(); return (atomic_cmpset_32(p, cmpval, newval)); } +static __inline int +atomic_fcmpset_acq_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval) +{ + int res; + + res = atomic_fcmpset_32(p, cmpval, newval); + + fence(); + + return (res); +} + +static __inline int +atomic_fcmpset_rel_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval) +{ + + fence(); + + return (atomic_fcmpset_32(p, cmpval, newval)); +} + static __inline uint32_t atomic_load_acq_32(volatile uint32_t *p) { uint32_t ret; ret = *p; fence(); return (ret); } static __inline void atomic_store_rel_32(volatile uint32_t *p, uint32_t val) { fence(); *p = val; } #define atomic_add_acq_int atomic_add_acq_32 #define atomic_clear_acq_int atomic_clear_acq_32 #define atomic_cmpset_acq_int atomic_cmpset_acq_32 +#define atomic_fcmpset_acq_int atomic_fcmpset_acq_32 #define atomic_load_acq_int atomic_load_acq_32 #define atomic_set_acq_int atomic_set_acq_32 #define atomic_subtract_acq_int atomic_subtract_acq_32 #define atomic_add_rel_int atomic_add_rel_32 #define atomic_clear_rel_int atomic_add_rel_32 #define atomic_cmpset_rel_int atomic_cmpset_rel_32 +#define atomic_fcmpset_rel_int atomic_fcmpset_rel_32 #define atomic_set_rel_int atomic_set_rel_32 #define atomic_subtract_rel_int atomic_subtract_rel_32 #define atomic_store_rel_int atomic_store_rel_32 static __inline void atomic_add_64(volatile uint64_t *p, uint64_t val) { __asm __volatile("amoadd.d zero, %1, %0" : "+A" (*p) : "r" (val) : "memory"); } static __inline void atomic_subtract_64(volatile uint64_t *p, uint64_t val) { __asm __volatile("amoadd.d zero, %1, %0" : "+A" (*p) : "r" (-val) : "memory"); } static __inline void atomic_set_64(volatile uint64_t *p, uint64_t val) { __asm __volatile("amoor.d zero, %1, %0" : "+A" (*p) : "r" (val) : "memory"); } static __inline void atomic_clear_64(volatile uint64_t *p, uint64_t val) { __asm __volatile("amoand.d zero, %1, %0" : "+A" (*p) : "r" (~val) : "memory"); } static __inline int atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { uint64_t tmp; int res; res = 0; __asm __volatile( "0:" "li %1, 1\n" /* Preset to fail */ "lr.d %0, %2\n" "bne %0, %z3, 1f\n" "sc.d %1, %z4, %2\n" "bnez %1, 0b\n" "1:" : "=&r" (tmp), "=&r" (res), "+A" (*p) : "rJ" (cmpval), "rJ" (newval) : "memory"); return (!res); } +static __inline int +atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval) +{ + uint64_t tmp; + int res; + + res = 0; + + __asm __volatile( + "0:" + "li %1, 1\n" /* Preset to fail */ + "lr.d %0, %2\n" /* Load old value */ + "bne %0, %z4, 1f\n" /* Compare */ + "sc.d %1, %z5, %2\n" /* Try to store new value */ + "j 2f\n" + "1:" + "sd %0, %3\n" /* Save old value */ + "2:" + : "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval) + : "rJ" (*cmpval), "rJ" (newval) + : "memory"); + + return (!res); +} + static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t val) { uint64_t ret; __asm __volatile("amoadd.d %0, %2, %1" : "=&r" (ret), "+A" (*p) : "r" (val) : "memory"); return (ret); } static __inline uint64_t atomic_readandclear_64(volatile uint64_t *p) { uint64_t ret; uint64_t val; val = 0; __asm __volatile("amoswap.d %0, %2, %1" : "=&r"(ret), "+A" (*p) : "r" (val) : "memory"); return (ret); } static __inline uint32_t atomic_swap_32(volatile uint32_t *p, uint32_t val) { uint32_t old; __asm __volatile("amoswap.w %0, %2, %1" : 
"=&r"(old), "+A" (*p) : "r" (val) : "memory"); return (old); } static __inline uint64_t atomic_swap_64(volatile uint64_t *p, uint64_t val) { uint64_t old; __asm __volatile("amoswap.d %0, %2, %1" : "=&r"(old), "+A" (*p) : "r" (val) : "memory"); return (old); } #define atomic_add_long atomic_add_64 #define atomic_clear_long atomic_clear_64 #define atomic_cmpset_long atomic_cmpset_64 +#define atomic_fcmpset_long atomic_fcmpset_64 #define atomic_fetchadd_long atomic_fetchadd_64 #define atomic_readandclear_long atomic_readandclear_64 #define atomic_set_long atomic_set_64 #define atomic_subtract_long atomic_subtract_64 #define atomic_add_ptr atomic_add_64 #define atomic_clear_ptr atomic_clear_64 #define atomic_cmpset_ptr atomic_cmpset_64 +#define atomic_fcmpset_ptr atomic_fcmpset_64 #define atomic_fetchadd_ptr atomic_fetchadd_64 #define atomic_readandclear_ptr atomic_readandclear_64 #define atomic_set_ptr atomic_set_64 #define atomic_subtract_ptr atomic_subtract_64 ATOMIC_ACQ_REL(set, 64) ATOMIC_ACQ_REL(clear, 64) ATOMIC_ACQ_REL(add, 64) ATOMIC_ACQ_REL(subtract, 64) static __inline int atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { int res; res = atomic_cmpset_64(p, cmpval, newval); fence(); return (res); } static __inline int atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { fence(); return (atomic_cmpset_64(p, cmpval, newval)); } +static __inline int +atomic_fcmpset_acq_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval) +{ + int res; + + res = atomic_fcmpset_64(p, cmpval, newval); + + fence(); + + return (res); +} + +static __inline int +atomic_fcmpset_rel_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval) +{ + + fence(); + + return (atomic_fcmpset_64(p, cmpval, newval)); +} + static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { uint64_t ret; ret = *p; fence(); return (ret); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t val) { fence(); *p = val; } #define atomic_add_acq_long atomic_add_acq_64 #define atomic_clear_acq_long atomic_add_acq_64 #define atomic_cmpset_acq_long atomic_cmpset_acq_64 +#define atomic_fcmpset_acq_long atomic_fcmpset_acq_64 #define atomic_load_acq_long atomic_load_acq_64 #define atomic_set_acq_long atomic_set_acq_64 #define atomic_subtract_acq_long atomic_subtract_acq_64 #define atomic_add_acq_ptr atomic_add_acq_64 #define atomic_clear_acq_ptr atomic_add_acq_64 #define atomic_cmpset_acq_ptr atomic_cmpset_acq_64 +#define atomic_fcmpset_acq_ptr atomic_fcmpset_acq_64 #define atomic_load_acq_ptr atomic_load_acq_64 #define atomic_set_acq_ptr atomic_set_acq_64 #define atomic_subtract_acq_ptr atomic_subtract_acq_64 static __inline void atomic_thread_fence_acq(void) { fence(); } static __inline void atomic_thread_fence_rel(void) { fence(); } static __inline void atomic_thread_fence_acq_rel(void) { fence(); } static __inline void atomic_thread_fence_seq_cst(void) { fence(); } #define atomic_add_rel_long atomic_add_rel_64 #define atomic_clear_rel_long atomic_clear_rel_64 #define atomic_add_rel_long atomic_add_rel_64 #define atomic_clear_rel_long atomic_clear_rel_64 #define atomic_cmpset_rel_long atomic_cmpset_rel_64 +#define atomic_fcmpset_rel_long atomic_fcmpset_rel_64 #define atomic_set_rel_long atomic_set_rel_64 #define atomic_subtract_rel_long atomic_subtract_rel_64 #define atomic_store_rel_long atomic_store_rel_64 #define atomic_add_rel_ptr atomic_add_rel_64 #define atomic_clear_rel_ptr atomic_clear_rel_64 #define atomic_cmpset_rel_ptr atomic_cmpset_rel_64 
+#define atomic_fcmpset_rel_ptr atomic_fcmpset_rel_64 #define atomic_set_rel_ptr atomic_set_rel_64 #define atomic_subtract_rel_ptr atomic_subtract_rel_64 #define atomic_store_rel_ptr atomic_store_rel_64 #endif /* _MACHINE_ATOMIC_H_ */ Index: projects/netbsd-tests-upstream-01-2017/sys/vm/vm_object.h =================================================================== --- projects/netbsd-tests-upstream-01-2017/sys/vm/vm_object.h (revision 313266) +++ projects/netbsd-tests-upstream-01-2017/sys/vm/vm_object.h (revision 313267) @@ -1,316 +1,316 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.h 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Virtual memory object module definitions. */ #ifndef _VM_OBJECT_ #define _VM_OBJECT_ #include #include #include #include #include /* * Types defined: * * vm_object_t Virtual memory object. 
* * List of locks * (c) const until freed * (o) per-object lock * (f) free pages queue mutex * */ struct vm_object { struct rwlock lock; TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(respgs, vm_page) memq; /* list of resident pages */ struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ int ref_count; /* How many refs?? */ int shadow_count; /* how many objects that this is a shadow for */ vm_memattr_t memattr; /* default memory attribute for pages */ objtype_t type; /* type of pager */ u_short flags; /* see below */ u_short pg_color; /* (c) color of first page in obj */ u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ int resident_page_count; /* number of resident pages */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ void *handle; union { /* * VNode pager * * vnp_size - current size of file */ struct { off_t vnp_size; vm_ooffset_t writemappings; } vnp; /* * Device pager * * devp_pglist - list of allocated pages */ struct { TAILQ_HEAD(, vm_page) devp_pglist; struct cdev_pager_ops *ops; struct cdev *dev; } devp; /* * SG pager * * sgp_pglist - list of allocated pages */ struct { TAILQ_HEAD(, vm_page) sgp_pglist; } sgp; /* * Swap pager * * swp_tmpfs - back-pointer to the tmpfs vnode, * if any, which uses the vm object * as backing store. The handle * cannot be reused for linking, * because the vnode can be * reclaimed and recreated, making * the handle changed and hash-chain * invalid. * * swp_bcount - number of swap 'swblock' metablocks, each * contains up to 16 swapblk assignments. 
* see vm/swap_pager.h */ struct { void *swp_tmpfs; int swp_bcount; } swp; } un_pager; struct ucred *cred; vm_ooffset_t charge; void *umtx_data; }; /* * Flags */ #define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */ #define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */ #define OBJ_POPULATE 0x0004 /* pager implements populate() */ #define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ #define OBJ_NOSPLIT 0x0010 /* dont split this object */ #define OBJ_UMTXDEAD 0x0020 /* umtx pshared was terminated */ #define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ #define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty, only for vnode */ #define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */ #define OBJ_TMPFS_DIRTY 0x0400 /* dirty tmpfs obj */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ #define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */ #define OBJ_DISCONNECTWNT 0x4000 /* disconnect from vnode wanted */ #define OBJ_TMPFS 0x8000 /* has tmpfs vnode allocated */ -#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) -#define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT)) +#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) +#define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT)) #ifdef _KERNEL #define OBJPC_SYNC 0x1 /* sync I/O */ #define OBJPC_INVAL 0x2 /* invalidate */ #define OBJPC_NOSYNC 0x4 /* skip if VPO_NOSYNC */ /* * The following options are supported by vm_object_page_remove(). */ #define OBJPR_CLEANONLY 0x1 /* Don't remove dirty pages. */ #define OBJPR_NOTMAPPED 0x2 /* Don't unmap pages. */ TAILQ_HEAD(object_q, vm_object); extern struct object_q vm_object_list; /* list of allocated objects */ extern struct mtx vm_object_list_mtx; /* lock for object list and count */ extern struct vm_object kernel_object_store; extern struct vm_object kmem_object_store; #define kernel_object (&kernel_object_store) #define kmem_object (&kmem_object_store) #define VM_OBJECT_ASSERT_LOCKED(object) \ rw_assert(&(object)->lock, RA_LOCKED) #define VM_OBJECT_ASSERT_RLOCKED(object) \ rw_assert(&(object)->lock, RA_RLOCKED) #define VM_OBJECT_ASSERT_WLOCKED(object) \ rw_assert(&(object)->lock, RA_WLOCKED) #define VM_OBJECT_ASSERT_UNLOCKED(object) \ rw_assert(&(object)->lock, RA_UNLOCKED) #define VM_OBJECT_LOCK_DOWNGRADE(object) \ rw_downgrade(&(object)->lock) #define VM_OBJECT_RLOCK(object) \ rw_rlock(&(object)->lock) #define VM_OBJECT_RUNLOCK(object) \ rw_runlock(&(object)->lock) #define VM_OBJECT_SLEEP(object, wchan, pri, wmesg, timo) \ rw_sleep((wchan), &(object)->lock, (pri), (wmesg), (timo)) #define VM_OBJECT_TRYRLOCK(object) \ rw_try_rlock(&(object)->lock) #define VM_OBJECT_TRYWLOCK(object) \ rw_try_wlock(&(object)->lock) #define VM_OBJECT_TRYUPGRADE(object) \ rw_try_upgrade(&(object)->lock) #define VM_OBJECT_WLOCK(object) \ rw_wlock(&(object)->lock) #define VM_OBJECT_WOWNED(object) \ rw_wowned(&(object)->lock) #define VM_OBJECT_WUNLOCK(object) \ rw_wunlock(&(object)->lock) /* * The object must be locked or thread private. */ static __inline void vm_object_set_flag(vm_object_t object, u_short bits) { object->flags |= bits; } /* * Conditionally set the object's color, which (1) enables the allocation * of physical memory reservations for anonymous objects and larger-than- * superpage-sized named objects and (2) determines the first page offset * within the object at which a reservation may be allocated. 
In other * words, the color determines the alignment of the object with respect * to the largest superpage boundary. When mapping named objects, like * files or POSIX shared memory objects, the color should be set to zero * before a virtual address is selected for the mapping. In contrast, * for anonymous objects, the color may be set after the virtual address * is selected. * * The object must be locked. */ static __inline void vm_object_color(vm_object_t object, u_short color) { if ((object->flags & OBJ_COLORED) == 0) { object->pg_color = color; object->flags |= OBJ_COLORED; } } void vm_object_clear_flag(vm_object_t object, u_short bits); void vm_object_pip_add(vm_object_t object, short i); void vm_object_pip_subtract(vm_object_t object, short i); void vm_object_pip_wakeup(vm_object_t object); void vm_object_pip_wakeupn(vm_object_t object, short i); void vm_object_pip_wait(vm_object_t object, char *waitid); void umtx_shm_object_init(vm_object_t object); void umtx_shm_object_terminated(vm_object_t object); extern int umtx_shm_vnobj_persistent; vm_object_t vm_object_allocate (objtype_t, vm_pindex_t); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, boolean_t); void vm_object_collapse (vm_object_t); void vm_object_deallocate (vm_object_t); void vm_object_destroy (vm_object_t); void vm_object_terminate (vm_object_t); void vm_object_set_writeable_dirty (vm_object_t); void vm_object_init (void); void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int); boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags); void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end); void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options); boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_print(long addr, boolean_t have_addr, long count, char *modif); void vm_object_reference (vm_object_t); void vm_object_reference_locked(vm_object_t); int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr); void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t); void vm_object_split(vm_map_entry_t); boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t, boolean_t); void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue); struct vnode *vm_object_vnode(vm_object_t object); #endif /* _KERNEL */ #endif /* _VM_OBJECT_ */ Index: projects/netbsd-tests-upstream-01-2017 =================================================================== --- projects/netbsd-tests-upstream-01-2017 (revision 313266) +++ projects/netbsd-tests-upstream-01-2017 (revision 313267) Property changes on: projects/netbsd-tests-upstream-01-2017 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r313244-313266
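The removed and re-added IDX_TO_OFF()/OFF_TO_IDX() lines in the vm_object.h hunk above appear textually identical apart from whitespace. For reference, these macros translate between a page index within a VM object and a byte offset by shifting by PAGE_SHIFT; a minimal sketch of the relationship, assuming 4 KiB pages (PAGE_SHIFT == 12):

	vm_pindex_t idx = 3;
	vm_ooffset_t off = IDX_TO_OFF(idx);		/* 3 << 12 == 0x3000 */
	vm_pindex_t same = OFF_TO_IDX(off + 123);	/* any offset inside that page maps back to index 3 */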